import numpy as np
import matplotlib.pyplot as plt
from pyCRLD.Environments.EcologicalPublicGood import EcologicalPublicGood as EPG
from pyCRLD.Agents.StrategyActorCritic import stratAC
import _code.SimulationScripts as sim
5 Abrupt transitions
In this section, we illustrate complex phenomena around abrupt transitions in the parameter space of CRLD. In this example, we focus on the discount factor, which indicates how much the agents care for future rewards. Abrupt transitions go by many names, such as critical transitions, regime shifts, bifurcations, or tipping elements, to name some of them.
First, we import everything we need:
Compute data
We start by computing the CRLD trajectories from 250 random initial strategies for each value in a range of discount factors.
# Set the data directory for storing results, e.g.,
ddir = '/Users/wolf/Downloads/CoCoIn_data'

# Initialize first environment and multi-agent environment interface (MAEi)
env = EPG(N=2, f=1.2, c=5, m=-5, qc=0.2, qr=0.01, degraded_choice=False)
MAEi = stratAC(env=env, learning_rates=0.1, discount_factors=0.99,
               use_prefactor=True)

# Create 250 random initial strategies from simulation scripts 'sim'
Xs = sim.initial_strategies(MAEi, 250)
# Create discount factors to loop through: a coarse grid over the whole
# range, a finer grid between 0.5 and 0.95, the finest grid between 0.65
# and 0.9, plus the two extreme values 0.01 and 0.99.
# np.unique removes the duplicates shared by the grids and returns the
# values in ascending order, so no separate sort is needed.
discountfacts = np.unique(np.concatenate([
    np.linspace(0.1, 1.0, 10)[:-1].round(2),
    np.arange(0.5, 1.0, 0.05).round(2),
    np.arange(0.65, 0.9, 0.0125).round(4),
    [0.01, 0.99],
]))
print("Discount factors:")
print(discountfacts)
Discount factors:
[0.01 0.1 0.2 0.3 0.4 0.5 0.55 0.6 0.65 0.6625
0.675 0.6875 0.7 0.7125 0.725 0.7375 0.75 0.7625 0.775 0.7875
0.8 0.8125 0.825 0.8375 0.85 0.8625 0.875 0.8875 0.9 0.95
0.99 ]
# Compute or load the data from disk (if they exist).
# Each list collects one entry per discount factor, in the order of
# `discountfacts`.
convtimes = []
rewss = []
coops = []
for dcf in discountfacts:
    print(f" = = = = {dcf} = = = =")
    # NOTE(review): use_prefactor is False here, while the interface used to
    # draw the initial strategies was created with use_prefactor=True --
    # confirm this difference is intended.
    MAEi = stratAC(env=env, learning_rates=0.1, discount_factors=dcf,
                   use_prefactor=False)
    trjs, fprs = sim.obtain_trajectories(MAEi, Xs, 25000, ddir=ddir)

    # convergence times: the length of each trajectory
    convtimes.append([len(trj) for trj in trjs])

    # final rewards
    rewss.append(sim.final_rewards(MAEi, trjs))

    # cooperative acts: entry [:, 1, 0] of the last strategy of each trajectory
    coops.append([trj[-1].astype(float)[:, 1, 0] for trj in trjs])
Plotting function
Next, we create a function to plot the data along the varying parameter.
def plot_valuehistograms_vs_parameters(parameters, values, bins, rnge,
                                       marker='.', alpha=1.0, color='black',
                                       cmap='viridis', ax=None):
    """
    Plot a histogram for each parameter next to each other.

    For every parameter a histogram of its values is drawn as one column of
    a color mesh. On top, the interquartile range (shaded band), the median
    (connected markers) and the mean (unconnected markers) are plotted.

    Parameters
    ----------
    parameters: iterable
        of float-valued parameters (at least two, used as x positions)
    values: iterable
        of iterable of values for each parameter
    bins: int
        The number of bins for the histograms
    rnge: tuple
        Range of the histogram as (min, max)
    marker: str
        Marker style for the median and mean points
    alpha: float
        Base opacity; the individual plot elements scale it
    color: str
        Color of the quartile band, median line and mean markers
    cmap: str
        Colormap for the histogram mesh
    ax: matplotlib axes, optional
        Axes to draw into; a new figure is created if None
    """
    # Figure
    if ax is None:
        _, ax = plt.subplots()

    # Create one histogram (bin counts only) per parameter
    valhist = np.array([np.histogram(vals, bins=bins, range=rnge)[0]
                        for vals in values])

    # Adjust spacing: place the cell edges halfway between neighbouring
    # parameters and extend the outermost cells by half of the adjacent gap.
    params = np.array(parameters)
    delta = params[1:] - params[:-1]
    paramedges = np.concatenate(([params[0] - 0.5*delta[0]],
                                 params[:-1] + 0.5*delta,
                                 [params[-1] + 0.5*delta[-1]]))
    valedges = np.linspace(rnge[0], rnge[1], bins+1)

    # Plot histograms with colormap
    X, Y = np.meshgrid(paramedges, valedges)
    ax.pcolormesh(X, Y, valhist.T, cmap=cmap, alpha=alpha*0.75)

    # Plot median, quantiles and mean
    quartile1, medians, quartile3 = np.percentile(values, [25, 50, 75],
                                                  axis=1)
    ax.fill_between(params, quartile1, quartile3, color=color,
                    alpha=alpha*0.2)
    ax.plot(params, medians, marker=marker, markersize=4, linestyle='-',
            color=color, alpha=0.5*alpha)
    ax.plot(params, np.mean(values, axis=1), marker=marker, linestyle='',
            color=color, alpha=alpha)

    # Adjust the visible y range
    ax.set_ylim(rnge[0], rnge[1])
Abrupt transition
We use the created plotting function (Section 5.2) to visualize the phenomenon of an abrupt transition from complete defection to complete cooperation.
We show the abrupt transition in the level of cooperation at convergence.
# Create the canvas
fsf = 0.7  # figure size factor
fig, ax = plt.subplots(figsize=(fsf*6, fsf*3))

# Plot the cooperation probabilities versus the discount factors
# (the values are averaged over the last axis of `coops`)
plot_valuehistograms_vs_parameters(parameters=discountfacts,
                                   values=np.array(coops).mean(-1), ax=ax,
                                   bins=21, rnge=(-0.1, 1.1), cmap='Blues')

# Make labels and axis nice
plt.ylabel('Cooperation')
plt.xlabel('Discount factor')

# Adjust the figure margins (this figure is not saved to disk)
plt.subplots_adjust(left=0.15, right=0.98, top=0.98, bottom=0.2)
We also show the abrupt transition in the level of final rewards obtained by the agents.
# Create the canvas
fsf = 0.7  # figure size factor
fig, ax = plt.subplots(figsize=(fsf*6, fsf*3))

# Plot the reward levels versus the discount factors
# (the values are averaged over the last axis of `rewss`)
plot_valuehistograms_vs_parameters(parameters=discountfacts,
                                   values=np.array(rewss).mean(-1), ax=ax,
                                   bins=21, rnge=(-5.25, 1.25), cmap='Reds')

# Make labels and axis nice
plt.ylabel('Reward')
plt.xlabel('Discount factor')

# Adjust the figure margins
plt.subplots_adjust(left=0.15, right=0.98, top=0.98, bottom=0.2)
Since the transition from complete defection to complete cooperation and the transition from low to high reward appear similar, we can also plot them together in one figure, with cooperation on the left y-axis and the reward on the right y-axis.
# Create the canvas
fsf = 0.7  # figure size factor
fig, ax1 = plt.subplots(figsize=(fsf*6, fsf*2.8))
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

# Plot the cooperation probabilities versus the discount factors
plot_valuehistograms_vs_parameters(parameters=discountfacts,
                                   values=np.array(coops).mean(-1), ax=ax1,
                                   bins=21, rnge=(-0.1, 1.1), cmap='Blues',
                                   marker='x', color='blue')

# Plot the reward levels versus the discount factors
plot_valuehistograms_vs_parameters(parameters=discountfacts,
                                   values=np.array(rewss).mean(-1), ax=ax2,
                                   bins=21, rnge=(-5.35, 1.55), cmap='Reds',
                                   marker='.', alpha=0.5, color='red')

# Make labels and axis nice; each y-axis is colored like the data it shows
ax1.set_xlabel('Discount factor')
ax1.set_ylabel('Cooperation (X)', color='Blue')
ax1.tick_params(axis='y', labelcolor='Blue')
ax2.set_ylabel('Reward (⚫️)', color='Red')
ax2.tick_params(axis='y', labelcolor='Red')
ax2.set_yticks([-5, 0, 1])

# Adjust the margins and save the plot
plt.subplots_adjust(left=0.15, right=0.88, top=0.96, bottom=0.22)
plt.savefig('_figs/fig_02AbruptTransitionCooperationReward.png', dpi=150)
Critical slowing down
We use the created plotting function (Section 5.2) to visualize the phenomenon of a critical slowing down of the learning speed around the tipping point.
# Create the canvas
fsf = 0.7  # figure size factor
fig, ax = plt.subplots(figsize=(fsf*6, fsf*3.5))

# Plot the convergence times versus the discount factors
plot_valuehistograms_vs_parameters(parameters=discountfacts, values=convtimes,
                                   bins=21, rnge=(0, 150), cmap='Greys', ax=ax)

# Make labels and axis nice
plt.ylabel('Timesteps to convergence')
plt.xlabel('Discount factor')

# Adjust the margins and save the plot
plt.subplots_adjust(left=0.15, right=0.98, top=0.96, bottom=0.18)
plt.savefig('_figs/fig_02AbruptTransitionSpeed.png', dpi=150)