import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from pyCRLD.Environments.EcologicalPublicGood import EcologicalPublicGood as EPG
from pyCRLD.Agents.StrategyActorCritic import stratAC
from pyCRLD.Utils import FlowPlot as fp
4 Multi-stability
In this section, we illustrate complex phenomena around multi-stability in the phase space of CRLD.
First, we import everything we need:
Phase space plot
We start by plotting the flow of CRLD in the strategy phase space projection of the prosperous state. For that, we define a function to help us compile initial strategies.
def compile_strategy(p0c:float, # cooperation probability of agent zero
                     p1c:float): # cooperation probability of agent one
    """Compile a joint strategy array from the two agents' cooperation
    probabilities in the prosperous state.

    Returns an array of shape (2 agents, 2 states, 2 actions), where
    entry [agent, state, 0] is the cooperation probability and
    [agent, state, 1] = 1 - [agent, state, 0] the defection probability.
    State 0 is the degraded state (coop. prob. fixed to 0.95), state 1
    the prosperous state (coop. prob. given by the arguments).
    """
    Pi = np.array([0.95, p0c]) # coop. prob. in degraded state set to 0.95
    Pj = np.array([0.95, p1c])
    xi = np.array([Pi, 1-Pi]).T # rows: states, columns: (coop, defect)
    xj = np.array([Pj, 1-Pj]).T
    return np.array([xi, xj])
For example,
compile_strategy(0.2, 0.95)
array([[[0.95, 0.05],
[0.2 , 0.8 ]],
[[0.95, 0.05],
[0.95, 0.05]]])
The arrows indicate the strategy-average reward-prediction errors. Their colors additionally indicate their length.
# Initialize the ecological public good environment
env = EPG(N=2, f=1.2, c=5, m=-5, qc=0.2, qr=0.01, degraded_choice=False)

# Create the multi-agent environment interface
MAEi = stratAC(env=env, learning_rates=0.1, discount_factors=0.75)

# Strategy flow plot
# ------------------
x = ([0], [1], [0])  # which (agent, observation, action) to plot on x axis
y = ([1], [1], [0])  # which (agent, observation, action) to plot on y axis
# NOTE(review): 10e-3 == 0.01; confirm 1e-3 was not intended here
eps = 10e-3
action_probability_points = np.linspace(0+eps, 1.0-eps, 9)
ax = fp.plot_strategy_flow(MAEi, x, y, action_probability_points, NrRandom=64)
# Trajectories
# ------------------
xtrajs = []  # storing strategy trajectories
fprs = []    # and whether a fixed point is reached
# pc varies the prosperous-state cooperation probability passed as p0c
# (the agent plotted on the x axis)
for pc in [0.15, 0.175, 0.2]:
    X = compile_strategy(pc, 0.95)
    xtraj, fixedpointreached = MAEi.trajectory(X, Tmax=2000, tolerance=10**-5)
    xtrajs.append(xtraj); fprs.append(fixedpointreached)
    print("Trajectory length:", len(xtraj))

# Add trajectories to flow plot
fp.plot_trajectories(xtrajs, x=x, y=y, fprs=fprs,
                     cols=['red', 'blue', 'blue'], lws=[2], msss=[2],
                     lss=['-'], alphas=[0.75],
                     axes=ax)
# Add separatrix
# Start from two initial conditions straddling the separatrix and
# integrate the learning dynamics backward in time
o = [0.619, 0.6191]
o1 = compile_strategy(*o)
o2 = compile_strategy(*o[::-1])  # mirrored initial condition
sep1 = []; sep2 = []
for _ in range(1000): o1, _ = MAEi.reverse_step(o1); sep1.append(o1)
for _ in range(1000): o2, _ = MAEi.reverse_step(o2); sep2.append(o2)
fp.plot_trajectories([sep1, sep2], x=x, y=y, cols=['purple'], lws=[1],
                     lss=['--'], alphas=[0.95], plot_startmarker=False, axes=ax)
# Add saddle node
# by reversing the dynamics from two agents with identical strategies
o = compile_strategy(0.5, 0.5)
for _ in range(1000): o, _ = MAEi.reverse_step(o)
# Mark the point reached by the reversed dynamics (prosperous state,
# cooperate action of both agents) with a purple plus marker
ax[0].scatter(*o[:, 1, 0], c='purple', marker='P', s=50)
# Make labels nice
ax[0].set_ylabel("$X^2(s=Prosp.,a=Coop.)$")
ax[0].set_xlabel("$X^1(s=Prosp.,a=Coop.)$")

# Save plot
plt.gcf().set_facecolor('white')  # for dark mode on web
plt.tight_layout()
plt.savefig('_figs/fig_01PhaseSpace.png', dpi=150)
Trajectory length: 296
Trajectory length: 298
Trajectory length: 253
Sample trajectories
Next, we create a more fine-grained bundle of learning trajectories.
# Cooperation probabilities for the varied agent:
# a coarse sweep over [0.05, 0.95] plus successively finer refinements
# around the critical point near 0.1647
pcs = np.concatenate((np.linspace(0.05, 0.95, 51),
                      np.linspace(0.15, 0.18, 101),
                      np.linspace(0.1646, 0.1649, 101),
                      np.linspace(0.16475, 0.164765, 51)))
# Deduplicate and sort so trajectories are ordered by initial condition
pcs = np.sort(np.unique(pcs))
xtrajs = []  # storing strategy trajectories
fprs = []    # and whether a fixed point is reached
for pc in pcs:
    # Compile initial joint strategy: first agent's prosperous-state
    # coop. prob. is pc, second agent's is 0.95 (reuses the helper
    # defined above, which is identical to the inline construction)
    X = compile_strategy(pc, 0.95)

    # Compute learning trajectory until convergence or Tmax steps
    xtraj, fixedpointreached = MAEi.trajectory(X, Tmax=2000, tolerance=10**-5)
    xtrajs.append(xtraj)
    fprs.append(fixedpointreached)
We obtain the critical point in this bundle of learning trajectories where the two agents switch or tip from complete defection to complete cooperation.
# Assuming that all trajectories converged
assert np.all(fprs)

# Obtain the cooperation probabilities at convergence:
# last trajectory point, prosperous state (1), cooperate action (0)
converged_pcs = [xtraj[-1][:, 1, 0] for xtraj in xtrajs]

# Show the bimodal distribution of full defection and full cooperation
# (bare expression: displayed as notebook cell output)
np.histogram(np.array(converged_pcs).mean(-1), range=(0, 1))[0]
array([138, 0, 0, 0, 0, 0, 0, 0, 0, 162])
Thus, the critical point lies at the index
# Index of the critical point: the count in the first histogram bin is
# the number of trajectories that converged to full defection, so pcs[cp]
# is the first initial condition that tips to cooperation
cp = np.histogram(np.array(converged_pcs).mean(-1), range=(0, 1))[0][0]
cp  # notebook display
138
and has an approximate value between
print(pcs[cp-1], 'and', pcs[cp], '.')
0.1647584 and 0.1647587 .
Critical slowing down
We use this more fine-grained bundle of learning trajectories to visualize the phenomenon of a critical slowing down by plotting the time steps required to reach convergence.
# Create the canvas
fsf = 0.7  # figure size factor
plt.figure(figsize=(fsf*4, fsf*2.5))

# Plot the time steps required to convergence, i.e. the trajectory lengths
plt.plot(pcs[:cp], [len(xtraj) for xtraj in xtrajs[:cp]],
         '-', color='red', lw=2, alpha=0.8)   # defectors in red
plt.plot(pcs[cp:], [len(xtraj) for xtraj in xtrajs[cp:]],
         '-', color='blue', lw=2, alpha=0.6)  # cooperators in blue

# Make labels and axis nice
plt.xlabel("$X^1(s=Prosp.,a=Coop.)$")
plt.ylabel('Timesteps to\nconvergence')
plt.xlim(0, 1)
plt.ylim(0, 800)
plt.gca().spines.right.set_visible(False)
plt.gca().spines.top.set_visible(False)

# Save plot
plt.gcf().set_facecolor('white')  # for dark mode on web
plt.subplots_adjust(top=0.95, bottom=0.3, left=0.28, right=0.94)
plt.savefig('_figs/fig_01SlowingDown.png', dpi=150)
At the critical point
Detailed phase space
We plot a detailed phase space where we zoom in on the area around the critical saddle point on the separatrix.
# Create the canvas
fsf = 0.65  # figure size factor
_, ax = plt.subplots(1, 1, figsize=(fsf*4, fsf*3.5))

# Plot the reward-prediction error flow, zoomed in around the saddle
action_probability_points = np.linspace(0.612, 0.619, 17)
ax = fp.plot_strategy_flow(MAEi, x, y, action_probability_points, NrRandom=64,
                           axes=[ax])

# Plot the defecting learning trajectories in red
# NOTE(review): fprs is the full list while xtrajs is sliced — confirm
# plot_trajectories tolerates this (fprs[:cp] may be intended)
fp.plot_trajectories(xtrajs[:cp], x=x, y=y, fprs=fprs, axes=ax, cols=['red'],
                     lws=[2], msss=[2], mss=['.'], lss=['-'], alphas=[0.15])

# Plot the cooperating learning trajectories in blue
fp.plot_trajectories(xtrajs[cp:], x=x, y=y, fprs=fprs, axes=ax, cols=['blue'],
                     lws=[2], msss=[2], mss=['.'], lss=['-'], alphas=[0.15])

# Make labels and axis nice
ax[0].set_ylabel("$X^2(s=Prosp.,a=Coop.)$")
ax[0].set_xlabel("$X^1(s=Prosp.,a=Coop.)$")

ax[0].set_ylim(0.613, 0.619)
ax[0].set_xlim(0.6125, 0.6155)

# Save plot
plt.gcf().set_facecolor('white')  # for dark mode on web
plt.tight_layout()
plt.savefig('_figs/fig_01PhaseSpaceDetail.png', dpi=150)
Time scale separation
Last, we visualize the emergent time scale separation at the critical point by plotting the level of cooperation over time for the two initial strategies around the critical point.
# Create the canvas
fsf = 0.5  # figure size factor
plt.figure(figsize=(fsf*6, fsf*4))

# Plot the defecting learners (last trajectory before the tipping point) in red
# agent 1 with dots; index [:, agent, state=1 (prosperous), action=0 (coop.)]
plt.plot(xtrajs[cp-1][:, 0, 1, 0], color='red', lw=5, ls=':')
# agent 2 with dashes
plt.plot(xtrajs[cp-1][:, 1, 1, 0], color='red', lw=4, ls="--", alpha=0.4)

# Plot the cooperating learners (first trajectory past the tipping point) in blue
# agent 1 with dots
plt.plot(xtrajs[cp][:, 0, 1, 0], color='blue', lw=3, ls=':')
# agent 2 with dashes
plt.plot(xtrajs[cp][:, 1, 1, 0], color='blue', lw=2, ls="--", alpha=0.4)

# Create a nice legend with proxy artists for the two line styles
custom_lines = [Line2D([0], [0], color='black', ls=':', lw=2),
                Line2D([0], [0], color='gray', ls='--', lw=2)]
plt.legend(custom_lines, ['Agent 1', 'Agent 2'], ncol=1)

# Make labels and axis nice
plt.gca().spines.right.set_visible(False)
plt.gca().spines.top.set_visible(False)
plt.xlabel("Timesteps")
plt.ylabel("Cooperation")

# Save plot
plt.gcf().set_facecolor('white')  # for dark mode on web
plt.subplots_adjust(top=0.98, bottom=0.22, left=0.22, right=0.98)
plt.savefig('_figs/fig_01PhaseSpaceTrajectory.png', dpi=150)