Markov Chains & Markov Dynamics#

Let’s say an event can exist in two states, represented in a State Vector \(S_t = \begin{bmatrix} s_{1,t} \\ s_{2,t} \end{bmatrix}\) (at time \(t\)), and there is a probability \(p_{i,j}\) to transfer information from state \(s_i\) to state \(s_j\). All such probabilities can be accounted together in a Transfer Matrix, \(P = \begin{bmatrix} p_{1,1} & p_{1,2} \\ p_{2,1} & p_{2,2} \end{bmatrix}\). Also given that these are probabilities, we have that \(\sum_{j=1}^{n} p_{i,j} = 1\). We find:
    \(P_{2\times2} = \begin{bmatrix} 1-p_{1,2} & p_{1,2} \\ p_{2,1} & 1-p_{2,1} \end{bmatrix}\),
    \(P_{3\times3} = \begin{bmatrix} 1-(p_{1,2}+p_{1,3}) & p_{1,2} & p_{1,3}\\ p_{2,1} & 1-(p_{2,1}+p_{2,3}) & p_{2,3}\\ p_{3,1} & 1-p_{3,2} & 1-(p_{3,1}+p_{3,2}) \end{bmatrix}\), and so on…

Markov Property:#

  1. Memory-lessness: the transfer matrix \(P\) is not dependent on time or the value of information in the states prior to the present state.

  2. Homogeneity: the function of information transfer will remain the same regardless of the information in the state. i.e. the probability of information transfer from State \(s_i\) to \(s_j\) will remain \(p_{i,j}\) for all time \(t\).

    We can then write a function using these tranfer probabilities such that \(s_{i,t+\Delta t} = \sum_{j=1}^{n} s_{j,t}\cdot p_{i,j}\), for all \(i \in \{1,2,...,n\}\). Our new state vector \(S\) at any given time \((t+\Delta t)\) can be written as:
    \(\begin{align} S_{t+\Delta t} &= \begin{bmatrix}s_{i,t+\Delta t}\end{bmatrix}\\ &= \begin{bmatrix}\sum_{j=1}^{n} s_{j,t}\cdot p_{i,j}\end{bmatrix}\\ &= \begin{bmatrix}s_{i,t}\end{bmatrix}\cdot\begin{bmatrix}p_{i,j}\end{bmatrix}\\ \therefore S_{t+\Delta t} &= S_t\cdot P \end{align}\)


Simply put, a network of such events, connected by directed edges whose strength are the probabilities, is called a ‘Markov Chain’.


# Initialize Libraries
import numpy as np
from matplotlib import pyplot as plt
import networkx as nx
# Function calculates and collects all the states for the Markov Process
def solve_for_markov_process(init_state, transition_mat, iterate_len=0, convergence_rounding=5):
    init_state = np.asarray(init_state).ravel();
    transition_mat = np.asmatrix(transition_mat).T;
    sz = len(init_state);
    if len(init_state) != np.unique(transition_mat.shape): return [];
    
    # Iterate over the states if specific number of steps are provided...
    if iterate_len > 0:
        sn = np.zeros([iterate_len,sz]);
        sn[0,:] = init_state;
        for state in np.arange(1,iterate_len):
            sn[state,:] = np.asarray([
                np.dot(np.asarray(sn[state-1,:]).ravel(),np.asarray(el).ravel())
                for el in transition_mat
            ]);
        return sn;
    
    # Iterate over the states until convergence is reached...
    sn = [];
    sn.append(init_state);
    factval = np.math.factorial(sz);
    while True:
        if len(sn)>factval:
            idx = np.sort(len(sn) - np.arange(factval) - 1);
            idy = np.sort(len(sn) - np.arange(factval) - 2);
            if np.round(np.mean(
                np.asmatrix(sn[(len(sn)-factval):len(sn)]).ravel()
                - np.asmatrix(sn[(len(sn)-factval-1):(len(sn)-1)]).ravel(),
            axis=0), convergence_rounding).all() == 0:
                break;
        sn.append(np.asarray([
            np.dot(np.asarray(sn[-1]).ravel(),np.asarray(el).ravel())
            for el in transition_mat
        ]));
    return np.asarray(sn);

# Plots all Markov States based on the 'solve_for_markov_process()' function
def plot_markov_states(s0, p):
    sn = solve_for_markov_process(s0, p);
    plt.plot(sn, linewidth=2);
    plt.ylim([0,1]);
    plt.title(f'S_0 = {np.round(s0,3)}\nS_n = {np.round(np.asarray(sn)[-1,:],3)}');
    plt.grid();

# Draw/Plot the Markov Chain based on the Transfer Matrix (P)
def draw_markov_chain(p, layout_type=0, print_prob=True):
    fig = plt.figure(figsize=(10,7));
    ax = fig.add_subplot(111, aspect='equal');

    G = nx.DiGraph(p);
    if layout_type==1: pos = nx.planar_layout(G);
    elif layout_type==2: pos = nx.kamada_kawai_layout(G);
    else: pos = nx.circular_layout(G);
    if print_prob:
        [print(f'(S{edge[0]+1}, S{edge[1]+1}): {np.round(p[edge],4)}', end='\t\t') for edge in G.edges()];

    nx.draw(
        G, pos,
        edge_color='black', linewidths=1, width=np.asarray([G[i][j]['weight']*2.5 for i,j in G.edges()]),
        node_size=500, node_color='k', alpha=0.9, font_color='w',
        labels={node: f'S{node+1}' for node in G.nodes()},
        connectionstyle='arc3, rad=0.15'
    );
    plt.axis('off');

    fig.tight_layout(pad=3);
    plt.show();
    del fig, ax;

# Get Probabilities of State ending in an Absorbing State:
def get_p2as_prob(p):
    as_s = np.where(p==1)[0];               # Absorbing States (AS)
    if len(as_s)==0: return np.array([]);   # Check if P is Absorbing
    
    # Get the Non-Absorbing States (NAS)
    nas = np.setdiff1d(np.arange(p.shape[0]), as_s);
    reorder = np.append(np.asarray(as_s), nas);
    
    # Record the Sub-Matrices:
    I_m = np.eye(len(as_s));
    R = p[np.meshgrid(nas,as_s)].T;
    Q_n = p[np.meshgrid(nas,nas)].T;
    
    # Calculate the NAS to AS probabilities
    I_n = np.eye(len(nas));
    F = np.linalg.inv(I_n-Q_n);
    nas2as = F * R;
    
    # Return All-to-AS probabilities
    all2as = np.concatenate((I_m, nas2as), axis=0);
    return all2as[np.argsort(reorder),:];

Basic idea: Let’s assume that \(t\rightarrow n\) and \(\Delta t\rightarrow1\)

\(S_{n+1} = S_n\cdot P\).
\(\ \) \(\ \) \(\ \) where, \(\ \) \(S_n\) := State Vector at instance \(n\).
\(\ \) \(\ \) \(\ \) \(\ \) \(\ \) \(\ \) \(P\) := transition matrix of size \(k\times k\).

\(P\) maintains its transition values no matter what; i.e. it is temporally invariant. i.e. if \(S_0\) := initial state of a system;
\(\ \) \(\ \) \(\ \) \(S_1 = S_0\cdot P\)
\(\ \) \(\ \) \(\ \) \(S_2 = S_1\cdot P = (S_0\cdot P)\cdot P = S_0\cdot P^2\)
\(\ \) \(\ \) \(\ \) \(S_3 = S_2\cdot P = (S_0\cdot P^2)\cdot P = S_0\cdot P^3\)
\(\ \) \(\ \) \(\ \) \(\vdots\)
\(\ \) \(\ \) \(\ \) \(S_n = S_0\cdot P^n\)

As an example, let \(P = \begin{bmatrix}0.3 & 0.7\\1.0 & 0.0\end{bmatrix}\) and \(\ S_0 = \begin{bmatrix}0.8 & 0.2\end{bmatrix}\);

We find:
\(S_1 = \begin{bmatrix}0.44 & 0.56\end{bmatrix}\),
\(S_2 = \begin{bmatrix}0.692 & 0.308\end{bmatrix}\),
\(S_3 = \begin{bmatrix}0.516 & 0.484\end{bmatrix}\),
\(\vdots\)
\(S_S \approx \begin{bmatrix}0.588 & 0.412\end{bmatrix}\) at instance ‘\(n_S\)’.

Let’s simulate this part…

s0 = np.asarray([0.8, 0.2]);
p = np.asmatrix([[0.3,0.7],[1,0]]);

print(f'P = {np.round(p,4)}\n');
draw_markov_chain(p, layout_type=1);
P = [[0.3 0.7]
 [1.  0. ]]

(S1, S1): 0.3		(S1, S2): 0.7		(S2, S1): 1.0		
../_images/71f0b4ea8867f39d6e2ea3ae0a673b2b49a10c420352cc1bebad333c79a4036e.png
# The function 'solve_for_markov_process' calculates all transitions
print('All state transitions:\n');
print(solve_for_markov_process(s0, p));

# The function 'plot_markov_states' plots all 'solve_for_markov_process' outputs
fig = plt.figure(figsize=(8,5));
ax = fig.add_subplot(111);
plot_markov_states(s0, p);
plt.show();
del fig, ax;
All state transitions:

[[0.8        0.2       ]
 [0.44       0.56      ]
 [0.692      0.308     ]
 [0.5156     0.4844    ]
 [0.63908    0.36092   ]
 [0.552644   0.447356  ]
 [0.6131492  0.3868508 ]
 [0.57079556 0.42920444]
 [0.60044311 0.39955689]
 [0.57968982 0.42031018]
 [0.59421712 0.40578288]
 [0.58404801 0.41595199]
 [0.59116639 0.40883361]
 [0.58618353 0.41381647]
 [0.58967153 0.41032847]
 [0.58722993 0.41277007]
 [0.58893905 0.41106095]
 [0.58774266 0.41225734]
 [0.58858013 0.41141987]
 [0.58799391 0.41200609]
 [0.58840427 0.41159573]
 [0.58811701 0.41188299]
 [0.58831809 0.41168191]
 [0.58817734 0.41182266]
 [0.58827586 0.41172414]
 [0.5882069  0.4117931 ]
 [0.58825517 0.41174483]
 [0.58822138 0.41177862]
 [0.58824504 0.41175496]
 [0.58822848 0.41177152]
 [0.58824007 0.41175993]
 [0.58823195 0.41176805]
 [0.58823763 0.41176237]
 [0.58823366 0.41176634]]
../_images/d05bcfd839b849d4867e5e7b29f0daef2ab304b1818ae90a24f0c098e111a08e.png

In fact, we find that if \(S_n = S_S\), then \(S_{n+1} = S_S\).

Such states, \(S_S\), are called Steady States. All transfer matrices (\(P\)) which end in such states are called Regular Markov chains. These markov chains have a property where the chain will converge at the Steady State no matter what the initial state (\(S_0\)) is.

fig = plt.figure(figsize=(13,10));
for el in np.arange(4)+1:
    s0 = np.absolute(np.random.random(p.shape[0]));
    s0 /= np.sum(s0);
    ax = fig.add_subplot(3,2,el);
    plot_markov_states(s0, p);
del el;

ax = fig.add_subplot(3,2,5); plot_markov_states(np.array([0,1]), p);
ax = fig.add_subplot(3,2,6); plot_markov_states(np.array([1,1])/p.shape[0], p);

fig.tight_layout(pad=3);
plt.show();
del fig, ax, s0;
../_images/428b2ba7982b0ae717865e6ad8200358cf349b8b6f80cad07087fa38ddeadd04.png


This holds true for most real-life markov chain models, no matter the size of the model. For example:

# Size-3 Markov Chains:
sz = 3;

p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
print(f'P = {np.round(p,4)}\n');
draw_markov_chain(p);

fig = plt.figure(figsize=(13,10));
for el in np.arange(4)+1:
    s0 = np.absolute(np.random.random(p.shape[0]));
    s0 /= np.sum(s0);
    ax = fig.add_subplot(3,2,el);
    plot_markov_states(s0, p);
del el;

ax7 = fig.add_subplot(3,2,5); plot_markov_states(np.array([0,0,1]), p);
ax8 = fig.add_subplot(3,2,6); plot_markov_states(np.array([1,1,1])/3, p);

fig.tight_layout(pad=3);
plt.show();
del fig, ax, s0, sz;
P = [[0.3417 0.5104 0.1479]
 [0.6226 0.0167 0.3606]
 [0.5897 0.2503 0.1599]]

(S1, S1): 0.3417		(S1, S2): 0.5104		(S1, S3): 0.1479		(S2, S1): 0.6226		(S2, S2): 0.0167		(S2, S3): 0.3606		(S3, S1): 0.5897		(S3, S2): 0.2503		(S3, S3): 0.1599		
../_images/3d4aba06985ec29aca0e0876e630ec052db8a8aa00d7b0fda3e9ac8dadb2ce46.png ../_images/a2f136829328266b5c936413913284d14470e932db8106876e74d6e5a98c6288.png
# Size-5 Markov Chains:
sz = 5;

p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
print(f'P = {np.round(p,4)}\n');
draw_markov_chain(p);

fig = plt.figure(figsize=(13,8));
for el in np.arange(2)+1:
    s0 = np.absolute(np.random.random(sz));
    s0 /= np.sum(s0);
    ax = fig.add_subplot(2,2,el);
    plot_markov_states(s0, p);
del el;

s0 = np.zeros(sz); s0[np.random.choice(np.arange(sz))] = 1;
ax = fig.add_subplot(2,2,3); plot_markov_states(s0, p);
ax = fig.add_subplot(2,2,4); plot_markov_states(np.ones(sz)/sz, p);

fig.tight_layout(pad=3);
plt.show();
del fig, ax, s0, sz;
P = [[0.3557 0.1077 0.1362 0.1439 0.2565]
 [0.3064 0.13   0.2996 0.0014 0.2626]
 [0.2678 0.1263 0.1503 0.3161 0.1395]
 [0.2613 0.0595 0.0933 0.5057 0.0802]
 [0.0625 0.0578 0.339  0.2492 0.2915]]

(S1, S1): 0.3557		(S1, S2): 0.1077		(S1, S3): 0.1362		(S1, S4): 0.1439		(S1, S5): 0.2565		(S2, S1): 0.3064		(S2, S2): 0.13		(S2, S3): 0.2996		(S2, S4): 0.0014		(S2, S5): 0.2626		(S3, S1): 0.2678		(S3, S2): 0.1263		(S3, S3): 0.1503		(S3, S4): 0.3161		(S3, S5): 0.1395		(S4, S1): 0.2613		(S4, S2): 0.0595		(S4, S3): 0.0933		(S4, S4): 0.5057		(S4, S5): 0.0802		(S5, S1): 0.0625		(S5, S2): 0.0578		(S5, S3): 0.339		(S5, S4): 0.2492		(S5, S5): 0.2915		
../_images/7c7fa90b5c1fa49298e8cd03d8af39627af990000c153763c8c3945357f86b7a.png ../_images/a11badc05f786bf399822bb2bfe7a2861bc657e346993d509c12dadca105c1df.png


… even size-105 markov chains, but that will require a lot of computational power. So, let’s jump to a different type of markov chains/states…

Absorbing Markov Chains and Absorbing States:#

Absorbing Markov Chains are such that there exists at least one such node/state that only has connections coming inwards and that all nodes/states can be linked to it. That is, if the transfer matrix, \(P = P_A\), has one or more elements which is of the type \([0,0,...0,1,0,...,0]\), then all transitions to the state are possible, but a transition out of this state is not possible. In this case, the transfer matrix ends in an Absorbing State (\(S_A\)), such that all the values of vector \(S\) are zero except \(S[k] = 1\).

Let’s take the example of Size-3 Markov chain from earlier, and make one state absorbing…

sz = 3; abs_state = np.random.choice(sz);

p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
p[abs_state,:] = 0; p[abs_state,abs_state] = 1;
print(f'P = {np.round(p,4)}\n');
draw_markov_chain(p, layout_type=1);
print(f'That is, S{abs_state+1} is the Absorbing State.\n');

fig = plt.figure(figsize=(13,7));
for el in np.arange(3)+1:
    s0 = np.absolute(np.random.random(p.shape[0]));
    s0 /= np.sum(s0);
    ax = fig.add_subplot(2,2,el);
    plot_markov_states(s0, p);
del el;

s0 = np.zeros(p.shape[0]);
s0[np.random.choice(np.setdiff1d(np.arange(p.shape[0]),abs_state))]=1;
ax = fig.add_subplot(2,2,4); plot_markov_states(s0, p);

fig.tight_layout(pad=3);
plt.show();
del fig, ax, s0, sz, abs_state;
P = [[0.659  0.2037 0.1373]
 [0.     1.     0.    ]
 [0.0343 0.1864 0.7793]]

(S1, S1): 0.659		(S1, S2): 0.2037		(S1, S3): 0.1373		(S2, S2): 1.0		(S3, S1): 0.0343		(S3, S2): 0.1864		(S3, S3): 0.7793		
../_images/864d8166d86a0a7442bbe2474aa9c0ca06e00296c1202cf57efd89d909889af5.png
That is, S2 is the Absorbing State.
../_images/a9e0ffc6fda0adf48e1b1f63bc09305c53b52eebb3df9129b9fee29dab187dcd.png


Such tranfer matrices have the form of: \(P_{A\ ((m+n)\times(m+n))} = \begin{bmatrix}I_m&O\\R&Q_n\end{bmatrix}\).

One can then define the Fundamental Matrix as: \(F_n = (I_n - Q_n)^{-1}\);

And then find the matrix of probabilities of starting in Non-Absorbing State and ending in an Absorbing State as \(F\cdot R\).




Let’s try this out for the problem above. We already know what the Markov Chain looks like…

# Absorbing States (AS)
as_s = np.where(p==1)[0];

# Get the Non-Absorbing States (NAS)
nas = np.setdiff1d(np.arange(p.shape[0]), as_s);

# Reorder the matrix to get the form for Absorbing Transfer Matrix:
reorder = np.append(np.asarray(as_s), nas);
p_absorbing = p[np.meshgrid(reorder,reorder)].T;

print(f'\nP-Absorbing (re-ordered P):\n{p_absorbing}');

# Storing the Sub-Matrices:
I_m = np.eye(len(as_s));
R = p[np.meshgrid(nas,as_s)].T;
Q_n = p[np.meshgrid(nas,nas)].T;

# Calculate the NAS to AS probabilities
I_n = np.eye(len(nas));
F = np.linalg.inv(I_n-Q_n);
nas2as = F * R;

print(f'\nProbabilities that NAS will end up in AS: (re-ordered list)\n{nas2as}');

# Add AS-to-AS with NAS-to-AS probabilities
all2as = np.concatenate((I_m, nas2as), axis=0);

# Re-arrange all the values according to the list of the States:
print(f'\nAll arranged probabilities:\n{all2as[np.argsort(reorder),:]}');
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 9
      7 # Reorder the matrix to get the form for Absorbing Transfer Matrix:
      8 reorder = np.append(np.asarray(as_s), nas);
----> 9 p_absorbing = p[np.meshgrid(reorder,reorder)].T;
     11 print(f'\nP-Absorbing (re-ordered P):\n{p_absorbing}');
     13 # Storing the Sub-Matrices:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.py:966, in matrix.T(self)
    935 @property
    936 def T(self):
    937     """
    938     Returns the transpose of the matrix.
    939 
   (...)
    964 
    965     """
--> 966     return self.transpose()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/numpy/matrixlib/defmatrix.py:180, in matrix.__array_finalize__(self, obj)
    178         return
    179     elif (ndim > 2):
--> 180         raise ValueError("shape too large to be a matrix.")
    181 else:
    182     newshape = self.shape

ValueError: shape too large to be a matrix.

The calculations reflect those from the function get_p2as_prob( )
sz = 5; abs_state = np.random.choice(sz);
p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
p[abs_state,:] = 0; p[abs_state,abs_state] = 1;

draw_markov_chain(p, print_prob=False);
p2as = get_p2as_prob(p);
print(p2as);
../_images/842238cd0f9c8cf17acc31894445cc20b35f332574a8939137aa92f1452c4df5.png
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]

   Note that the list is all 1's because only 1 Absorbing State (AS) exists in this case. What if more were added? Given below, are examples with 2 and 3 absorbing states. Notice that the probabilities of escaping these states is 0, i.e. if one begins from an AS, they cannot transition out of the state...
sz = 5; abs_state = np.random.choice(sz, 2, replace=False);
p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
p[abs_state,:] = 0; p[abs_state,abs_state] = 1;

draw_markov_chain(p, print_prob=False);
p2as = get_p2as_prob(p);
print(p2as);
../_images/3c3ddb38d32364d4e2aee5b8923f723171fd55e8790b1625e4f82b7544c2f06b.png
[[0.60720023 0.39279977]
 [1.         0.        ]
 [0.48023909 0.51976091]
 [0.66032729 0.33967271]
 [0.         1.        ]]
sz = 11; abs_state = np.random.choice(sz, 3, replace=False);
p = np.absolute(np.random.random([sz,sz]));
p = np.asmatrix([el/np.sum(el) for el in p]);
p[abs_state,:] = 0; p[abs_state,abs_state] = 1;

draw_markov_chain(p, print_prob=False);
p2as = get_p2as_prob(p);
print(p2as);
../_images/2dc8f6237429eb75f3014940c6f0adfc2436657774b710b8651210eadc12a300.png
[[0.39506911 0.30922911 0.29570178]
 [0.32454023 0.36820207 0.30725769]
 [0.41278558 0.29558675 0.29162767]
 [1.         0.         0.        ]
 [0.         1.         0.        ]
 [0.38762782 0.30666532 0.30570686]
 [0.41961767 0.34723819 0.23314414]
 [0.43717516 0.34092161 0.22190323]
 [0.49410128 0.27101664 0.23488207]
 [0.         0.         1.        ]
 [0.26211671 0.37993706 0.35794623]]