Decoding Einsum with Python loops: A Practical Guide

Einstein summation, better known as einsum, is a popular operation found in tensor-processing libraries such as NumPy, TensorFlow, and PyTorch. At first glance it looks like a daunting mix of letters, commas, and arrows, but a single einsum call can replace a whole chain of separate operations. This guide helps you understand einsum by pairing each expression with equivalent Python code written as plain loops.
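
To get a feel for that saving, here is a small illustrative sketch (x and y are made-up matrices, not the tensors used below): a single subscript string replaces an explicit multiply-then-reduce.

>>> import numpy as np
>>> x = np.array([[1, 2], [3, 4]])
>>> y = np.array([[5, 6], [7, 8]])
# Row-wise dot products in one call: repeated indices are multiplied,
# and the omitted index j is summed away
>>> np.einsum('ij, ij -> i', x, y)
# array([17, 53])
# The same result spelled out as two separate operations
>>> (x * y).sum(axis=1)
# array([17, 53])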

# Initialize the Tensors
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>> b = np.array([
              [1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]
            ])

1D Tensor Operations

# Sum the a vector
>>> np.einsum('i->',a)
# 6

# Python Equivalent
>>> sum = 0
>>> for i in range(len(a)):
>>>   sum += a[i]
>>> print(sum)
# 6
Sum of 1D Tensor
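
As a quick sanity check, the einsum above should agree with NumPy's built-in sum:

>>> a.sum()
# 6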

2D Tensor Operations

# `ij->ij` will output the same tensor
>>> np.einsum('ij->ij',b)
# array([[1, 2, 3],
#       [4, 5, 6],
#       [7, 8, 9]])

# Python Equivalent
>>> for i in range(len(b)):
>>>   for j in range(len(b[i])):
>>>     print(b[i][j], end="\t")
>>>   print()
#	1	2	3	
#	4	5	6	
#	7	8	9
Output Same Tensor
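
You can verify the identity directly, and note that reordering the output subscripts (e.g. `ij->ji`) permutes the axes instead, giving the transpose:

>>> np.array_equal(np.einsum('ij->ij', b), b)
# True
>>> np.einsum('ij->ji', b)
# array([[1, 4, 7],
#        [2, 5, 8],
#        [3, 6, 9]])
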
# Omitting an index after -> sums over that axis: `ij->i` keeps i and sums each row over j
>>> np.einsum('ij->i',b)
# array([ 6, 15, 24])

# Python Equivalent
>>> for i in range(len(b)):
>>>   sum = 0
>>>   for j in range(len(b[i])):
>>>     sum += b[i][j]
>>>   print(sum, end='\t')
#	6	15	24
Sum vectors in the j dimension
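
For comparison, the built-in equivalent is a sum along axis 1 (the j axis):

>>> b.sum(axis=1)
# array([ 6, 15, 24])
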
# A blank RHS will result in a sum of the entire tensor
>>> np.einsum('ij->',b)
# 45

# Python Equivalent
>>> sum = 0
>>> for i in range(len(b)):
>>>   for j in range(len(b[i])):
>>>     sum += b[i][j]

>>> print(sum)
# 45
Sum vectors in all dimensions
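
The built-in equivalent here is simply the full-tensor sum:

>>> b.sum()
# 45
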
# `ij, jk -> ik` is the dot product (matrix multiplication); here b with itself
>>> np.einsum('ij, jk -> ik', b, b)
# array([[ 30,  36,  42],
#        [ 66,  81,  96],
#        [102, 126, 150]])

# Python Equivalent
>>> for i in range(len(b)):
>>>   for k in range(len(b)):
>>>     sum = 0
>>>     for j in range(len(b)):
>>>       sum += b[i][j] * b[j][k]
>>>     print(sum, end='\t')
>>>   print()

# 30    36    42
# 66    81    96
# 102   126   150
Calculate the dot product of b with itself
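
Since `ij, jk -> ik` is exactly matrix multiplication, the `@` operator should give the same result:

>>> b @ b
# array([[ 30,  36,  42],
#        [ 66,  81,  96],
#        [102, 126, 150]])
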
# `ii` picks the diagonal elements of a tensor
>>> np.einsum('ii->i',b)
# array([1, 5, 9])

# Python Equivalent
>>> for i in range(len(b)):
>>>   print(b[i][i], end='\t')
#	1	5	9	
Output diagonal vector
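
NumPy also has a dedicated helper for this, which should agree with the einsum version:

>>> np.diag(b)
# array([1, 5, 9])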

Multiple Tensor Operations

# Initialize tensors
>>> a = np.array([1, 2, 3])
>>> b = np.array([4, 5, 6])
# Multiplies the two tensors element-wise and sums the result
>>> np.einsum('i, i ->', a, b)
# 32

# Python Equivalent
>>> sum = 0
>>> for i in range(len(a)):
>>>   sum += a[i] * b[i]

>>> print(sum)
# 32
Multiply a by b and sum the result
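
This pattern is just the dot product, so `np.dot` (or the `@` operator) is the usual shortcut:

>>> np.dot(a, b)
# 32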

Multiple 2D Tensor Operations

# Initialize tensors
>>> a = np.array([
              [1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]
])

>>> b = np.array([
              [1, 1, 1],
              [0, 0, 0],
              [1, 0, 1]
])
# Reversing the indices for b (`ji`) multiplies a element-wise with the transpose of b
>>> np.einsum('ij, ji -> ij', a, b)
# array([[1, 0, 3],
#        [4, 0, 0],
#        [7, 0, 9]])

# Python Equivalent
>>> for i in range(len(a)):
>>>   for j in range(len(a[i])):
>>>     print(a[i][j] * b[j][i], end='\t')
>>>   print()
  
#	1	0	3	
#	4	0	0	
#	7	0	9	
Multiply a by the transpose of b and output in a shape
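
The same result can be written with broadcasting as an element-wise product against the transpose, if you want a cross-check:

>>> a * b.T
# array([[1, 0, 3],
#        [4, 0, 0],
#        [7, 0, 9]])
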
# Same operation as above but with the columns summed up
>>> np.einsum('ij, ji -> j', a, b)
# array([12,  0, 12])

# Python Equivalent
>>> for j in range(len(a[0])):
>>>   sum = 0
>>>   for i in range(len(a)):
>>>     sum += a[i][j] * b[j][i]
>>>   print(sum, end='\t')
  
#	12	0	12
Multiply a by the transpose of b and sum in the i axis
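
Equivalently, take the element-wise product from the previous example and collapse it along the i axis (axis 0):

>>> (a * b.T).sum(axis=0)
# array([12,  0, 12])
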
# Multiply the two diagonal vectors and sum up the result
>>> np.einsum('ii, jj -> ', a, b)
# 30

# Python Equivalent
>>> sum = 0
>>> for i in range(len(a)):
>>>   for j in range(len(a[i])):
>>>     sum += a[i][i] * b[j][j]
>>> print(sum)
# 30
Get the outer product of the diagonal vector of a and the diagonal vector of b and sum the result
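
Because the two diagonals are summed independently, this collapses to the product of the two traces; `np.trace` should give the same number:

>>> np.trace(a) * np.trace(b)
# 30
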
# Initialize tensors
>>> a = np.array([1, 2, 3])
>>> b = np.array([[6, 6, 6], [5, 5, 5]])
>>> c = np.array([
              [[1, 1, 1], [2, 2, 2]],
              [[3, 3, 3], [4, 4, 4]],
              [[5, 5, 5], [6, 6, 6]],
              [[7, 7, 7], [8, 8, 8]],
])
# A complex calculation
>>> np.einsum("i, ji, kji -> ij", a, b, c)
# array([[ 96, 100],
#        [192, 200],
#        [288, 300]])

# Python Equivalent
>>> for i in range(len(a)):
>>>   for j in range(len(b)):
>>>     sum = 0
>>>     for k in range(len(c)):
>>>       sum += a[i] * b[j][i] * c[k][j][i]
>>>     print(sum, end="\t")
>>>   print()
#	96	100	
#	192	200	
#	288	300
Multiply a[i] * b[j][i] * c[k][j][i], sum over the k dimension, and show the tensor in ij shape
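
For a sanity check on this one, the same computation can be written with broadcasting: the shapes (3,), (2, 3) and (4, 2, 3) broadcast together, the k axis is summed away, and a transpose puts the result in ij order.

>>> (a * b * c).sum(axis=0).T
# array([[ 96, 100],
#        [192, 200],
#        [288, 300]])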

If you wish to understand the theory behind the operation instead, I suggest the following guides:

If you found this post useful, please do consider subscribing to my blog. You can also follow me on Twitter.

Lezwon Castelino
