Einstein summation, also known as einsum, is a popular operation found in tensor libraries such as NumPy, TensorFlow, and PyTorch. At first glance it looks like a daunting mix of characters, commas, and arrows, but a single einsum expression can replace several separate operations. The notation labels each axis of every input tensor with a letter, separates the inputs with commas, and lists the output axes after the arrow; any index that appears on the left but not on the right is summed over. This guide tries to help you understand the einsum operation by pairing each expression with equivalent Python code using loops.
# Initialize the Tensors
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>> b = np.array([
...     [1, 2, 3],
...     [4, 5, 6],
...     [7, 8, 9]
... ])
1D Tensor Operations
# Sum the elements of vector a
>>> np.einsum('i->', a)
# 6
# Python Equivalent
>>> total = 0
>>> for i in range(len(a)):
...     total += a[i]
>>> print(total)
# 6
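For comparison, this is the same reduction NumPy's built-in sum performs:
>>> a.sum()
# 6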
2D Tensor Operations
# `ij->ij` will output the same tensor
>>> np.einsum('ij->ij', b)
# array([[1, 2, 3],
#        [4, 5, 6],
#        [7, 8, 9]])
# Python Equivalent
>>> for i in range(len(b)):
...     for j in range(len(b[i])):
...         print(b[i][j], end="\t")
...     print()
# 1 2 3
# 4 5 6
# 7 8 9
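Since every index on the left also appears on the right, nothing is summed over and the operation is effectively an identity:
>>> np.array_equal(np.einsum('ij->ij', b), b)
# True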
# Omitting an index after -> sums over that axis; `ij->i` keeps the
# rows and sums across the columns
>>> np.einsum('ij->i', b)
# array([ 6, 15, 24])
# Python Equivalent
>>> for i in range(len(b)):
...     total = 0
...     for j in range(len(b[i])):
...         total += b[i][j]
...     print(total, end='\t')
# 6 15 24
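The built-in axis-aware sum gives the same row totals:
>>> b.sum(axis=1)
# array([ 6, 15, 24])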
# A blank RHS will result in a sum of the entire tensor
>>> np.einsum('ij->', b)
# 45
# Python Equivalent
>>> total = 0
>>> for i in range(len(b)):
...     for j in range(len(b[i])):
...         total += b[i][j]
>>> print(total)
# 45
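Again, the equivalent built-in call:
>>> b.sum()
# 45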
# Matrix multiplication of b with itself: `ij, jk -> ik` multiplies
# along the shared index j and sums it away
>>> np.einsum('ij, jk -> ik', b, b)
# array([[ 30,  36,  42],
#        [ 66,  81,  96],
#        [102, 126, 150]])
# Python Equivalent
>>> for i in range(len(b)):
...     for k in range(len(b)):
...         total = 0
...         for j in range(len(b)):
...             total += b[i][j] * b[j][k]
...         print(total, end='\t')
...     print()
# 30 36 42
# 66 81 96
# 102 126 150
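The same product using the matrix-multiplication operator:
>>> b @ b
# array([[ 30,  36,  42],
#        [ 66,  81,  96],
#        [102, 126, 150]])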
# Repeating an index, as in `ii`, picks the diagonal elements of a tensor
>>> np.einsum('ii->i', b)
# array([1, 5, 9])
# Python Equivalent
>>> for i in range(len(b)):
...     print(b[i][i], end='\t')
# 1 5 9
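NumPy exposes the same operation as np.diag:
>>> np.diag(b)
# array([1, 5, 9])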
Multiple Tensor Operations
# Initialize tensors
>>> a = np.array([1, 2, 3])
>>> b = np.array([4, 5, 6])
# Multiply the two vectors element-wise and sum the result: the dot product
>>> np.einsum('i, i ->', a, b)
# 32
# Python Equivalent
>>> total = 0
>>> for i in range(len(a)):
...     total += a[i] * b[i]
>>> print(total)
# 32
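The equivalent built-in call:
>>> np.dot(a, b)
# 32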
Multiple 2D Tensor Operations
# Initialize tensors
>>> a = np.array([
...     [1, 2, 3],
...     [4, 5, 6],
...     [7, 8, 9]
... ])
>>> b = np.array([
...     [1, 1, 1],
...     [0, 0, 0],
...     [1, 0, 1]
... ])
# Reversing the indices of b (`ji`) multiplies a element-wise with the
# transpose of b
>>> np.einsum('ij, ji -> ij', a, b)
# array([[1, 0, 3],
#        [4, 0, 0],
#        [7, 0, 9]])
# Python Equivalent
>>> for i in range(len(a)):
...     for j in range(len(a[i])):
...         print(a[i][j] * b[j][i], end='\t')
...     print()
# 1 0 3
# 4 0 0
# 7 0 9
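This is simply element-wise multiplication with the transpose:
>>> a * b.T
# array([[1, 0, 3],
#        [4, 0, 0],
#        [7, 0, 9]])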
# Same operation as above, but with the columns summed up
>>> np.einsum('ij, ji -> j', a, b)
# array([12,  0, 12])
# Python Equivalent
>>> for j in range(len(a)):
...     total = 0
...     for i in range(len(a)):
...         total += a[i][j] * b[j][i]
...     print(total, end='\t')
# 12 0 12
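Equivalently, sum the element-wise product down each column:
>>> (a * b.T).sum(axis=0)
# array([12,  0, 12])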
# Multiply every pair of elements from the two diagonals and sum the
# results; this equals sum(diag(a)) * sum(diag(b))
>>> np.einsum('ii, jj -> ', a, b)
# 30
# Python Equivalent
>>> total = 0
>>> for i in range(len(a)):
...     for j in range(len(b)):
...         total += a[i][i] * b[j][j]
>>> print(total)
# 30
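Since the sum of a matrix's diagonal is its trace, this is also:
>>> np.trace(a) * np.trace(b)
# 30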
# Initialize tensors
>>> a = np.array([1, 2, 3])
>>> b = np.array([[6, 6, 6], [5, 5, 5]])
>>> c = np.array([
...     [[1, 1, 1], [2, 2, 2]],
...     [[3, 3, 3], [4, 4, 4]],
...     [[5, 5, 5], [6, 6, 6]],
...     [[7, 7, 7], [8, 8, 8]],
... ])
# A more complex contraction: for each output position (i, j), multiply
# a[i], b[j][i], and c[k][j][i], summing over k, the only index that
# does not appear on the right-hand side
>>> np.einsum("i, ji, kji -> ij", a, b, c)
# array([[ 96, 100],
#        [192, 200],
#        [288, 300]])
# Python Equivalent
>>> for i in range(len(a)):
...     for j in range(len(b)):
...         total = 0
...         for k in range(len(c)):
...             total += a[i] * b[j][i] * c[k][j][i]
...         print(total, end="\t")
...     print()
# 96 100
# 192 200
# 288 300
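The same contraction can also be written with broadcasting: sum c over its first axis, multiply the three tensors, then transpose to get the `ij` layout:
>>> (a * b * c.sum(axis=0)).T
# array([[ 96, 100],
#        [192, 200],
#        [288, 300]])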
If you wish to understand the theory behind the operation as well, I suggest the following guides:
- Stack Overflow answer by Alex Riley
- Einsum Is All You Need: NumPy, PyTorch and TensorFlow by Aladdin Persson
If you found this post useful, please consider subscribing to my blog. You can also follow me on Twitter.