# Introduction to NumPy

In [1]:
import numpy as np

In [81]:
# NumPy arrays have a fixed dtype and fixed dimensions.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 0, 1, 2])

# Arithmetic operators are applied element by element.
a * b

array([ 7, 16, 27,  0,  5, 12])

In [3]:
# Individual elements can be overwritten in place through their index.
a[0] = 5
a

array([5, 2, 3, 4, 5, 6])

In [4]:
# The array keeps its integer dtype, so the float is silently truncated to 3
# when it is stored (no error, no rounding) -- beware of such conversions.
a[0] = 3.7
a

array([3, 2, 3, 4, 5, 6])

In [5]:
# The element type of an array can always be inspected via .dtype.
a.dtype

dtype('int32')

In [6]:
# A value that cannot be converted to the array's dtype is rejected.
# The exception is the point of this cell, so it is caught and printed instead
# of being left to abort a "Restart & Run All" (which also made the final
# expression unreachable).
try:
    a[0] = "Hi, I am wrong!"
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
a  # displayed after the rejected assignment

ValueError: invalid literal for int() with base 10: 'Hi, I am wrong!'

In [7]:
# np.array(...) creates a copy of the original array (here converted to float64);
# sometimes such a copy is undesirable.
c = np.array(a, dtype=np.float64)
(c, c.dtype)

(array([3., 2., 3., 4., 5., 6.]), dtype('float64'))

In [8]:
# c is an independent copy: writing to it leaves a unchanged.
c[0] = 7
(c,a)

(array([7., 2., 3., 4., 5., 6.]), array([3, 2, 3, 4, 5, 6]))

In [9]:
# A 2-D array is built from a list of equally long rows.
M = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 0],
    [1, 1, 1, 1, 1],
])
M.shape  # (rows, columns)

(3, 5)

In [13]:
# Slices are half-open, [from, to): this selects rows 0 and 1.
M[0:2]

array([[1, 2, 3, 4, 5],
       [6, 7, 8, 9, 0]])

In [14]:
# All dimensions can be sliced at once: rows 0-1, columns 2-3.
M[0:2, 2:4]

array([[3, 4],
       [8, 9]])

In [19]:
# start:stop:step -- row 0, every second column starting at index 1.
# Negative indices count from the end (e.g. M[0, -1] is the last element of row 0).
M[0, 1::2]

array([2, 4])

### changing "parent" arrays via assignment to "child"

In [26]:
# Slicing does not copy anything, which is extremely important for larger data.
# Set up a 6x6 matrix of ones and the vector 1..6 for the following cells.
a = np.ones(shape=(6, 6))
b = np.arange(1, 7)
(a, b)

(array([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]]), array([1, 2, 3, 4, 5, 6]))

In [27]:
# a[1] is a view onto row 1 of a, not a copy.
c = a[1]
# Assigning through c[:] writes into the existing elements, so a itself changes.
# Note: re-running this cell adds b to row 1 again.
c[:] = c+b
a

array([[1., 1., 1., 1., 1., 1.],
       [2., 3., 4., 5., 6., 7.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [28]:
# However: creating new content is not the same as assigning into an array.
c = a[3]
# c+b builds a new array and the name c is rebound to it; row 3 of a is untouched.
c = c+b
a

array([[1., 1., 1., 1., 1., 1.],
       [2., 3., 4., 5., 6., 7.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

### universal functions

In [34]:
# What about operations between arrays (and between an array and a scalar)?
a = np.arange(1,4)
# Unseeded random values: the numbers differ on every run.
b = np.random.rand(3)
# One row per expression, stacked so the results can be compared side by side.
np.stack([a,b,a+b,a*b,a+2, a*2, a/b, a**2, np.log(a)])

array([[1.        , 2.        , 3.        ],
       [0.48239624, 0.26246995, 0.64071066],
       [1.48239624, 2.26246995, 3.64071066],
       [0.48239624, 0.52493991, 1.92213198],
       [3.        , 4.        , 5.        ],
       [2.        , 4.        , 6.        ],
       [2.07298466, 7.61991985, 4.68230074],
       [1.        , 4.        , 9.        ],
       [0.        , 0.69314718, 1.09861229]])

**Wait a second... why did `a+2` work?**
- NumPy universal functions (ufuncs)
- broadcasting

In [83]:
# Operands of different dimensionality for the broadcasting exercise.
M = np.ones(shape=(5, 3))  # 2-D matrix of ones
a = np.arange(1, 6)        # 1-D, five elements
b = np.arange(1, 4)        # 1-D, three elements
c = np.arange(1, 2)        # 1-D, a single element
d = 5                      # plain Python scalar
(M, a, b, c, d)

(array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]),
 array([1, 2, 3, 4, 5]),
 array([1, 2, 3]),
 array([1]),
 5)

In [84]:
# The shapes decide which operands can be combined by broadcasting.
(M.shape,a.shape,b.shape,c.shape)

((5, 3), (5,), (3,), (1,))

**Which of the arrays are compatible?**

In [86]:
# Exercise: uncomment one candidate at a time to check your answer.
# Broadcasting compares shapes from the trailing axis backwards; two axis
# lengths are compatible when they are equal or one of them is 1.
# M+c is left active so that the cell produces a result on a fresh run.
#M+a    # (5, 3) + (5,)   -> ValueError: trailing axes 3 and 5 do not match
#M.T+a  # (3, 5) + (5,)   -> ok, shape (3, 5); .T is the transposition
#M+b    # (5, 3) + (3,)   -> ok, shape (5, 3)
M+c     # (5, 3) + (1,)   -> ok, shape (5, 3)
#M+d    # (5, 3) + scalar -> ok, shape (5, 3)
#a+b    # (5,)   + (3,)   -> ValueError: 5 and 3 do not match
#a+c    # (5,)   + (1,)   -> ok, shape (5,)
#a+d    # (5,)   + scalar -> ok, shape (5,)

array([[2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])

In [70]:
# Add a new axis of size one: shape (5,) becomes (5, 1) -> [[1],[2],[3],...]
a_ex = np.expand_dims(a,axis=1)
# flags.owndata reports whether the array owns its memory; False here means
# a_ex reuses a's data instead of holding a copy.
(a_ex.shape, a_ex.flags.owndata)

((5, 1), False)

In [71]:
# (5, 3) + (5, 1): the size-one axis is stretched across the three columns,
# so every element of row i has a[i] added to it.
M+a_ex

array([[2., 2., 2.],
       [3., 3., 3.],
       [4., 4., 4.],
       [5., 5., 5.],
       [6., 6., 6.]])

**Good, is that it?**
- not quite: reduction functions
- (tweaking array shapes, vectorize, load/store numpy arrays ...) https://numpy.org/doc/1.19/reference/index.html

In [76]:
# A 3-D array of uniform random numbers in [0, 1) to try reductions on.
results = np.random.rand(50, 10, 5)
results.shape

(50, 10, 5)

In [78]:
# Reductions collapse the given axis (or axes); each collapsed axis disappears
# from the shape of the result.
# No axis: the mean over all elements, a single scalar.
a = results.mean()
# Collapse axis 0 (length 50) -> shape (10, 5).
b = results.mean(axis=0)
# Collapse axis 1 (length 10) -> shape (50, 5).
c = results.mean(axis=1)
# Collapse axis 2 (length 5) -> shape (50, 10).
d = results.mean(axis=2)
# Collapse axes 1 and 2 together -> shape (50,).
e = results.mean(axis=(1,2))
(a,b.shape,c.shape,d.shape,e.shape)

(0.48220301547720545, (10, 5), (50, 5), (50, 10), (50,))

In [79]:
# -1 lets NumPy infer that axis from the element count: (50, 10, 5) -> (50, 50).
# Note: this rebinds results, so the name no longer refers to the 3-D array.
results = np.reshape(results,(50,-1))
results.shape

(50, 50)