Professional Documents
Culture Documents
Numpy - KickStart - Jupyter Notebook
Numpy - KickStart - Jupyter Notebook
Why Numpy?
NumPy lets us perform numerical operations on arrays directly, such as element-wise addition and
multiplication, creating dummy values, etc.
In [1]:
import numpy as np
In [2]:
arr_1 =np.array([1,2,3,4,5])
arr_1
Out[2]:
array([1, 2, 3, 4, 5])
In [4]:
type(arr_1)
Out[4]:
numpy.ndarray
In [5]:
list_1 = [1,2,3,4,5]
list_1
Out[5]:
[1, 2, 3, 4, 5]
In [6]:
type(list_1)
Out[6]:
list
List Vs Numpy
1. With a list, we cannot perform element-wise operations directly, but with an array we can.
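3. A list can be converted into an array and vice versa, but a list has no dimension attributes (such as `ndim` or `shape`), so that information is not available after converting an array back to a list.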
Create 1D array
In [10]:
# Build a one-dimensional array and inspect its basic properties.
arr_1d = np.array([1, 2, 3, 4, 5])
print(arr_1d)
print('No of dimensions: ', arr_1d.ndim)  # ndim is an attribute, not a method
print('No of elements : ', arr_1d.size)  # size is an attribute, not a method
# NOTE: argmax() gives the *index* of the largest value (4 here), not the
# value itself (5); use arr_1d.max() when the element is what you want.
print('Max element : ', arr_1d.argmax())
[1 2 3 4 5]
No of dimensions: 1
No of elements : 5
Max element : 4
Differences
In [13]:
list_1.append([6,7])
In [14]:
list_1
Out[14]:
In [ ]:
In [16]:
arr_1d + 3
Out[16]:
array([4, 5, 6, 7, 8])
In [17]:
---------------------------------------------------------------------------
<ipython-input-17-6657cea64c08> in <module>
----> 1 list_1 + 3
In [18]:
list_2 = [1,3.5,'Vennela']
list_2
Out[18]:
In [21]:
# All inputs are integers, so NumPy picks an integer dtype
# (int32 in the recorded output; the default width depends on the platform).
arr_2 = np.array([1,2,3])
print(arr_2)
print(arr_2.dtype)
[1 2 3]
int32
In [22]:
# A single float among the inputs promotes every element to float64,
# because an ndarray holds one common dtype for all of its elements.
arr_2 = np.array([1,2.4,3])
print(arr_2)
print(arr_2.dtype)
[1. 2.4 3. ]
float64
In [23]:
# Mixing numbers with a string makes NumPy store every element as text,
# so the dtype becomes a Unicode string type (<U32) instead of a numeric one.
arr_2 = np.array([1,2.4,'3'])
print(arr_2)
print(arr_2.dtype)
<U32
Create a 2d array
In [28]:
# A list of equal-length rows becomes a two-dimensional array (2 rows x 3 columns).
arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr_2d)
print('No of dimensions : ', arr_2d.ndim)
[[1 2 3]
[4 5 6]]
No of dimensions : 2
Create 3d array
In [33]:
# A 3-D array is a stack of 2-D matrices. Here one 3x3 matrix is wrapped in an
# extra pair of brackets, which gives shape (1, 3, 3).
arr_3d = np.array([[[1,2,3],[4,5,6],[7,8,9]]])
# Print the array that was just created. The cell previously printed arr_2d,
# which only matched because of leftover kernel state and breaks on a fresh run.
print(arr_3d)
print('No of dimensions : ',arr_3d.ndim)
print('Type of elements : ',arr_3d.dtype)
[[[1 2 3]
[4 5 6]
[7 8 9]]]
No of dimensions : 3
In [35]:
[[[1. 2. 3.]
[4. 5. 6.]
[7. 8. 9.]]]
No of dimensions : 3
In [40]:
# Start from a plain nested Python list (2 rows x 4 columns).
list_3 = [
    [1, 2, 3, 4],
    [3, 7, 8, 9],
]
print(list_3)
print(type(list_3))

# Conversion: np.array() turns the nested list into a two-dimensional ndarray.
list_to_array = np.array(list_3)
print(list_to_array)
print(type(list_to_array))
print(list_to_array.ndim)
<class 'list'>
[[1 2 3 4]
[3 7 8 9]]
<class 'numpy.ndarray'>
In [45]:
# Start from a two-dimensional ndarray (2 rows x 3 columns).
arr_4 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr_4)
print(type(arr_4))
print('No of dimensions : ', arr_4.ndim)

# Conversion: tolist() returns the same data as nested Python lists.
arr_to_list = arr_4.tolist()
arr_to_list
[[1 2 3]
[4 5 6]]
<class 'numpy.ndarray'>
No of dimensions : 2
Out[45]:
In [48]:
# NOTE(review): pandas is imported mid-notebook; consider moving this import
# to the import cell at the top so dependencies are visible in one place.
import pandas as pd
# Read 'dummy_data.csv' (path is relative to the working directory) and
# display the resulting DataFrame as the cell output.
pd.read_csv('dummy_data.csv')
Out[48]:
In [52]:
# Ask for a float dtype explicitly rather than relying on a single `1.`
# literal to promote the whole array; only a float array can hold NaN.
arr_5 = np.array([[1, 2, 3], [4, 5, 6]], dtype=float)
arr_5
Out[52]:
In [53]:
# Mark the first element as missing. NaN is a float value, so arr_5 must have
# a float dtype. Index with one (row, col) tuple instead of chained [0][0]:
# it addresses the element in a single step and is the form that keeps working
# when an index is a slice, a mask or an index list.
arr_5[0, 0] = np.nan
In [54]:
arr_5
Out[54]:
Statistical Operations
In [55]:
# The integers 1 through 10; arange excludes the stop value, hence 11.
arr_6 = np.arange(1, 11)
print(arr_6)
[ 1 2 3 4 5 6 7 8 9 10]
In [56]:
arr_6.sum()
Out[56]:
55
In [57]:
arr_6.prod()
Out[57]:
3628800
In [58]:
arr_6.mean()
Out[58]:
5.5
In [59]:
arr_6.std() # Standard deviation: how far the data points are spread around the mean (the value shown is the population form, sqrt(8.25))
Out[59]:
2.8722813232690143
In [60]:
arr_6.argmax()
Out[60]:
Reshaping
localhost:8888/notebooks/Data science/Numpy_KickStart.ipynb 6/14
10/8/21, 12:19 AM Numpy_KickStart - Jupyter Notebook
In [64]:
# Two rows of five values each, i.e. shape (2, 5).
arr_7 = np.array([
    [1, 2, 3, 4, 5],
    [2, 3, 4, 4, 6],
])
print(arr_7)
print('Dimension: ', arr_7.ndim)
print('Shape : ', arr_7.shape)
[[1 2 3 4 5]
[2 3 4 4 6]]
Dimension: 2
Shape : (2, 5)
In [68]:
# Rearrange the same 10 values into 5 rows x 2 columns. The elements are
# taken in row order, and the new shape must hold exactly as many elements
# as the old one.
arr_7_reshape = arr_7.reshape(5, 2)
print(arr_7_reshape)
print('Dimension: ', arr_7_reshape.ndim)
print('Shape : ', arr_7_reshape.shape)
[[1 2]
[3 4]
[5 2]
[3 4]
[4 6]]
Dimension: 2
Shape : (5, 2)
Reshape to 1 dimension
In [74]:
# reshape(1,10) yields ONE row with ten columns: the result is still
# two-dimensional (note the double brackets and "Dimension: 2" in the output),
# so this alone does not produce a 1-D array.
arr_7 = arr_7.reshape(1,10)
print(arr_7)
print('Dimension: ',arr_7.ndim)
print('Shape : ',arr_7.shape)
[[1 2 3 4 5 2 3 4 4 6]]
Dimension: 2
In [75]:
# flatten() collapses the array into a true one-dimensional array,
# which is why the shape is reported as (10,) rather than (1, 10).
arr_7 = arr_7.flatten()
print(arr_7)
print('Dimension: ',arr_7.ndim)
print('Shape : ',arr_7.shape)
[1 2 3 4 5 2 3 4 4 6]
Dimension: 1
Shape : (10,)
In [82]:
np.arange(1,21,dtype='int')
Out[82]:
In [83]:
np.arange(1,21,dtype='float')
Out[83]:
array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13.,
In [84]:
np.linspace(start = 1,stop = 50,num=20) #Return evenly spaced numbers over a specified interval
Out[84]:
In [86]:
np.ones((3,5),dtype = 'int')
Out[86]:
array([[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]])
In [87]:
np.ones((3,5),dtype = 'float')
Out[87]:
In [90]:
np.zeros((5,3),dtype = 'int')
Out[90]:
array([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]])
In [91]:
arr_2d
Out[91]:
array([[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]])
In [93]:
Out[93]:
array([[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]])
In [97]:
In [115]:
plt.hist(random_numbers)
Out[115]:
(array([[2., 1., 2., 0., 0., 0., 0., 2., 1., 2.],
[0., 2., 0., 2., 2., 0., 1., 2., 0., 1.],
[1., 1., 1., 0., 1., 0., 2., 1., 2., 1.],
[0., 0., 0., 0., 1., 3., 3., 1., 1., 1.],
[1., 0., 1., 1., 2., 3., 0., 1., 1., 0.],
[2., 1., 1., 1., 1., 0., 0., 0., 3., 1.],
[1., 2., 2., 0., 0., 1., 1., 2., 1., 0.],
[0., 0., 2., 1., 1., 0., 0., 2., 1., 3.],
[1., 1., 1., 4., 2., 0., 0., 0., 0., 1.],
[1., 1., 1., 1., 0., 0., 2., 1., 2., 1.]]),
0.98032876]),
In [105]:
Out[105]:
In [106]:
ages = [12,35,67,89,55,78,55,76,89,100]
ages
Out[106]:
[12, 35, 67, 89, 55, 78, 55, 76, 89, 100]
In [109]:
# Pick 3 values at random from `ages`. No seed is set in the visible cells,
# so the drawn values will presumably differ on every run.
np.random.choice(a = ages,size=3)
Out[109]:
In [112]:
Out[112]:
In [113]:
In [114]:
plt.hist(norm_distribution_random_numbers)
Out[114]:
(array([[1., 1., 0., 0., 2., 3., 3., 0., 0., 0.],
[0., 0., 1., 2., 0., 1., 5., 1., 0., 0.],
[0., 0., 0., 3., 1., 2., 0., 3., 1., 0.],
[0., 0., 0., 2., 1., 3., 1., 1., 2., 0.],
[0., 0., 0., 3., 0., 1., 3., 2., 1., 0.],
[0., 0., 0., 3., 0., 1., 3., 2., 1., 0.],
[0., 0., 0., 2., 1., 3., 3., 1., 0., 0.],
[1., 0., 0., 1., 3., 1., 2., 1., 0., 1.],
[0., 0., 1., 1., 5., 0., 2., 0., 0., 1.],
[0., 0., 1., 2., 2., 2., 0., 1., 1., 1.]]),
2.45851022]),
OBSERVATION
NORMAL DISTRIBUTION: