4 Dataframe Operations - Ipynb

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 85

{

"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Operations"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random\n",
"import numpy as np\n",
"import matplotlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09\n",
"R00 79 19 21 99 35 59 44 25 75 58\n",
"R01 25 39 89 66 9 41 6 69 63 3\n",
"R02 37 64 31 69 61 97 5 11 76 57\n",
"R03 74 61 100 6 58 80 95 50 15 51\n",
"R04 79 60 83 85 16 5 16 69 5 20\n",
"R05 45 26 73 73 100 60 21 19 95 12\n",
"R06 12 29 18 98 62 68 92 29 74 96\n",
"R07 36 32 22 4 66 25 63 51 59 14\n",
"R08 55 53 89 13 84 87 74 3 2 64\n",
"R09 46 74 36 54 21 12 68 33 80 25"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_json('./data/sampledf.json')\n",
"df.index = [\"R{:02d}\".format(i) for i in range(len(df))]\n",
"df.columns = [\"C{:02d}\".format(i) for i in range(len(df.columns))]\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Adding and deleting Series in a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>18</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 71\n",
"R01 25 39 89 66 9 41 6 69 63 3 18\n",
"R02 37 64 31 69 61 97 5 11 76 57 89\n",
"R03 74 61 100 6 58 80 95 50 15 51 18\n",
"R04 79 60 83 85 16 5 16 69 5 20 1\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 74\n",
"R07 36 32 22 4 66 25 63 51 59 14 67\n",
"R08 55 53 89 13 84 87 74 3 2 64 5\n",
"R09 46 74 36 54 21 12 68 33 80 25 18"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C10'] = [random.randint(1, 100) for i in range(len(df))]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 63\n",
"1 99\n",
"2 38\n",
"3 73\n",
"4 99\n",
"5 77\n",
"6 97\n",
"7 11\n",
"8 26\n",
"9 83\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# caveat when adding a Series:\n",
"new_series = pd.Series(list([random.randint(1, 100) for i in
range(len(df))]))\n",
"new_series"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 NaN\n",
"R01 25 39 89 66 9 41 6 69 63 3 NaN\n",
"R02 37 64 31 69 61 97 5 11 76 57 NaN\n",
"R03 74 61 100 6 58 80 95 50 15 51 NaN\n",
"R04 79 60 83 85 16 5 16 69 5 20 NaN\n",
"R05 45 26 73 73 100 60 21 19 95 12 NaN\n",
"R06 12 29 18 98 62 68 92 29 74 96 NaN\n",
"R07 36 32 22 4 66 25 63 51 59 14 NaN\n",
"R08 55 53 89 13 84 87 74 3 2 64 NaN\n",
"R09 46 74 36 54 21 12 68 33 80 25 NaN"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C10'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"R00 63\n",
"R01 99\n",
"R02 38\n",
"R03 73\n",
"R04 99\n",
"R05 77\n",
"R06 97\n",
"R07 11\n",
"R08 26\n",
"R09 83\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# adding a new Series: mind the index of the DF\n",
"new_series.index = df.index\n",
"new_series"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C10'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" <th>C11</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11\n",
"R00 79 19 21 99 35 59 44 25 75 58 63 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83 83"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C11'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"del df['C11']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "KeyError",
"evalue": "'C12'",
"output_type": "error",
"traceback": [
"\
u001b[0;31m------------------------------------------------------------------------
---\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback
(most recent call last)",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method,
tolerance)\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0;32mtry\
u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\
n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \
u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\
u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\
u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\
n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \
u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\
u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/_libs/
index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\
u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/_libs/
index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\
u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \
u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\
u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \
u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\
u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'C12'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback
(most recent call last)",

"\u001b[0;32m/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/39014
31452.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m---->
1\u001b[0;31m \u001b[0;32mdel\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\
u001b[0;34m'C12'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
generic.py\u001b[0m in \u001b[0;36m__delitem__\u001b[0;34m(self, key)\u001b[0m\n\
u001b[1;32m 3961\u001b[0m \u001b[0;31m# there was no match, this call
should raise the appropriate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\
u001b[0m\n\u001b[1;32m 3962\u001b[0m \u001b[0;31m# exception:\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3963\
u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\
u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\
u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\
u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\
u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\
u001b[0m\u001b[1;32m 3964\u001b[0m \u001b[0mself\u001b[0m\
u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\
u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[0;34m.\u001b[0m\
u001b[0midelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m)\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3965\
u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method,
tolerance)\u001b[0m\n\u001b[1;32m 3361\u001b[0m \
u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\
u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\
u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\
u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3362\u001b[0m
\u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \
u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m \
u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\
u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m
3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3365\
u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\
u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \
u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\
u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\
u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'C12'"
]
}
],
"source": [
"del df['C12']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
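"# errors='ignore' suppresses the KeyError when a label to drop does not exist\n",
"# (note: without axis=1 or columns=..., drop() looks for the label in the row index)\n",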
"df.drop(['C12'], errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C03</th>\n",
" <th>C05</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>99</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>66</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>69</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>6</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>85</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>73</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>98</td>\n",
" <td>68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>4</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>13</td>\n",
" <td>87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>54</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C03 C05\n",
"R00 99 59\n",
"R01 66 41\n",
"R02 69 97\n",
"R03 6 80\n",
"R04 85 5\n",
"R05 73 60\n",
"R06 98 68\n",
"R07 4 25\n",
"R08 13 87\n",
"R09 54 12"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# getting a subset is simpler than deleting many columns\n",
"dfs = df.loc[:, ['C03', 'C05']]\n",
"dfs"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"dfs['C04'] = [random.randint(1, 100) for i in range(len(df))]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C03</th>\n",
" <th>C05</th>\n",
" <th>C04</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>99</td>\n",
" <td>59</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>66</td>\n",
" <td>41</td>\n",
" <td>89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>69</td>\n",
" <td>97</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>6</td>\n",
" <td>80</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>85</td>\n",
" <td>5</td>\n",
" <td>79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>73</td>\n",
" <td>60</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>98</td>\n",
" <td>68</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>4</td>\n",
" <td>25</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>13</td>\n",
" <td>87</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>54</td>\n",
" <td>12</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C03 C05 C04\n",
"R00 99 59 47\n",
"R01 66 41 89\n",
"R02 69 97 67\n",
"R03 6 80 8\n",
"R04 85 5 79\n",
"R05 73 60 74\n",
"R06 98 68 23\n",
"R07 4 25 94\n",
"R08 13 87 97\n",
"R09 54 12 15"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfs"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mangle your data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**recall: NumPy Broadcasting**"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.61903208],\n",
" [0.76795317],\n",
" [0.26790371]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.random.rand(3,1)\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.58635195],\n",
" [0.64208162],\n",
" [0.12870538]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = np.random.rand(3,1)\n",
"b"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.20538403],\n",
" [1.41003479],\n",
" [0.39660909]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.03268013],\n",
" [0.12587155],\n",
" [0.13919833]])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a - b"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.36297067],\n",
" [0.49308861],\n",
" [0.03448065]])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a * b"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.05573466],\n",
" [1.19603669],\n",
" [2.08152693]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a / b"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[7.61903208],\n",
" [7.76795317],\n",
" [7.26790371]])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + 7"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.],\n",
" [0.],\n",
" [0.]])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b // 2"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[False],\n",
" [False],\n",
" [False]])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a % 2 == 0"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2.47612831, 3.09516039],\n",
" [3.07181268, 3.83976585],\n",
" [1.07161484, 1.33951855]])"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a * [4, 5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Broadcasting in Pandas"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice \n",
"0 Star Wars 27 11.81 \n",
"1 PlayStation 1 284.71 \n",
"2 banana 49 10.00 \n",
"3 Thriller record 48 16.77 \n",
"4 Harry Potter book 4 5.65 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data = pd.read_json('./data/blooth_sales_data.json',\n",
" convert_dates=['birthday', 'orderdate']\n",
" )\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover \n",
"0 Star Wars 27 11.81 318.87 \n",
"1 PlayStation 1 284.71 284.71 \n",
"2 banana 49 10.00 490.00 \n",
"3 Thriller record 48 16.77 804.96 \n",
"4 Harry Potter book 4 5.65 22.60 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'] = sales_data['unitprice'] * sales_data['units']\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"59915.64764605545"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"class A:\n",
" \n",
" def __init__(self, a=0):\n",
" self.a = a\n",
" \n",
" def __add__(self, o):\n",
" return self.a + o.a\n",
" \n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"a = A(10)\n",
"b = A(20)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [

"/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/590773150.py:1: FutureWarning: DataFrame.mean and DataFrame.median with numeric_only=None will include datetime64 and datetime64tz columns in a future version.\n",
" sales_data.mean()\n",

"/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/590773150.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" sales_data.mean()\n"
]
},
{
"data": {
"text/plain": [
"units 25.808102\n",
"unitprice 2329.681343\n",
"turnover 59915.647646\n",
"dtype: float64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.mean()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"140502193.73000002"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"532.14"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].median()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 2345 entries, 0 to 2344\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 2345 non-null object \n",
" 1 birthday 2345 non-null datetime64[ns]\n",
" 2 customer 2345 non-null object \n",
" 3 orderdate 2345 non-null datetime64[ns]\n",
" 4 product 2345 non-null object \n",
" 5 units 2345 non-null int64 \n",
" 6 unitprice 2345 non-null float64 \n",
" 7 turnover 2345 non-null float64 \n",
"dtypes: datetime64[ns](2), float64(2), int64(1), object(3)\n",
"memory usage: 146.7+ KB\n"
]
}
],
"source": [
"sales_data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate the mean of a column (e.g. `turnover`) without using `.mean()`"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# your code here\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Find the oldest customer (earliest birthday)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# your code here\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions with **`.map()`, `.apply()`, `.applymap()`**"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year \n",
"0 Star Wars 27 11.81 318.87 2016 \n",
"1 PlayStation 1 284.71 284.71 2016 \n",
"2 banana 49 10.00 490.00 2016 \n",
"3 Thriller record 48 16.77 804.96 2016 \n",
"4 Harry Potter book 4 5.65 22.60 2016 "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# map for Series\n",
"sales_data['year'] = sales_data['orderdate'].map(lambda x: x.year)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year month \n",
"0 Star Wars 27 11.81 318.87 2016 10 \n",
"1 PlayStation 1 284.71 284.71 2016 9 \n",
"2 banana 49 10.00 490.00 2016 10 \n",
"3 Thriller record 48 16.77 804.96 2016 10 \n",
"4 Harry Potter book 4 5.65 22.60 2016 10 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# map for Series\n",
"sales_data['month'] = sales_data['orderdate'].map(lambda x: x.month)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# combine multiple columns\n",
"sales_data['year-month'] = sales_data['year'].map(\n",
" str) + sales_data['month'].map(lambda x: \"-{:02d}\".format(x))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year month year-month \n",
"0 Star Wars 27 11.81 318.87 2016 10 2016-10 \n",
"1 PlayStation 1 284.71 284.71 2016 9 2016-09 \n",
"2 banana 49 10.00 490.00 2016 10 2016-10 \n",
"3 Thriller record 48 16.77 804.96 2016 10 2016-10 \n",
"4 Harry Potter book 4 5.65 22.60 2016 10 2016-10 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11.81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>284.71</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10.00</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16.77</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.65</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 11.81 27\n",
"1 284.71 1\n",
"2 10.00 49\n",
"3 16.77 48\n",
"4 5.65 4"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[['unitprice', 'units']].head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>47.24</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1138.84</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.00</td>\n",
" <td>196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>67.08</td>\n",
" <td>192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22.60</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 47.24 108\n",
"1 1138.84 4\n",
"2 40.00 196\n",
"3 67.08 192\n",
"4 22.60 16"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# applymap for df\n",
"sales_data[['unitprice', 'units']].applymap(lambda x: np.round(4 * x,
2)).head(5)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>47.24</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1138.84</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.00</td>\n",
" <td>196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>67.08</td>\n",
" <td>192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22.60</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 47.24 108\n",
"1 1138.84 4\n",
"2 40.00 196\n",
"3 67.08 192\n",
"4 22.60 16"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[['unitprice', 'units']].apply(lambda x: np.round(4 * x,
2)).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Differences between **`.map()`, `.apply()` and `.applymap()`**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.map()`**\n",
" > apply a function to each element of a Series (i.e. a single DataFrame column)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 11\n",
"1 284\n",
"2 10\n",
"3 16\n",
"4 5\n",
"Name: unitprice, dtype: int64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['unitprice'].map(int)[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.apply()`**\n",
" > apply a function along an axis of the DataFrame, i.e. to each column (`axis=0`, the default) or to each row (`axis=1`)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"e.g. sum the two cells of each row (here: unitprice + units)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 38.81\n",
"1 285.71\n",
"2 59.00\n",
"3 64.77\n",
"4 9.65\n",
"dtype: float64"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[['unitprice', 'units']].apply(sum, axis=1)[:5] # default axis=0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.applymap()`** \n",
" > apply a function element-wise, i.e. to every single cell of the DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>284</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 11 27\n",
"1 284 1\n",
"2 10 49\n",
"3 16 48\n",
"4 5 4"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[['unitprice', 'units']].applymap(int)[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Remember: these methods return a new object (a copy). The original DF is not altered unless the result is assigned back to it."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 11.81 318.87 2016 10 2016-10 \
n",
"1 PlayStation 1 284.71 284.71 2016 9 2016-09 \
n",
"2 banana 49 10.00 490.00 2016 10 2016-10 \
n",
"3 Thriller record 48 16.77 804.96 2016 10 2016-10 \
n",
"4 Harry Potter book 4 5.65 22.60 2016 10 2016-10 "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"sales_data['unitprice'] = sales_data['unitprice'].map(lambda x: x * 2)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2345.000000</td>\n",
" <td>2345.000000</td>\n",
" <td>2.345000e+03</td>\n",
" <td>2345.0</td>\n",
" <td>2345.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>25.808102</td>\n",
" <td>4659.362687</td>\n",
" <td>5.991565e+04</td>\n",
" <td>2016.0</td>\n",
" <td>9.812793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>14.474110</td>\n",
" <td>13164.655228</td>\n",
" <td>1.969636e+05</td>\n",
" <td>0.0</td>\n",
" <td>0.390161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>10.020000</td>\n",
" <td>5.080000e+00</td>\n",
" <td>2016.0</td>\n",
" <td>9.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>13.000000</td>\n",
" <td>21.320000</td>\n",
" <td>2.601000e+02</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>26.000000</td>\n",
" <td>35.700000</td>\n",
" <td>5.321400e+02</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>39.000000</td>\n",
" <td>979.880000</td>\n",
" <td>1.016450e+04</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>50.000000</td>\n",
" <td>49791.260000</td>\n",
" <td>1.222270e+06</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" units unitprice turnover year month\n",
"count 2345.000000 2345.000000 2.345000e+03 2345.0 2345.000000\n",
"mean 25.808102 4659.362687 5.991565e+04 2016.0 9.812793\n",
"std 14.474110 13164.655228 1.969636e+05 0.0 0.390161\n",
"min 1.000000 10.020000 5.080000e+00 2016.0 9.000000\n",
"25% 13.000000 21.320000 2.601000e+02 2016.0 10.000000\n",
"50% 26.000000 35.700000 5.321400e+02 2016.0 10.000000\n",
"75% 39.000000 979.880000 1.016450e+04 2016.0 10.000000\n",
"max 50.000000 49791.260000 1.222270e+06 2016.0 10.000000"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/852887285.py:1:
FutureWarning: Treating datetime data as categorical rather than numeric in
`.describe` is deprecated and will be removed in a future version of pandas.
Specify `datetime_is_numeric=True` to silence this warning and adopt the future
behavior now.\n",
" sales_data['birthday'].describe()\n"
]
},
{
"data": {
"text/plain": [
"count 2345\n",
"unique 296\n",
"top 1998-04-20 00:00:00\n",
"freq 19\n",
"first 1952-02-07 00:00:00\n",
"last 1999-05-01 00:00:00\n",
"Name: birthday, dtype: object"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['birthday'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'] = sales_data['turnover'].astype(np.int32)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [name, birthday, customer, orderdate, product, units, unitprice,
turnover, year, month, year-month]\n",
"Index: []"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[sales_data['unitprice'].isnull()]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.dropna().head(5)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.fillna(99.99).head(5)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.fillna(99.99, inplace=True)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}

You might also like