Professional Documents
Culture Documents
Import As Import As From Import
Import As Import As From Import
Out[54]:
Unnamed:
match_event_id location_x location_y remaining_min power_of_shot knockout_match game_season remaining_sec distance_of_sho
0
localhost:8888/notebooks/Untitled19.ipynb# 1/23
12/8/2019 Untitled19
In [55]: pd.set_option('display.max_columns', None) #this function helps us view all the columns available
df=df.rename(columns={ df.columns[0]: "Index" })
df.head()
Out[55]:
Index match_event_id location_x location_y remaining_min power_of_shot knockout_match game_season remaining_sec distance_of_shot is
In [57]: pd.value_counts(df.is_goal) #counting the number of values of each unique type in is_goal
localhost:8888/notebooks/Untitled19.ipynb# 2/23
12/8/2019 Untitled19
localhost:8888/notebooks/Untitled19.ipynb# 3/23
12/8/2019 Untitled19
Out[62]: 0 3
1 5
2 4
3 2
4 1
5 5
6 1
7 1
8 5
9 1
10 4
11 2
12 5
13 5
14 1
15 1
16 1
17 4
18 4
19 1
20 1
21 2
22 1
23 4
24 1
25 1
26 1
27 4
28 2
29 1
..
30667 1
30668 1
30669 1
30670 1
30671 1
localhost:8888/notebooks/Untitled19.ipynb# 4/23
12/8/2019 Untitled19
30672 3
30673 3
30674 2
30675 4
30676 5
30677 4
30678 1
30679 1
30680 1
30681 1
30682 5
30683 1
30684 5
30685 1
30686 1
30687 1
30688 5
30689 6
30690 5
30691 1
30692 1
30693 1
30694 4
30695 1
30696 1
Name: area_of_shot, Length: 30697, dtype: int64
Out[64]: array(['Mid Range', 'Goal Area', 'Goal Line', 'Penalty Spot', nan,
'Right Corner', 'Mid Ground Line', 'Left Corner'], dtype=object)
localhost:8888/notebooks/Untitled19.ipynb# 5/23
12/8/2019 Untitled19
localhost:8888/notebooks/Untitled19.ipynb# 6/23
12/8/2019 Untitled19
Out[67]: 0 1
1 1
2 1
3 1
4 2
5 1
6 2
7 2
8 4
9 4
10 3
11 1
12 4
13 1
14 4
15 1
16 2
17 1
18 1
19 1
20 1
21 1
22 4
23 1
24 4
25 2
26 1
27 3
28 1
29 4
..
30667 3
30668 1
30669 1
30670 4
30671 2
localhost:8888/notebooks/Untitled19.ipynb# 7/23
12/8/2019 Untitled19
30672 1
30673 1
30674 3
30675 3
30676 1
30677 1
30678 2
30679 2
30680 2
30681 3
30682 4
30683 1
30684 1
30685 3
30686 4
30687 4
30688 1
30689 6
30690 1
30691 2
30692 1
30693 2
30694 1
30695 3
30696 4
Name: shot_basics, Length: 30697, dtype: int64
Out[68]: array(['16-24 ft.', '8-16 ft.', 'Less Than 8 ft.', '24+ ft.', nan,
'Back Court Shot'], dtype=object)
localhost:8888/notebooks/Untitled19.ipynb# 8/23
12/8/2019 Untitled19
localhost:8888/notebooks/Untitled19.ipynb# 9/23
12/8/2019 Untitled19
Out[71]: 0 3
1 2
2 3
3 3
4 1
5 2
6 1
7 1
8 2
9 2
10 4
11 3
12 2
13 2
14 1
15 3
16 1
17 1
18 3
19 1
20 2
21 3
22 2
23 3
24 1
25 1
26 3
27 4
28 3
29 2
..
30667 4
30668 3
30669 3
30670 1
30671 1
30672 2
localhost:8888/notebooks/Untitled19.ipynb# 10/23
12/8/2019 Untitled19
30673 2
30674 4
30675 4
30676 2
30677 3
30678 1
30679 1
30680 1
30681 4
30682 2
30683 3
30684 2
30685 4
30686 2
30687 2
30688 2
30689 5
30690 2
30691 1
30692 1
30693 1
30694 3
30695 1
30696 1
Name: range_of_shot, Length: 30697, dtype: int64
localhost:8888/notebooks/Untitled19.ipynb# 11/23
12/8/2019 Untitled19
localhost:8888/notebooks/Untitled19.ipynb# 12/23
12/8/2019 Untitled19
In [76]: df.distance_of_shot
Out[76]: 0 38.000000
1 35.000000
2 36.000000
3 42.000000
4 20.000000
5 34.000000
6 20.000000
7 22.000000
8 32.000000
9 32.000000
10 45.000000
11 37.000000
12 33.448884
13 29.000000
14 25.000000
15 40.000000
16 20.000000
17 45.000000
18 36.000000
19 20.000000
20 34.000000
21 38.000000
22 31.000000
23 38.000000
24 27.000000
25 20.000000
26 40.000000
27 46.000000
28 39.000000
29 28.000000
...
30667 45.000000
30668 42.000000
30669 37.000000
30670 27.000000
30671 20.000000
30672 30.000000
30673 31.000000
30674 44.000000
localhost:8888/notebooks/Untitled19.ipynb# 13/23
12/8/2019 Untitled19
30675 45.000000
30676 34.000000
30677 38.000000
30678 33.448884
30679 20.000000
30680 20.000000
30681 46.000000
30682 28.000000
30683 41.000000
30684 33.000000
30685 46.000000
30686 29.000000
30687 30.000000
30688 33.000000
30689 87.000000
30690 35.000000
30691 20.000000
30692 24.000000
30693 20.000000
30694 41.000000
30695 46.000000
30696 27.000000
Name: distance_of_shot, Length: 30697, dtype: float64
In [ ]:
localhost:8888/notebooks/Untitled19.ipynb# 14/23
12/8/2019 Untitled19
Out[84]:
Index match_event_id location_x location_y remaining_min power_of_shot knockout_match game_season remaining_sec distance_o
localhost:8888/notebooks/Untitled19.ipynb# 15/23
12/8/2019 Untitled19
In [85]: df=df[np.isfinite(df.is_goal)] #Assigning all the rows of df with finding values in is_goal to df
df
Out[85]:
Index match_event_id location_x location_y remaining_min power_of_shot knockout_match game_season remaining_sec distance_o
In [86]: # The column shot_id_number has a lot of nan values.It is important to fill those values since this column us needed in
# submission file. A loop is written and and the missing values of shot_id_number is assigned by adding to the correspon
# value of the Index column
i=0
while i<6268:
df2.shot_id_number.iloc[i]=df2.Index.iloc[i]+1
i=i+1
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py:189: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
localhost:8888/notebooks/Untitled19.ipynb# 16/23
12/8/2019 Untitled19
In [87]: df2.shot_id_number
Out[87]: 0 1.0
7 8.0
16 17.0
19 20.0
21 22.0
32 33.0
33 34.0
34 35.0
35 36.0
36 37.0
37 38.0
44 45.0
49 50.0
54 55.0
59 60.0
61 62.0
65 66.0
66 67.0
70 71.0
71 72.0
75 76.0
79 80.0
84 85.0
85 86.0
86 87.0
91 92.0
94 95.0
96 97.0
103 104.0
112 113.0
...
30567 30568.0
30569 30570.0
30580 30581.0
30583 30584.0
30590 30591.0
30593 30594.0
30613 30614.0
30616 30617.0
localhost:8888/notebooks/Untitled19.ipynb# 17/23
12/8/2019 Untitled19
30617 30618.0
30625 30626.0
30629 30630.0
30630 30631.0
30631 30632.0
30633 30634.0
30635 30636.0
30636 30637.0
30638 30639.0
30646 30647.0
30648 30649.0
30655 30656.0
30659 30660.0
30664 30665.0
30668 30669.0
30679 30680.0
30680 30681.0
30681 30682.0
30682 30683.0
30686 30687.0
30687 30688.0
30693 30694.0
Name: shot_id_number, Length: 6268, dtype: float64
In [ ]:
In [89]: X=df[['location_x','location_y','power_of_shot','distance_of_shot','area_of_shot','shot_basics','range_of_shot']]
# important features are taken to be fed to logistic regression
localhost:8888/notebooks/Untitled19.ipynb# 18/23
12/8/2019 Untitled19
In [90]: X.head()
Out[90]:
location_x location_y power_of_shot distance_of_shot area_of_shot shot_basics range_of_shot
In [91]: Y=df[['is_goal']]
In [93]: X2=df2[['location_x','location_y','power_of_shot','distance_of_shot','area_of_shot','shot_basics','range_of_shot']]
localhost:8888/notebooks/Untitled19.ipynb# 19/23
12/8/2019 Untitled19
In [94]: X2.head()
Out[94]:
location_x location_y power_of_shot distance_of_shot area_of_shot shot_basics range_of_shot
In [102]: df2.is_goal=k
localhost:8888/notebooks/Untitled19.ipynb# 20/23
12/8/2019 Untitled19
In [111]: k2=df2[['shot_id_number','is_goal']]
k2
Out[111]:
shot_id_number is_goal
0 1.0 0.598310
7 8.0 0.428584
16 17.0 0.385120
19 20.0 0.396475
21 22.0 0.600862
32 33.0 0.589593
33 34.0 0.595674
34 35.0 0.396205
35 36.0 0.403566
36 37.0 0.385752
37 38.0 0.594587
44 45.0 0.425638
49 50.0 0.628595
54 55.0 0.697189
59 60.0 0.684408
61 62.0 0.471838
65 66.0 0.532036
66 67.0 0.553568
70 71.0 0.610627
71 72.0 0.630920
75 76.0 0.652395
79 80.0 0.517826
84 85.0 0.473821
localhost:8888/notebooks/Untitled19.ipynb# 21/23
12/8/2019 Untitled19
shot_id_number is_goal
85 86.0 0.580721
86 87.0 0.578107
91 92.0 0.571578
94 95.0 0.541471
96 97.0 0.607626
localhost:8888/notebooks/Untitled19.ipynb# 22/23
12/8/2019 Untitled19
shot_id_number is_goal
In [ ]:
localhost:8888/notebooks/Untitled19.ipynb# 23/23