Professional Documents
Culture Documents
Fifa World Cup History Analysis
Fifa World Cup History Analysis
Fifa World Cup History Analysis
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [50]: data1.head()
13 Jul LOMBARDI
Group CRISTOPHE REGO Gilberto
0 1930.0 1930 - Pocitos Montevideo France 4.0 1.0 Mexico 4444.0 3.0 0.0 Domingo 201.0 1096.0 FRA MEX
1 Henry (BEL) (BRA)
15:00 (URU)
13 Jul MATEUCCI
Group Parque MACIAS Jose WARNKEN
1 1930.0 1930 - Montevideo USA 3.0 0.0 Belgium 18346.0 2.0 0.0 Francisco 201.0 1090.0 USA BEL
4 Central (ARG) Alberto (CHI)
15:00 (URU)
14 Jul
Group Parque TEJADA VALLARINO BALWAY
2 1930.0 1930 - Montevideo Yugoslavia 2.0 1.0 Brazil 24059.0 2.0 0.0 201.0 1093.0 YUG BRA
2 Central Anibal (URU) Ricardo (URU) Thomas (FRA)
12:45
14 Jul
Group WARNKEN LANGENUS MATEUCCI
3 1930.0 1930 - Pocitos Montevideo Romania 3.0 1.0 Peru 2549.0 1.0 0.0 201.0 1098.0 ROU PER
3 Alberto (CHI) Jean (BEL) Francisco (URU)
14:50
In [51]: data2.head()
Out[51]: RoundID MatchID Team Initials Coach Name Line-up Shirt Number Player Name Position Event
2 201 1096 FRA CAUDRON Raoul (FRA) S 0 Marcel LANGILLER NaN G40'
3 201 1096 MEX LUQUE Juan (MEX) S 0 Juan CARRENO NaN G70'
4 201 1096 FRA CAUDRON Raoul (FRA) S 0 Ernest LIBERATI NaN NaN
In [52]: data3.head()
Out[52]: Year Country Winner Runners-Up Third Fourth GoalsScored QualifiedTeams MatchesPlayed Attendance
In [54]: #-----DATASET LOADED NOW WE WILL GET SOME INSIGHTS ABOUT THE DATA------------
Out[56]: Winner
Brazil 5
Italy 4
Germany FR 3
Uruguay 2
Argentina 2
England 1
France 1
Spain 1
Germany 1
In [57]: # Insight 1 : Brazil has won the tournament the most times
0
Brazil Italy Germany FR Uruguay Argentina England France Spain Germany
Export to plot.ly »
In [60]: data_winner.head()
Out[60]: Winner
Brazil 5
Italy 4
Germany FR 3
Uruguay 2
Argentina 2
In [61]: data_runner_up.head()
Out[61]: Runners-Up
Argentina 3
Germany FR 3
Netherlands 3
Czechoslovakia 2
Hungary 2
In [62]: data_third.head()
Out[62]: Third
Germany 3
Brazil 2
Sweden 2
France 2
Poland 2
# Replace missing entries in `teams` with 0, modifying the object in place
# (presumably teams absent from one of the winner / runner-up / third tallies — TODO confirm).
teams.fillna(0,inplace=True)
In [65]: teams=teams.astype(int)
In [66]: # Insight 2 : A complete depiction of the number of World Cups won, and of runners-up and third-place finishes, by the various participating teams
5 Winner
Runners-Up
Third
3
Count
0
Brazil Italy Germany FR Uruguay Argentina England France Spain Germany Netherlands Czechoslovakia Hungary Sweden Poland USA Austria Chile Portugal Croatia Turkey
In [68]: data1.head()
13 Jul LOMBARDI
Group CRISTOPHE REGO Gilberto
0 1930.0 1930 - Pocitos Montevideo France 4.0 1.0 Mexico 4444.0 3.0 0.0 Domingo 201.0 1096.0 FRA MEX
1 Henry (BEL) (BRA)
15:00 (URU)
13 Jul MATEUCCI
Group Parque MACIAS Jose WARNKEN
1 1930.0 1930 - Montevideo USA 3.0 0.0 Belgium 18346.0 2.0 0.0 Francisco 201.0 1090.0 USA BEL
4 Central (ARG) Alberto (CHI)
15:00 (URU)
14 Jul
Group Parque TEJADA VALLARINO BALWAY
2 1930.0 1930 - Montevideo Yugoslavia 2.0 1.0 Brazil 24059.0 2.0 0.0 201.0 1093.0 YUG BRA
2 Central Anibal (URU) Ricardo (URU) Thomas (FRA)
12:45
14 Jul
Group WARNKEN LANGENUS MATEUCCI
3 1930.0 1930 - Pocitos Montevideo Romania 3.0 1.0 Peru 2549.0 1.0 0.0 201.0 1098.0 ROU PER
3 Alberto (CHI) Jean (BEL) Francisco (URU)
14:50
In [70]: data_home.head()
0 France 4.0
1 USA 3.0
2 Yugoslavia 2.0
3 Romania 3.0
4 Argentina 1.0
In [71]: data_away.head()
0 Mexico 1.0
1 Belgium 0.0
2 Brazil 1.0
3 Peru 1.0
4 France 0.0
In [74]: data_country_goals
0 France 4.0
1 USA 3.0
2 Yugoslavia 2.0
3 Romania 3.0
4 Argentina 1.0
In [75]: # The table above contains all the goals, both home and away, but the same country can appear in several rows with different values, so we sum the goals per country.
# Group the combined home/away rows by the 'Countries' column and sum each group,
# giving one row per country
# (assumes data_country_goals is a pandas DataFrame — TODO confirm against the cell that builds it).
data_final_country_goal=data_country_goals.groupby('Countries').sum()
In [77]: final_data=final_data[:10]
final_data
Out[77]: Goals
Countries
Brazil 225.0
Argentina 133.0
Germany FR 131.0
Italy 128.0
France 108.0
Germany 104.0
Spain 92.0
Netherlands 91.0
Hungary 87.0
Uruguay 80.0
In [78]: # Insight 3 : Brazil scored the most goals throughout the history of the World Cup, followed by Argentina and Germany FR.
# Bar chart of the ten rows kept in final_data, one red bar per country.
# NOTE(review): .iplot is not a core pandas method — presumably supplied by cufflinks/plotly; confirm the setup cell.
final_data.iplot(kind='bar',yTitle='No of Goals',title='Countries with maximum number of goals',colors='red',xTitle='Country')
200
150
No of Goals
100
50
0
Brazil Argentina Germany FR Italy France Germany Spain Netherlands Hungary Uruguay
Country
Export to plot.ly »
In [79]: # Comparing half time home goals scored and half time away goals scored
Brazil 66.0
Argentina 48.0
Germany FR 38.0
Italy 36.0
Hungary 33.0
... ...
Norway 0.0
Iran 0.0
Iraq 0.0
78 rows × 1 columns
Spain 20.0
Germany 18.0
Netherlands 18.0
France 17.0
Brazil 17.0
... ...
China PR 0.0
Slovenia 0.0
Slovakia 0.0
Haiti 0.0
83 rows × 1 columns
83 rows × 2 columns
In [83]: # Creating total goals columns to order the table based on total number of goals scored by a team
# Add a combined half-time goals column, rank the teams by it (highest first)
# and keep only the first ten rows for display.
total['total_goals'] = total['Half-time Home Goals'].add(total['Half-time Away Goals'])
total = total.sort_values(by='total_goals', ascending=False).head(10)
total
In [85]: # Insight 4 : Brazil has again scored the majority of its (half-time) goals playing as the home team, whereas Spain scored more goals playing as the away team than as the home team
# Bar chart of the ten rows kept in `total`, one group of bars per country.
# NOTE(review): .iplot is not a core pandas method — presumably supplied by cufflinks/plotly; confirm the setup cell.
total.iplot(kind='bar',yTitle='Count',title='Country wise analysis',xTitle='Country')
50
40
Count
30
20
10
0
Brazil Argentina Germany FR Italy Germany France Hungary Netherlands Spain Uruguay
Country
Export to plot.ly »
In [87]: data1.head(10)
13 Jul LOMBARDI
Group CRISTOPHE REGO Gilberto
0 1930.0 1930 - Pocitos Montevideo France 4.0 1.0 Mexico 4444.0 3.0 0.0 Domingo 201.0 1096.0 FRA MEX
1 Henry (BEL) (BRA)
15:00 (URU)
13 Jul MATEUCCI
Group Parque MACIAS Jose WARNKEN
1 1930.0 1930 - Montevideo USA 3.0 0.0 Belgium 18346.0 2.0 0.0 Francisco 201.0 1090.0 USA BEL
4 Central (ARG) Alberto (CHI)
15:00 (URU)
14 Jul
Group Parque TEJADA VALLARINO BALWAY
2 1930.0 1930 - Montevideo Yugoslavia 2.0 1.0 Brazil 24059.0 2.0 0.0 201.0 1093.0 YUG BRA
2 Central Anibal (URU) Ricardo (URU) Thomas (FRA)
12:45
14 Jul MATEUCCI
Group WARNKEN LANGENUS
3 1930.0 1930 - Pocitos Montevideo Romania 3.0 1.0 Peru 2549.0 1.0 0.0 Francisco 201.0 1098.0 ROU PER
3 Alberto (CHI) Jean (BEL)
14:50 (URU)
15 Jul RADULESCU
Group Parque REGO SAUCEDO
4 1930.0 1930 - Montevideo Argentina 1.0 0.0 France 23409.0 0.0 0.0 Constantin 201.0 1085.0 ARG FRA
1 Central Gilberto (BRA) Ulises (BOL)
16:00 (ROU)
16 Jul
Group Parque CRISTOPHE APHESTEGUY LANGENUS
5 1930.0 1930 - Montevideo Chile 3.0 0.0 Mexico 9249.0 1.0 0.0 201.0 1095.0 CHI MEX
1 Central Henry (BEL) Martin (URU) Jean (BEL)
14:45
17 Jul
Group Parque MACIAS Jose APHESTEGUY TEJADA Anibal
7 1930.0 1930 - Montevideo USA 3.0 0.0 Paraguay 18306.0 2.0 0.0 201.0 1097.0 USA PAR
4 Central (ARG) Martin (URU) (URU)
14:45
18 Jul
Group Estadio LANGENUS BALWAY CRISTOPHE
8 1930.0 1930 - Montevideo Uruguay 1.0 0.0 Peru 57735.0 0.0 0.0 201.0 1099.0 URU PER
3 Centenario Jean (BEL) Thomas (FRA) Henry (BEL)
14:30
19 Jul LOMBARDI
Group Estadio TEJADA REGO Gilberto
9 1930.0 1930 - Montevideo Chile 1.0 0.0 France 2000.0 0.0 0.0 Domingo 201.0 1094.0 CHI FRA
1 Centenario Anibal (URU) (BRA)
12:50 (URU)
In [88]: # We did not consider drawn matches here, because many cells contain NaN values, which would be treated as 0 and could
# have biased our pie chart.
def winner(data1):
    """Label one match row with the side that scored more goals.

    Parameters
    ----------
    data1
        A single match record that supports lookup by the keys
        'Home Team Goals' and 'Away Team Goals' (for example a dict or a
        DataFrame row).

    Returns
    -------
    str or None
        'Home team won' if the home score is strictly greater,
        'Away Team won' if the away score is strictly greater, and None
        otherwise (equal scores, or scores such as NaN for which both
        comparisons are false).
    """
    home_goals = data1['Home Team Goals']
    away_goals = data1['Away Team Goals']

    if home_goals > away_goals:
        return 'Home team won'
    if home_goals < away_goals:
        return 'Away Team won'
    # Draws and rows with missing scores are deliberately left unlabeled so
    # that they are excluded from the home-vs-away win counts.
    return None
In [90]: data1['winner'].value_counts()
plt.show()
In [93]: # Insight 5 : 73.6 % of the matches were won by home team while 26.4 % were won by away team.
In [94]: data2.head()
Out[94]: RoundID MatchID Team Initials Coach Name Line-up Shirt Number Player Name Position Event
2 201 1096 FRA CAUDRON Raoul (FRA) S 0 Marcel LANGILLER NaN G40'
3 201 1096 MEX LUQUE Juan (MEX) S 0 Juan CARRENO NaN G70'
4 201 1096 FRA CAUDRON Raoul (FRA) S 0 Ernest LIBERATI NaN NaN
BRA 2403
ITA 1843
ARG 1807
ENG 1378
FRG 1364
... ...
KUW 66
CAN 66
UAE 66
CUB 45
INH 18
82 rows × 1 columns
# Insight 6 : Brazil has fielded the largest number of players, followed by Italy and Argentina
2500
2000
No of players
1500
1000
500
0
BRA
ITA
ARG
ENG
FRG
FRA
ESP
NED
MEX
URU
GER
SWE
BEL
YUG
SUI
CHI
USA
HUN
KOR
POL
URS
TCH
AUT
PAR
POR
BUL
CMR
SCO
COL
ROU
NGA
JPN
CRC
CRO
DEN
PER
ALG
AUS
KSA
IRL
NIR
MAR
GHA
TUN
IRN
GRE
ECU
TUR
CIV
RSA
HON
RUS
NOR
PRK
SVN
NZL
GDR
SLV
BOL
UKR
SEN
WAL
SVK
EGY
CHN
TRI
SRB
TOG
CZE
ANG
SCG
BIH
IRQ
JAM
ISR
ZAI
HAI
KUW
CAN
UAE
CUB
INH
Country code
Export to plot.ly »
In [100… # BUILDING BEE SWARM PLOT FOR NUMBER OF PLAYERS AND FINDING MINIMUM AND MAXIMUM RANGE
# Use the same pale-yellow background for the figure and the axes.
plt.rcParams.update({
    'figure.facecolor': "#ffffe6",
    'axes.facecolor': "#ffffe6",
})

# Swarm plot of the 'Team Initials' column of d2 along the x axis.
p2 = sns.swarmplot(data=d2, x='Team Initials', palette='inferno')

# Axis labels and title, then render.
plt.xlabel('Number Of Players')
plt.ylabel('Range of Players')
plt.title('Finding the range of players played from Minimum To Maximum')
plt.show()
In [101… # Insight 7 : Most of the teams have fielded between 0 and 500 players over the whole competition, and only a handful of countries have fielded more than 1500 players in the history of the World Cup.
In [102… # Insights,
In [ ]: