Professional Documents
Culture Documents
Police Gender Pay Gap
Police Gender Pay Gap
# Load the cleaned extract into a DataFrame; the code below reads its 'Name'
# column. NOTE(review): presumably this table also carries the 'Salary' column
# summarised in the outputs further down -- confirm against the CSV.
data = pd.read_csv('extracted_table_cleaned202403201133.csv')
def parse_name(name):
    """Return the first given name from a full-name string.

    The text after the last comma is treated as the given-name part (so
    ``"Last, First Middle"`` yields ``"First"``); when there is no comma the
    whole string is used (so ``"First Last"`` yields ``"First"``).

    Args:
        name: Full name as read from the data. Non-string values (for
            example the NaN pandas uses for an empty cell) are tolerated.

    Returns:
        The first whitespace-delimited token of the given-name part, or an
        empty string when the input is not a string or has no such token.
    """
    if not isinstance(name, str):
        # Missing cells arrive as float NaN; there is no name to parse.
        return ''
    given_part = name.rpartition(',')[2]
    # split() with no argument splits on any whitespace run (tabs, double
    # spaces), not just a single literal space.
    tokens = given_part.split()
    return tokens[0] if tokens else ''
def get_gender(data, manual=None):
    """Add a 'gender' column to ``data`` based on each row's first name.

    Each value of ``data['Name']`` is reduced to a first name with
    ``parse_name`` and passed to ``d.get_gender``. The label is then cleaned:
    a manual override wins, 'mostly_male' / 'mostly_female' are folded into
    'male' / 'female', and any other label is kept as-is after printing a
    warning.

    NOTE(review): the ``def`` line was missing from the source; this
    signature is reconstructed from the call ``get_gender(data, manual)``.
    ``d`` is a module-level object defined outside this block -- presumably a
    name-based gender detector; confirm.

    Args:
        data: DataFrame with a 'Name' column. It is modified in place.
        manual: Optional mapping of parsed first name -> gender label that
            takes precedence over the detector's result.

    Returns:
        The same ``data`` object, with the 'gender' column set.
    """
    # Labels that only lean one way are folded into the plain label.
    collapse = {'mostly_male': 'male', 'mostly_female': 'female'}
    overrides = manual or {}

    cleaned_results = []
    # Walk the column values directly so results stay aligned with row
    # position even when the DataFrame index is not 0..n-1 (e.g. after a
    # filter); indexing a list by the index label would misalign or fail.
    for full_name in data['Name']:
        name = parse_name(full_name)
        result = d.get_gender(name)
        if name in overrides:
            cleaned_results.append(overrides[name])
            continue
        result = collapse.get(result, result)
        if result not in ('male', 'female'):
            # Keep the unresolved label but surface it so it can be added
            # to the manual overrides.
            print(f"warning, {name} is {result}")
        cleaned_results.append(result)

    data['gender'] = cleaned_results
    return data
# In [16]:
# Manual overrides passed to get_gender: parsed first name -> gender label.
manual = {
    'example': 'female',
}
data = get_gender(data, manual)
Out[20]: Salary
count 28.000000
mean 67355.035714
std 42287.350621
min 39562.000000
25% 44059.000000
50% 56551.000000
75% 71848.000000
max 217782.000000
Out[22]: Salary
count 38.000000
mean 54739.473684
std 15880.882804
min 33238.000000
25% 42926.500000
50% 49841.000000
75% 64508.000000
max 97562.000000
In [ ]: