Download as pdf or txt
Download as pdf or txt
You are on page 1 of 6

PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

1 import pandas as pd
2 from matplotlib import pyplot as plt
3 %matplotlib inline

1 df = pd.read_csv("HR_comma_sep.csv")
2 df.head()

---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-2-928fbcd7c60a> in <module>
----> 1 df = pd.read_csv("HR_comma_sep.csv")
2 df.head()

5 frames
/usr/local/lib/python3.9/dist-packages/pandas/io/common.py in
get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors,
storage_options)
784 if ioargs.encoding and "b" not in ioargs.mode:
785 # Encoding
--> 786 handle = open(
787 handle,
788 ioargs.mode,

FileNotFoundError: [Errno 2] No such file or directory: 'HR_comma_sep.csv'

1 left = df[df.left==1]
2 left.shape

(3571, 10)

1 retained = df[df.left==0]
2 retained.shape

(11428, 10)

1 of 6 25-09-2023, 12:40
PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

1 df.groupby('left').mean()

satisfaction_level last_evaluation number_project average_montly_hours time_spend_

left

0 0.666810 0.715473 3.786664 199.060203

1 0.440098 0.718113 3.855503 207.419210

1 pd.crosstab(df.salary,df.left).plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x1442bf3eda0>

2 of 6 25-09-2023, 12:40
PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

1 pd.crosstab(df.Department,df.left).plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x1442bf75128>

1 subdf = df[['satisfaction_level','average_montly_hours','promotion_last_5years','salary']]
2 subdf.head()

satisfaction_level average_montly_hours promotion_last_5years salary

0 0.38 157 0 low

1 0.80 262 0 medium

2 0.11 272 0 medium

3 0.72 223 0 low

3 of 6 25-09-2023, 12:40
PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

4 0.37 159 0 low

1 salary_dummies = pd.get_dummies(subdf.salary, prefix="salary")

1 df_with_dummies = pd.concat([subdf,salary_dummies],axis='columns')

1 df_with_dummies.head()

satisfaction_level average_montly_hours promotion_last_5years salary salary_high

0 0.38 157 0 low

1 0.80 262 0 medium

2 0.11 272 0 medium

3 0.72 223 0 low

4 0.37 159 0 low

1 df_with_dummies.drop('salary',axis='columns',inplace=True)
2 df_with_dummies.head()

satisfaction_level average_montly_hours promotion_last_5years salary_high salary_low

0 0.38 157 0 0

1 0.80 262 0 0

2 0.11 272 0 0

3 0.72 223 0 0

4 0.37 159 0 0

4 of 6 25-09-2023, 12:40
PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

1 X = df_with_dummies
2 X.head()

satisfaction_level average_montly_hours promotion_last_5years salary_high salary_low

0 0.38 157 0 0

1 0.80 262 0 0

2 0.11 272 0 0

3 0.72 223 0 0

4 0.37 159 0 0

1 y = df.left

1 from sklearn.model_selection import train_test_split
2 X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.3)

1 from sklearn.linear_model import LogisticRegression
2 model = LogisticRegression()

1 model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

1 model.predict(X_test)

array([0, 0, 0, ..., 0, 0, 1], dtype=int64)

1 model.score(X_test,y_test)

0.78428571428571425

5 of 6 25-09-2023, 12:40
PA Lab Exp 4 Predicting HR Analytics using logistic _regression_exerci... https://colab.research.google.com/drive/1fE4-juU7B1VadoTPkDZWp...

6 of 6 25-09-2023, 12:40

You might also like