Professional Documents
Culture Documents
FRA MILESTONE1 Jupyter Notebook PDF
FRA MILESTONE1 Jupyter Notebook PDF
FRA MILESTONE1 Jupyter Notebook PDF
In [28]: df.head()
Tata Tele.
1 21214 -3986.19 1954.93 -2968.08 4458.20 7410.18 9070.86 -1098.88 486
Mah.
ABG
2 14852 -3192.58 53.84 506.86 7714.68 6944.54 1281.54 4496.25 9097
Shipyard
3 2439 GTL -3054.51 157.30 -623.49 2353.88 2326.05 1033.69 -2612.42 1034
Bharati
4 23505 -2967.36 50.30 -1070.83 4675.33 5740.90 1084.20 1836.23 4685
Defence
5 rows × 67 columns
In [29]: df.shape
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 1/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [31]: df.describe()
Out[31]:
Networth Next Equity Paid Capital
Co_Code Networth Total Debt G
Year Up Employed
8 rows × 66 columns
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 2/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [32]: df.info()
<class 'pandas.core.frame.DataFrame'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 3/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [33]: df.isnull().sum()
Out[33]: Co_Code 0
Co_Name 0
Equity Paid Up 0
Networth 0
...
In [34]: df.duplicated().sum()
Out[34]: 0
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 4/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[35]: (array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66]),
[Text(1, 0, 'Co_Code'),
Text(4, 0, 'Networth'),
Text(18, 0, 'PBIDT'),
Text(19, 0, 'PBDT'),
Text(20, 0, 'PBIT'),
Text(21, 0, 'PBT'),
Text(22, 0, 'PAT'),
Text(24, 0, 'CP'),
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 5/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Outlier treatment
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 6/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [39]: df = mod_outlier(df)
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 7/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[40]: (array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66]),
[Text(1, 0, 'Co_Code'),
Text(4, 0, 'Networth'),
Text(18, 0, 'PBIDT'),
Text(19, 0, 'PBDT'),
Text(20, 0, 'PBIT'),
Text(21, 0, 'PBT'),
Text(22, 0, 'PAT'),
Text(24, 0, 'CP'),
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 8/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[41]: Co_Code 0
Co_Name 0
Equity Paid Up 0
Networth 0
...
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 9/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [42]: df.fillna(df.median())
Out[42]: N
Networth Equity Capital Total Gross
Co_Code Co_Name Networth Workin
Next Year Paid Up Employed Debt Block
Capit
Tata Tele.
1 21214 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 -89.4062
Mah.
ABG
2 14852 -175.74125 43.16875 287.405 555.10875 180.83 328.8825 151.5237
Shipyard
Bharati
4 23505 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 151.5237
Defence
3581 4987 HDFC Bank 303.52875 43.16875 287.405 555.10875 180.83 328.8825 0.0000
3582 502 Vedanta 303.52875 43.16875 287.405 555.10875 180.83 328.8825 151.5237
3583 12002 IOCL 303.52875 43.16875 287.405 555.10875 180.83 328.8825 151.5237
3584 12001 NTPC 303.52875 43.16875 287.405 555.10875 180.83 328.8825 151.5237
3585 15542 Bharti Airtel 303.52875 43.16875 287.405 555.10875 180.83 328.8825 -89.4062
In [43]: df.isna().sum()
Out[43]: Co_Code 0
Co_Name 0
Equity Paid Up 0
Networth 0
...
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 10/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [44]: updated_df = df
updated_df['Inventory Velocity (Days)']=updated_df['Inventory Velocity (Days)'].f
#updated_df.info()
updated_df.isnull().sum()
Out[44]: Co_Code 0
Co_Name 0
Equity Paid Up 0
Networth 0
..
# Alternative imputation: fill each missing value from its 3 nearest neighbours.
imputer = KNNImputer(n_neighbors=3)
# KNNImputer only accepts numeric input, so impute the numeric columns and leave
# text columns such as Co_Name untouched.
numeric_cols = df.select_dtypes(include="number").columns
# Work on a copy so the original frame (and its index) stays available.
df_imputed = df.copy()
df_imputed[numeric_cols] = imputer.fit_transform(df[numeric_cols])
Ques 1.3
In [45]: dummy=pd.get_dummies(updated_df['Networth Next Year']>0)
dummy.head()
0 1 0
1 1 0
2 1 0
3 1 0
4 1 0
In [46]: default=()
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 11/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Out[48]: Net
Networth Equity Capital Total Gross
Co_Code Co_Name Networth Working
Next Year Paid Up Employed Debt Block
Capital
Tata Tele.
1 21214 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 -89.40625
Mah.
ABG
2 14852 -175.74125 43.16875 287.405 555.10875 180.83 328.8825 151.52375
Shipyard
Bharati
4 23505 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 151.52375
Defence
In [ ]:
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 12/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
0 -175.74125 1
1 -175.74125 1
2 -175.74125 1
3 -175.74125 1
4 -175.74125 1
3581 303.52875 0
3582 303.52875 0
3583 303.52875 0
3584 303.52875 0
3585 303.52875 0
In [52]: df.head()
0 -175.74125 1
1 -175.74125 1
2 -175.74125 1
3 -175.74125 1
4 -175.74125 1
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 13/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[53]: Net
Networth Equity Capital Total Gross
Co_Code Co_Name Networth Working
Next Year Paid Up Employed Debt Block
Capital
Tata Tele.
1 21214 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 -89.40625
Mah.
ABG
2 14852 -175.74125 43.16875 287.405 555.10875 180.83 328.8825 151.52375
Shipyard
Bharati
4 23505 -175.74125 43.16875 -166.215 555.10875 180.83 328.8825 151.52375
Defence
5 rows × 69 columns
In [54]: df1.default.unique()
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 14/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [56]: df1.info()
<class 'pandas.core.frame.DataFrame'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 15/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [ ]:
Out[57]: <AxesSubplot:xlabel='Networth'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 16/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [58]: df1.Networth.plot.density(color='green')
plt.show()
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 17/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 18/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 19/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [65]: sns.boxplot(data=df1,x=df1['PBIDT'])
Out[65]: <AxesSubplot:xlabel='PBIDT'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 20/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [66]: sns.boxplot(data=df1,x=df1['PBT'])
Out[66]: <AxesSubplot:xlabel='PBT'>
In [67]: sns.boxplot(data=df1,x=df1['PAT'])
Out[67]: <AxesSubplot:xlabel='PAT'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 21/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 22/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[70]: <AxesSubplot:>
Out[71]: <AxesSubplot:ylabel='Frequency'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 23/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[72]: <AxesSubplot:ylabel='Frequency'>
Out[73]: <AxesSubplot:ylabel='Frequency'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 24/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[74]: <AxesSubplot:ylabel='Frequency'>
Out[75]: <AxesSubplot:ylabel='Frequency'>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 25/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 26/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 27/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 28/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [82]: plt.subplots(figsize=(20,15))
corr = df1.corr()
sns.heatmap(corr)
Out[82]: <AxesSubplot:>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 29/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[84]: Current
Net
Equity Capital Total Gross Current Liabilities
Networth Working
Paid Up Employed Debt Block Assets and Assets/L
Capital
Provisions
In [ ]:
In [86]: # Question 6: splitting data into training and test set for independent attribute
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .33, random
Out[87]: 1184
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 30/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [88]: X_train.shape[0]
Out[88]: 2402
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 31/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
---------------------------------------------------------------------------
<ipython-input-100-5b5c3943cc78> in <module>
2 log_reg = LogisticRegression()
~\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py in fit(self,
X, y, sample_weight)
1343
1345 order="C",
~\anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y,
reset, validate_separately, **check_params)
432 else:
434 out = X, y
435
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args,
**kwargs)
62 if extra_args <= 0:
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y,
accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, en
sure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_
numeric, estimator)
813
815 accept_large_sparse=accept_large_sparse,
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args,
**kwargs)
62 if extra_args <= 0:
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(arr
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 32/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
615 else:
103
104
1897
1900
103
104
In [ ]:
In [ ]:
In [ ]:
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 33/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [ ]:
In [ ]:
In [ ]:
In [ ]:
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 34/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [96]: #df1.info()
In [109]:
!pip install scikit-learn==0.23.1
!pip install imbalanced-learn==0.7.0
Collecting scikit-learn==0.23.1
Uninstalling scikit-learn-0.24.1:
ERROR: Could not install packages due to an OSError: [WinError 5] Access is den
ied: 'C:\\Users\\Sonal\\anaconda3\\Lib\\site-packages\\~klearn\\cluster\\_dbsca
n_inner.cp38-win_amd64.pyd'
Collecting imbalanced-learn==0.7.0
Uninstalling imbalanced-learn-0.8.1:
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 35/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
Out[113]: '0.24.1'
In [114]:
import imblearn
imblearn.__version__
Out[114]: '0.8.1'
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 36/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [121]:
# NOTE(review): the recorded run of this cell did NOT succeed - the traceback below is
# raised from sklearn's check_X_y / check_array input validation. Presumably X_train
# still holds missing or non-numeric values; verify the inputs before resampling.
X_res, Y_res = sm.fit_resample(X_train, y_train)
---------------------------------------------------------------------------
<ipython-input-121-576249c89e94> in <module>
1 # Now works
~\anaconda3\lib\site-packages\imblearn\base.py in fit_resample(self, X, y)
75 check_classification_targets(y)
76 arrays_transformer = ArraysTransformer(X, y)
78
79 self.sampling_strategy_ = check_sampling_strategy(
134 X, y = self._validate_data(
431 else:
434
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, ac
cept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_
2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric,
estimator)
812
813 check_consistent_length(X, y)
--> 814
815 return X, y
816
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 37/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array,
accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensur
e_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
101
103
104
1897
1900
101
103
104
---------------------------------------------------------------------------
<ipython-input-120-bb5496dc5f13> in <module>
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 38/39
11/11/21, 5:51 PM Sonal_FRA_MILESTONE1 - Jupyter Notebook
In [ ]:
localhost:8889/notebooks/Desktop/GreatLakes/FRA/Sonal_FRA_MILESTONE1.ipynb 39/39