Source Code For Logistic Regression and Dijkstra's Algorithm

In [57]: # importing pandas library and data file in csv format
In [58]: import pandas as pd
In [59]: df = pd.read_csv("DATA1.csv",skiprows=1)
df
Out[59]: Building Monday Tuesday Wednesday Thursday Friday Saturday Sunday
0 A 91.0 95.0 97.0 99.0 90.0 95.0 54.0
1 B 234.0 281.0 293.0 280.0 269.0 165.0 34.0
2 C 156.0 149.0 174.0 167.0 146.0 61.0 8.0
3 D 81.0 120.0 110.0 91.0 95.0 68.0 8.0
4 E 252.0 231.0 287.0 259.0 273.0 226.0 6.0
5 F 97.0 117.0 134.0 117.0 118.0 74.0 1.0
6 G 21.0 21.0 21.0 21.0 21.0 21.0 0.0
7 H 7.0 8.0 7.0 7.0 7.0 8.0 4.0
8 I 62.0 56.0 48.0 42.0 42.0 44.0 0.0
9 J 2.0 0.0 0.0 2.0 2.0 0.0 0.0
10 K 23.0 38.0 30.0 27.0 25.0 15.0 0.0
11 L 4.0 3.0 0.0 0.0 0.0 0.0 0.0
12 M 14.0 24.0 19.0 29.0 23.0 14.0 18.0
13 NaN NaN NaN NaN NaN NaN NaN NaN
14 waste collected or not collected : collected (... NaN NaN NaN NaN NaN NaN NaN
15 A 1.0 1.0 1.0 1.0 1.0 1.0 0.0
16 B 1.0 1.0 1.0 1.0 1.0 1.0 0.0
17 C 1.0 1.0 1.0 1.0 1.0 1.0 0.0
18 D 1.0 1.0 1.0 1.0 1.0 1.0 0.0
19 E 1.0 1.0 1.0 1.0 1.0 1.0 0.0
20 F 0.0 0.0 1.0 1.0 1.0 1.0 0.0
21 G 0.0 0.0 0.0 0.0 0.0 0.0 0.0
22 H 0.0 0.0 0.0 0.0 0.0 0.0 0.0
23 I 1.0 0.0 0.0 0.0 0.0 0.0 0.0
24 J 0.0 0.0 0.0 0.0 0.0 0.0 0.0
25 K 0.0 0.0 0.0 0.0 0.0 0.0 0.0
26 L 0.0 0.0 0.0 0.0 0.0 0.0 0.0
27 M 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [60]: # rearranging the data
In [61]: class_schedule = df.iloc[0:13]

class_schedule.set_index('Building',inplace=True)
class_schedule
Out[61]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Building
A 91.0 95.0 97.0 99.0 90.0 95.0 54.0
B 234.0 281.0 293.0 280.0 269.0 165.0 34.0
C 156.0 149.0 174.0 167.0 146.0 61.0 8.0
D 81.0 120.0 110.0 91.0 95.0 68.0 8.0
E 252.0 231.0 287.0 259.0 273.0 226.0 6.0
F 97.0 117.0 134.0 117.0 118.0 74.0 1.0
G 21.0 21.0 21.0 21.0 21.0 21.0 0.0
H 7.0 8.0 7.0 7.0 7.0 8.0 4.0
I 62.0 56.0 48.0 42.0 42.0 44.0 0.0
J 2.0 0.0 0.0 2.0 2.0 0.0 0.0
K 23.0 38.0 30.0 27.0 25.0 15.0 0.0
L 4.0 3.0 0.0 0.0 0.0 0.0 0.0
M 14.0 24.0 19.0 29.0 23.0 14.0 18.0
In [62]: waste_info = df.iloc[15:28]

waste_info.set_index("Building",inplace=True)
waste_info
Out[62]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Building
A 1.0 1.0 1.0 1.0 1.0 1.0 0.0
B 1.0 1.0 1.0 1.0 1.0 1.0 0.0
C 1.0 1.0 1.0 1.0 1.0 1.0 0.0
D 1.0 1.0 1.0 1.0 1.0 1.0 0.0
E 1.0 1.0 1.0 1.0 1.0 1.0 0.0
F 0.0 0.0 1.0 1.0 1.0 1.0 0.0
G 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I 1.0 0.0 0.0 0.0 0.0 0.0 0.0
J 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K 0.0 0.0 0.0 0.0 0.0 0.0 0.0
L 0.0 0.0 0.0 0.0 0.0 0.0 0.0
M 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [63]: # creating a new dataframe with new set of columns
In [64]: days = list(class_schedule.columns)

buildings = list(class_schedule.index)
print(days,buildings)
['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M']
In [65]: new_cols = days + buildings + ["classes","waste_collected"]

new_cols
Out[65]: ['Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'classes',
'waste_collected']
In [66]: new_df = pd.DataFrame(columns=new_cols)

new_df
Out[66]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday A B C ... F G H I J K L M classes waste_collected
0 rows × 22 columns
In [67]: # 'one hot encoding' for assigning numeric values to string or character inputs
In [68]: for day in days:

for building in buildings:
row=[0]*22
dI = new_cols.index(day)
row[dI] = 1
bI = new_cols.index(building)
row[bI] = 1
row[-2] = class_schedule[day][building]
row[-1] = waste_info[day][building]
series_row = pd.Series(row, index=new_df.columns )
new_df = new_df.append(series_row,ignore_index=True)
new_df
Out[68]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday A B C ... F G H I J K L M classes waste_collected
0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 91.0 1.0
1 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 234.0 1.0
2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 156.0 1.0
3 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 81.0 1.0
4 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 252.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
86 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
87 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
88 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
89 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0
90 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 18.0 0.0
91 rows × 22 columns
In [69]: # performing Logistic Regression with ScikitLearn
In [70]: from sklearn.linear_model import LogisticRegressionCV
In [71]: # taking 'waste_collected' as 'y' i.e the dependent variable (to be predicted) and all other columns as the independent variable 'x.'
In [72]: y = new_df['waste_collected']
X = new_df.drop(['waste_collected'],axis=1)
In [73]: my_model = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)

result = my_model.predict_proba(X)
C:\Users\tapin\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:762: ConvergenceWarning: lbfgs failed to converge (status=1):

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
In [74]: # no. of classes into which the output is classified and their probabilities
In [75]: print(my_model.classes_)
result
[0. 1.]
Out[75]: array([[1.57815565e-01, 8.42184435e-01],
[3.18800532e-06, 9.99996812e-01],
[1.26868415e-03, 9.98731316e-01],
[2.86993656e-01, 7.13006344e-01],
[7.99583728e-07, 9.99999200e-01],
[1.06798461e-01, 8.93201539e-01],
[9.76032728e-01, 2.39672721e-02],
[9.91688138e-01, 8.31186239e-03],
[6.35804123e-01, 3.64195877e-01],
[9.94323294e-01, 5.67670566e-03],
[9.72179252e-01, 2.78207480e-02],
[9.93386831e-01, 6.61316874e-03],
[9.85862147e-01, 1.41378533e-02],
[1.21987791e-01, 8.78012209e-01],
[8.69018382e-08, 9.99999913e-01],
[2.18780436e-03, 9.97812196e-01],
[1.98820681e-02, 9.80117932e-01],
[4.04561838e-06, 9.99995954e-01],
[2.52800024e-02, 9.74719998e-01],
[9.76221656e-01, 2.37783441e-02],
[9.91102201e-01, 8.89779874e-03],
[7.36182182e-01, 2.63817818e-01],
[9.95166967e-01, 4.83303258e-03],
[9.17555227e-01, 8.24447728e-02],
[9.93922003e-01, 6.07799708e-03],
[9.70245840e-01, 2.97541595e-02],
[1.05820160e-01, 8.94179840e-01],
[3.43349987e-08, 9.99999966e-01],
[3.19024161e-04, 9.99680976e-01],
[4.16282878e-02, 9.58371712e-01],
[5.44212012e-08, 9.99999946e-01],
[6.93066679e-03, 9.93069333e-01],
[9.76063558e-01, 2.39364425e-02],
[9.91699001e-01, 8.30099938e-03],
[8.36714156e-01, 1.63285844e-01],
[9.95134206e-01, 4.86579448e-03],
[9.53351615e-01, 4.66483851e-02],
[9.95134222e-01, 4.86577785e-03],
[9.79404772e-01, 2.05952283e-02],
[9.20581516e-02, 9.07941848e-01],
[9.31166202e-08, 9.99999907e-01],
[5.45561274e-04, 9.99454439e-01],
[1.57379628e-01, 8.42620372e-01],
[4.67183667e-07, 9.99999533e-01],
[2.50898522e-02, 9.74910148e-01],
[9.76041201e-01, 2.39587990e-02],
[9.91691123e-01, 8.30887680e-03],
[8.90312425e-01, 1.09687575e-01],
[9.94325339e-01, 5.67466119e-03],
[9.62559588e-01, 3.74404117e-02],
[9.95129589e-01, 4.87041134e-03],
[9.56592342e-01, 4.34076584e-02],
[1.68295711e-01, 8.31704289e-01],
[2.16696040e-07, 9.99999783e-01],
[2.73109190e-03, 9.97268908e-01],
[1.20734381e-01, 8.79265619e-01],
[1.59317110e-07, 9.99999841e-01],
[2.32695744e-02, 9.76730426e-01],
[9.76032702e-01, 2.39672984e-02],
[9.91688128e-01, 8.31187163e-03],
[8.90276933e-01, 1.09723067e-01],
[9.94323288e-01, 5.67671198e-03],
[9.67708269e-01, 3.22917311e-02],
[9.95127827e-01, 4.87217291e-03],
[9.72166606e-01, 2.78333943e-02],
[1.20231749e-01, 8.79768251e-01],
[6.33252930e-04, 9.99366747e-01],
[6.50420307e-01, 3.49579693e-01],
[5.20045116e-01, 4.79954884e-01],
[5.84290789e-06, 9.99994157e-01],
[4.09646391e-01, 5.90353609e-01],
[9.75835668e-01, 2.41643318e-02],
[9.90955528e-01, 9.04447231e-03],
[8.73420462e-01, 1.26579538e-01],
[9.95086966e-01, 4.91303367e-03],
[9.84631053e-01, 1.53689471e-02],
[9.95086983e-01, 4.91301687e-03],
[9.85744725e-01, 1.42552755e-02],
[7.63255733e-01, 2.36744267e-01],
[9.37645265e-01, 6.23547353e-02],
[9.91018466e-01, 8.98153396e-03],
[9.90992328e-01, 9.00767225e-03],
[9.92317771e-01, 7.68222921e-03],
[9.94798656e-01, 5.20134415e-03],
[9.95145057e-01, 4.85494283e-03],
[9.93406604e-01, 6.59339645e-03],
[9.95144870e-01, 4.85512957e-03],
[9.95141809e-01, 4.85819143e-03],
[9.95147901e-01, 4.85209900e-03],
[9.95141825e-01, 4.85817482e-03],
[9.80927963e-01, 1.90720373e-02]])
In [76]: # printing the probability output in the tabular form
In [77]: waste_not_collected = pd.DataFrame(columns = ['Day','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'])
waste_collected = pd.DataFrame(columns = ['Day','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'])
i=0
for day in days:
row_nc = []
row_c = []
row_nc.append(day)
row_c.append(day)
i+=1
row_nc.append(round(result[i-1][0],3))
row_c.append(round(result[i-1][1],3))
series_nc = pd.Series(row_nc, index=waste_not_collected.columns )
waste_not_collected = waste_not_collected.append(series_nc,ignore_index=True)
series_c = pd.Series(row_c, index=waste_collected.columns )
waste_collected = waste_collected.append(series_c,ignore_index=True)
In [78]: waste_not_collected
Out[78]: Day A B C D E F G H I J K L M
0 Monday 0.158 0.000 0.001 0.287 0.000 0.107 0.976 0.992 0.636 0.994 0.972 0.993 0.986
1 Tuesday 0.122 0.000 0.002 0.020 0.000 0.025 0.976 0.991 0.736 0.995 0.918 0.994 0.970
2 Wednesday 0.106 0.000 0.000 0.042 0.000 0.007 0.976 0.992 0.837 0.995 0.953 0.995 0.979
3 Thursday 0.092 0.000 0.001 0.157 0.000 0.025 0.976 0.992 0.890 0.994 0.963 0.995 0.957
4 Friday 0.168 0.000 0.003 0.121 0.000 0.023 0.976 0.992 0.890 0.994 0.968 0.995 0.972
5 Saturday 0.120 0.001 0.650 0.520 0.000 0.410 0.976 0.991 0.873 0.995 0.985 0.995 0.986
6 Sunday 0.763 0.938 0.991 0.991 0.992 0.995 0.995 0.993 0.995 0.995 0.995 0.995 0.981
In [79]: waste_collected
0 Monday 0.842 1.000 0.999 0.713 1.000 0.893 0.024 0.008 0.364 0.006 0.028 0.007 0.014
1 Tuesday 0.878 1.000 0.998 0.980 1.000 0.975 0.024 0.009 0.264 0.005 0.082 0.006 0.030
2 Wednesday 0.894 1.000 1.000 0.958 1.000 0.993 0.024 0.008 0.163 0.005 0.047 0.005 0.021
3 Thursday 0.908 1.000 0.999 0.843 1.000 0.975 0.024 0.008 0.110 0.006 0.037 0.005 0.043
4 Friday 0.832 1.000 0.997 0.879 1.000 0.977 0.024 0.008 0.110 0.006 0.032 0.005 0.028
5 Saturday 0.880 0.999 0.350 0.480 1.000 0.590 0.024 0.009 0.127 0.005 0.015 0.005 0.014
6 Sunday 0.237 0.062 0.009 0.009 0.008 0.005 0.005 0.007 0.005 0.005 0.005 0.005 0.019
In [80]: # converting the output table into csv format and downloading
In [81]: waste_not_collected.to_csv("probability (waste not collected).csv",index=False)
In [82]: waste_collected.to_csv("probability (waste collected).csv",index=False)
In [83]: # predicted output
In [84]: pred_result = my_model.predict(X)

pred_result
Out[84]: array([1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0.,
0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0.])
In [85]: actual_output = pd.DataFrame(columns = ['Day','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'])
i=0
for day in days:
row=[]
row.append(day)
i+=1
row.append(pred_result[i-1])
series_ao = pd.Series(row, index=actual_output.columns )
actual_output = actual_output.append(series_ao,ignore_index=True)
actual_output
0 Monday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Tuesday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Wednesday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Thursday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 Friday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
5 Saturday 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
6 Sunday 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [86]: actual_output.to_csv("predictd result.csv",index=False)
In [87]: # classification report of the model which measures the quality of predictions
In [88]: from sklearn.metrics import classification_report

report=classification_report(y,pred_result)
print(report)
precision recall f1-score support
0.0 0.95 0.96 0.96 56

1.0 0.94 0.91 0.93 35
accuracy 0.95 91
macro avg 0.94 0.94 0.94 91
weighted avg 0.94 0.95 0.94 91
In [89]: # plotting the Receiver Operating Curve (ROC Curve)
In [90]: from sklearn.metrics import roc_auc_score

from sklearn.metrics import roc_curve,auc
required_curve=roc_auc_score(y,pred_result)
fpr,tpr,thresholds=roc_curve(y,pred_result,pos_label=1)
In [91]: import matplotlib.pyplot as plt

plt.figure()
plt.plot(fpr,tpr,color='darkorange',lw=1,label='ROC curve(area = %0.2f)'% required_curve)
plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic Curve')
plt.legend(loc="lower right")
plt.show()
In [92]: actual_output=actual_output.set_index(['Day'])
actual_output
Out[92]: A B C D E F G H I J K L M
Day
Monday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Tuesday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Wednesday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Thursday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Friday 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Saturday 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Sunday 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [93]: actual_output=actual_output.transpose()
actual_output
Out[93]: Day Monday Tuesday Wednesday Thursday Friday Saturday Sunday
A 1.0 1.0 1.0 1.0 1.0 1.0 0.0
B 1.0 1.0 1.0 1.0 1.0 1.0 0.0
C 1.0 1.0 1.0 1.0 1.0 0.0 0.0
D 1.0 1.0 1.0 1.0 1.0 0.0 0.0
E 1.0 1.0 1.0 1.0 1.0 1.0 0.0
F 1.0 1.0 1.0 1.0 1.0 1.0 0.0
G 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I 0.0 0.0 0.0 0.0 0.0 0.0 0.0
J 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K 0.0 0.0 0.0 0.0 0.0 0.0 0.0
L 0.0 0.0 0.0 0.0 0.0 0.0 0.0
M 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [94]: actual_output.insert(0,'Buildings',['A','B','C','D','E','F','G','H','I','J','K','L','M'],True)
actual_output
Out[94]: Day Buildings Monday Tuesday Wednesday Thursday Friday Saturday Sunday
A A 1.0 1.0 1.0 1.0 1.0 1.0 0.0
B B 1.0 1.0 1.0 1.0 1.0 1.0 0.0
C C 1.0 1.0 1.0 1.0 1.0 0.0 0.0
D D 1.0 1.0 1.0 1.0 1.0 0.0 0.0
E E 1.0 1.0 1.0 1.0 1.0 1.0 0.0
F F 1.0 1.0 1.0 1.0 1.0 1.0 0.0
G G 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H H 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I I 0.0 0.0 0.0 0.0 0.0 0.0 0.0
J J 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K K 0.0 0.0 0.0 0.0 0.0 0.0 0.0
L L 0.0 0.0 0.0 0.0 0.0 0.0 0.0
M M 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [95]: actual_output=actual_output.set_index(['Buildings'])
actual_output
Out[95]: Day Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Buildings
A 1.0 1.0 1.0 1.0 1.0 1.0 0.0
B 1.0 1.0 1.0 1.0 1.0 1.0 0.0
C 1.0 1.0 1.0 1.0 1.0 0.0 0.0
D 1.0 1.0 1.0 1.0 1.0 0.0 0.0
E 1.0 1.0 1.0 1.0 1.0 1.0 0.0
F 1.0 1.0 1.0 1.0 1.0 1.0 0.0
G 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I 0.0 0.0 0.0 0.0 0.0 0.0 0.0
J 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K 0.0 0.0 0.0 0.0 0.0 0.0 0.0
L 0.0 0.0 0.0 0.0 0.0 0.0 0.0
M 0.0 0.0 0.0 0.0 0.0 0.0 0.0
In [96]: Index_label1 = actual_output.query('Monday==1').index.tolist()

print('Monday =',Index_label1)
Index_label2 = actual_output.query('Tuesday==1').index.tolist()
print('Tuesday =',Index_label2)
Index_label3 = actual_output.query('Wednesday==1').index.tolist()
print('Wednesday =',Index_label3)
Index_label4 = actual_output.query('Thursday==1').index.tolist()
print('Thursday =',Index_label4)
Index_label5 = actual_output.query('Friday==1').index.tolist()
print('Friday =',Index_label5)
Index_label6 = actual_output.query('Saturday==1').index.tolist()
print('Saturday =',Index_label6)
Index_label7 = actual_output.query('Sunday==1').index.tolist()
print('Sunday =',Index_label7)
Monday = ['A', 'B', 'C', 'D', 'E', 'F']

Tuesday = ['A', 'B', 'C', 'D', 'E', 'F']
Wednesday = ['A', 'B', 'C', 'D', 'E', 'F']
Thursday = ['A', 'B', 'C', 'D', 'E', 'F']
Friday = ['A', 'B', 'C', 'D', 'E', 'F']
Saturday = ['A', 'B', 'E', 'F']
Sunday = []
In [97]: buildings_to_clean=actual_output['Monday'].values.sum()
print(buildings_to_clean)
6.0
In [98]: import math

from random import *
import random
In [168… # generating nodes randomly
# since there are 13 buildings or nodes

num = 13
# radius of the circle (range of the wifi module)
circle_r = 50
# centre of the circle (x, y)
circle_x = 0
circle_y = 0
xy_list=[]
for i in range(num):
t = 2 * math.pi * random.random()
r = circle_r * math.sqrt(random.random())
x = r * math.cos(t) + circle_x
y = r * math.sin(t) + circle_y
xy_list.append([x,y])
circle_r = circle_r
# coordinate of the generated point is now the
# new centre of the circle
circle_x = x
circle_y = y
# connection between the nodes have been arbitrarily fixed

# however, the edges (distances) between them varies randomly.
xi=[xy_list[0][0], xy_list[1][0], xy_list[2][0], xy_list[3][0], xy_list[4][0], xy_list[5][0], xy_list[6][0],
xy_list[7][0], xy_list[8][0], xy_list[9][0], xy_list[10][0], xy_list[11][0], xy_list[12][0], xy_list[0][0],
xy_list[8][0], xy_list[5][0], xy_list[11][0], xy_list[7][0]]
yi=[xy_list[0][1], xy_list[1][1], xy_list[2][1], xy_list[3][1], xy_list[4][1], xy_list[5][1], xy_list[6][1],

xy_list[8][1], xy_list[5][1], xy_list[11][1], xy_list[7][1]]
n=['A','B','C','D','E','F','G','H','I','J','K','L','M']
fig, ax = plt.subplots()
ax.scatter(xi, yi,color='orange')
ax.plot(xi,yi,linestyle='dashed')
ax.set_title('Random Generated Graph')
for i, txt in enumerate(n):

ax.annotate(txt, (xi[i], yi[i]))
In [169… data={'Points':['A','B','C','D','E','F','G','H','I','J','K','L','M'],
'Coordinates':xy_list}
df=pd.DataFrame(data)
df
Out[169… Points Coordinates
0 A [-22.811352766369502, -42.9051483798065]
1 B [-16.548585957869044, -64.99301949290735]
2 C [-2.7135371843524982, -111.84658944434184]
3 D [14.504731245459316, -71.85514523206425]
4 E [52.616349152583524, -88.24676662203245]
5 F [83.45028968379334, -89.17275941357737]
6 G [112.04231553326235, -69.67377414401894]
7 H [138.77444345907816, -87.58117115205548]
8 I [104.70398680301169, -109.18049652906625]
9 J [65.50935819552903, -115.87765861437762]
10 K [19.718736052297082, -108.9197063224358]
11 L [47.22360087625079, -83.45634507275099]
12 M [73.86999598965316, -61.226726940341635]
In [170… # find the distance between the connected nodes
AB=math.sqrt( ((xy_list[0][0]-xy_list[1][0])**2)+((xy_list[0][1]-xy_list[1][1])**2) )
BC=math.sqrt( ((xy_list[1][0]-xy_list[2][0])**2)+((xy_list[1][1]-xy_list[2][1])**2) )
CD=math.sqrt( ((xy_list[2][0]-xy_list[3][0])**2)+((xy_list[2][1]-xy_list[3][1])**2) )
DE=math.sqrt( ((xy_list[3][0]-xy_list[4][0])**2)+((xy_list[3][1]-xy_list[4][1])**2) )
EF=math.sqrt( ((xy_list[4][0]-xy_list[5][0])**2)+((xy_list[4][1]-xy_list[5][1])**2) )
FG=math.sqrt( ((xy_list[5][0]-xy_list[6][0])**2)+((xy_list[5][1]-xy_list[6][1])**2) )
GH=math.sqrt( ((xy_list[6][0]-xy_list[7][0])**2)+((xy_list[6][1]-xy_list[7][1])**2) )
HI=math.sqrt( ((xy_list[7][0]-xy_list[8][0])**2)+((xy_list[7][1]-xy_list[8][1])**2) )
IJ=math.sqrt( ((xy_list[8][0]-xy_list[9][0])**2)+((xy_list[8][1]-xy_list[9][1])**2) )
JK=math.sqrt( ((xy_list[9][0]-xy_list[10][0])**2)+((xy_list[9][1]-xy_list[10][1])**2) )
KL=math.sqrt( ((xy_list[10][0]-xy_list[11][0])**2)+((xy_list[10][1]-xy_list[11][1])**2) )
LM=math.sqrt( ((xy_list[11][0]-xy_list[12][0])**2)+((xy_list[11][1]-xy_list[12][1])**2) )
MA=math.sqrt( ((xy_list[12][0]-xy_list[0][0])**2)+((xy_list[12][1]-xy_list[0][1])**2) )
AC=math.sqrt( ((xy_list[0][0]-xy_list[2][0])**2)+((xy_list[0][1]-xy_list[2][1])**2) )
CE=math.sqrt( ((xy_list[2][0]-xy_list[4][0])**2)+((xy_list[2][1]-xy_list[4][1])**2) )
EB=math.sqrt( ((xy_list[4][0]-xy_list[1][0])**2)+((xy_list[4][1]-xy_list[1][1])**2) )
BD=math.sqrt( ((xy_list[1][0]-xy_list[3][0])**2)+((xy_list[1][1]-xy_list[3][1])**2) )
DG=math.sqrt( ((xy_list[3][0]-xy_list[6][0])**2)+((xy_list[3][1]-xy_list[6][1])**2) )
GJ=math.sqrt( ((xy_list[6][0]-xy_list[9][0])**2)+((xy_list[6][1]-xy_list[9][1])**2) )
JM=math.sqrt( ((xy_list[9][0]-xy_list[12][0])**2)+((xy_list[9][1]-xy_list[12][1])**2) )
MI=math.sqrt( ((xy_list[12][0]-xy_list[8][0])**2)+((xy_list[12][1]-xy_list[8][1])**2) )
IF=math.sqrt( ((xy_list[8][0]-xy_list[5][0])**2)+((xy_list[8][1]-xy_list[5][1])**2) )
FL=math.sqrt( ((xy_list[5][0]-xy_list[11][0])**2)+((xy_list[5][1]-xy_list[11][1])**2) )
LH=math.sqrt( ((xy_list[11][0]-xy_list[7][0])**2)+((xy_list[11][1]-xy_list[7][1])**2) )
Nodes=['AB','BC','CD','DE','EF','FG','GH','HI','IJ','JK','KL','LM','MA','AC','CE','EB','BD','DG','GJ','JM','MI','IF','FL','LH']
Distance=[AB,BC,CD,DE,EF,FG,GH,HI,IJ,JK,KL,LM,MA,AC,CE,EB,BD,DG,GH,JM,MI,IF,FL,LH]
table=pd.DataFrame()
table['Nodes']=Nodes
table['Distance']=Distance
table
Out[170… Nodes Distance
0 AB 22.958578
1 BC 48.853512
2 CD 43.540606
3 DE 41.487115
4 EF 30.847842
5 FG 34.608010
6 GH 32.175791
7 HI 40.340140
8 IJ 39.762682
9 JK 46.316241
10 KL 37.482000
11 LM 34.701388
12 MA 98.402050
13 AC 71.811172
14 CE 60.152705
15 EB 72.969343
16 BD 31.802473
17 DG 97.561974
18 GJ 32.175791
19 JM 55.286749
20 MI 57.011394
21 IF 29.189539
22 FL 36.674928
23 LH 91.643718
In [173… import sys

from heapq import heappop, heappush
# A class to store a graph edge

class Edge:
def __init__(self, source, dest, weight):
self.source = source
self.dest = dest
self.weight = weight
# A class to store a heap node

class Node:
def __init__(self, vertex, weight):
self.vertex = vertex
self.weight = weight
# Override the `__lt__()` function to make `Node` class work with a min-heap
def __lt__(self, other):
return self.weight < other.weight
# A class to represent a graph object

class Graph:
def __init__(self, edges, N):
# allocate memory for the adjacency list
self.adj = [[] for _ in range(N)]
# add edges to the undirected graph

for edge in edges:
self.adj[edge.source].append(edge)
def get_route(prev, i, route):

if i >= 0:
# changing the value of integer i to corresponding character
node = chr(ord(root) + i)
get_route(prev, prev[i], route)
route.append(node)
# Run Dijkstra’s algorithm on a given graph
def findShortestPaths(graph, source, N):
# create a min-heap and push source node having distance 0

pq = []
heappush(pq, Node(source, 0))
# set initial distance from the source to `v` as INFINITY

dist = [sys.maxsize] * N
# distance from the source to itself is zero

dist[source] = 0
# list to track vertices for which minimum cost is already found

done = [False] * N
done[source] = True
# stores predecessor of a vertex (to a print path)

prev = [-1] * N
route = []
# run till min-heap is empty

while pq:
node = heappop(pq) # Remove and return the best vertex

u = node.vertex # get the vertex number
# do for each neighbor `v` of ù`

for edge in graph.adj[u]:
v = edge.dest
weight = edge.weight
# Relaxation step
if not done[v] and (dist[u] + weight) < dist[v]:
dist[v] = dist[u] + weight
prev[v] = u
heappush(pq, Node(v, dist[v]))
# mark vertex ù` as done so it will not get picked up again

done[u] = True
for i in range(1, N):

if i != source and dist[i] != sys.maxsize:
get_route(prev, i, route)
# changing the value of integer i to corresponding character
node = chr(ord(root) + i)
# Changed source to root and i to node
print(f"Path ({root} —> {node}): Minimum Distance = {dist[i]} Route = {route}")
route.clear()
if __name__ == '__main__':
# initialize edges as per the above diagram

# `(u, v, w)` triplet represent undirected edge from
# vertex ù` to vertex `v` having weight `w`
edges = [Edge(0,1,22.958578), Edge(0,2,71.811172), Edge(0,12,98.402050), Edge(1,2,48.853512), Edge(1,4,72.969343),
Edge(1,3,31.802473), Edge(2,3,43.5406062), Edge(2,4,60.152705), Edge(3,4,41.487115), Edge(3,6,97.561974),
Edge(9,10,46.316241), Edge(9,12,55.286749), Edge(10,11,37.482000), Edge(11,12,34.701388)]
# total number of nodes in the graph

N = 25
# construct graph
graph = Graph(edges, N)
source = 0
# a new variable 'root' as char data type
root = 'A'
findShortestPaths(graph, source, N)
Path (A —> B): Minimum Distance = 22.958578 Route = ['A', 'B']

Path (A —> C): Minimum Distance = 71.811172 Route = ['A', 'C']
Path (A —> D): Minimum Distance = 54.761050999999995 Route = ['A', 'B', 'D']
Path (A —> E): Minimum Distance = 95.927921 Route = ['A', 'B', 'E']
Path (A —> F): Minimum Distance = 126.775763 Route = ['A', 'B', 'E', 'F']
Path (A —> G): Minimum Distance = 152.323025 Route = ['A', 'B', 'D', 'G']
Path (A —> H): Minimum Distance = 184.498816 Route = ['A', 'B', 'D', 'G', 'H']
Path (A —> I): Minimum Distance = 155.965302 Route = ['A', 'B', 'E', 'F', 'I']
Path (A —> J): Minimum Distance = 184.498816 Route = ['A', 'B', 'D', 'G', 'J']
Path (A —> K): Minimum Distance = 230.815057 Route = ['A', 'B', 'D', 'G', 'J', 'K']
Path (A —> L): Minimum Distance = 163.450691 Route = ['A', 'B', 'E', 'F', 'L']
Path (A —> M): Minimum Distance = 98.40205 Route = ['A', 'M']
In [ ]:

Source Code For Logistic Regression and Dijkstra's Algorithm

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Source Code For Logistic Regression and Dijkstra's Algorithm

Uploaded by

Copyright:

Available Formats

In [57]: # importing pandas library and data file in csv format

In [58]: import pandas as pd

Out[59]: Building Monday Tuesday Wednesday Thursday Friday Saturday Sunday

0 A 91.0 95.0 97.0 99.0 90.0 95.0 54.0

1 B 234.0 281.0 293.0 280.0 269.0 165.0 34.0

2 C 156.0 149.0 174.0 167.0 146.0 61.0 8.0

3 D 81.0 120.0 110.0 91.0 95.0 68.0 8.0

4 E 252.0 231.0 287.0 259.0 273.0 226.0 6.0

5 F 97.0 117.0 134.0 117.0 118.0 74.0 1.0

6 G 21.0 21.0 21.0 21.0 21.0 21.0 0.0

7 H 7.0 8.0 7.0 7.0 7.0 8.0 4.0

8 I 62.0 56.0 48.0 42.0 42.0 44.0 0.0

9 J 2.0 0.0 0.0 2.0 2.0 0.0 0.0

10 K 23.0 38.0 30.0 27.0 25.0 15.0 0.0

11 L 4.0 3.0 0.0 0.0 0.0 0.0 0.0

12 M 14.0 24.0 19.0 29.0 23.0 14.0 18.0

13 NaN NaN NaN NaN NaN NaN NaN NaN

15 A 1.0 1.0 1.0 1.0 1.0 1.0 0.0

16 B 1.0 1.0 1.0 1.0 1.0 1.0 0.0

17 C 1.0 1.0 1.0 1.0 1.0 1.0 0.0

18 D 1.0 1.0 1.0 1.0 1.0 1.0 0.0

19 E 1.0 1.0 1.0 1.0 1.0 1.0 0.0

20 F 0.0 0.0 1.0 1.0 1.0 1.0 0.0

21 G 0.0 0.0 0.0 0.0 0.0 0.0 0.0

22 H 0.0 0.0 0.0 0.0 0.0 0.0 0.0

23 I 1.0 0.0 0.0 0.0 0.0 0.0 0.0

24 J 0.0 0.0 0.0 0.0 0.0 0.0 0.0

25 K 0.0 0.0 0.0 0.0 0.0 0.0 0.0

26 L 0.0 0.0 0.0 0.0 0.0 0.0 0.0

27 M 0.0 0.0 0.0 0.0 0.0 0.0 0.0

In [60]: # rearranging the data

In [61]: class_schedule = df.iloc[0:13]

Out[61]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday

A 91.0 95.0 97.0 99.0 90.0 95.0 54.0

B 234.0 281.0 293.0 280.0 269.0 165.0 34.0

C 156.0 149.0 174.0 167.0 146.0 61.0 8.0

D 81.0 120.0 110.0 91.0 95.0 68.0 8.0

E 252.0 231.0 287.0 259.0 273.0 226.0 6.0

F 97.0 117.0 134.0 117.0 118.0 74.0 1.0

G 21.0 21.0 21.0 21.0 21.0 21.0 0.0

H 7.0 8.0 7.0 7.0 7.0 8.0 4.0

I 62.0 56.0 48.0 42.0 42.0 44.0 0.0

J 2.0 0.0 0.0 2.0 2.0 0.0 0.0

K 23.0 38.0 30.0 27.0 25.0 15.0 0.0

L 4.0 3.0 0.0 0.0 0.0 0.0 0.0

M 14.0 24.0 19.0 29.0 23.0 14.0 18.0

In [62]: waste_info = df.iloc[15:28]

Out[62]: Monday Tuesday Wednesday Thursday Friday Saturday Sunday

A 1.0 1.0 1.0 1.0 1.0 1.0 0.0

B 1.0 1.0 1.0 1.0 1.0 1.0 0.0

C 1.0 1.0 1.0 1.0 1.0 1.0 0.0

D 1.0 1.0 1.0 1.0 1.0 1.0 0.0

E 1.0 1.0 1.0 1.0 1.0 1.0 0.0

F 0.0 0.0 1.0 1.0 1.0 1.0 0.0

G 0.0 0.0 0.0 0.0 0.0 0.0 0.0

H 0.0 0.0 0.0 0.0 0.0 0.0 0.0

I 1.0 0.0 0.0 0.0 0.0 0.0 0.0

J 0.0 0.0 0.0 0.0 0.0 0.0 0.0

K 0.0 0.0 0.0 0.0 0.0 0.0 0.0

L 0.0 0.0 0.0 0.0 0.0 0.0 0.0

M 0.0 0.0 0.0 0.0 0.0 0.0 0.0

In [63]: # creating a new dataframe with new set of columns