Download as pdf or txt
Download as pdf or txt
You are on page 1of 23

KONGU ENGINEERING COLLEGE

PERUNDURAI ERODE – 638 060

NOVEMBER 2022

DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING

MACHINE LEARNING

STOCK PRICE PREDICTION

A MINI PROJECT REPORT

Submitted by

SARAVANAKKUMAR T A
19CSR181

SUDHAN E D
20CSR211

VASANTH V
20CSR230

in partial fulfillment of the


requirements for the award of the
degree of
PROBLEM DESCRIPTION:

In this project we use a stock-price dataset to train and test prediction

models. We use three different models and compare them to get the best

accuracy. The first is a logistic regression model, which belongs to the

category of supervised machine learning. The second is a Long Short-Term

Memory (LSTM) model, which is also very suitable for predicting stock

prices. The third and last model is KNN (K-Nearest Neighbors), which is

also a supervised machine learning model; it likewise gives good accuracy

on this dataset.

We will learn how to predict the stock price using an LSTM

neural network. Then we will build a dashboard using Plotly Dash

for stock analysis.


STOCK PRICE PREDICTION:

# Using Logistic Regression


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.model_selection import train_test_split


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

# Load the ADANIPORTS daily price history. A raw string keeps the Windows
# backslashes literal; the original "\A" was an invalid escape sequence that
# recent Python versions warn about (the resulting path is unchanged).
df = pd.read_csv(r"D:\archive (1)\ADANIPORTS.csv")


df.head()

# `shape` is a tuple attribute, not a method: calling it raises TypeError.
df.shape
(3322, 15)

# Summary statistics for the numeric columns.
df.describe()

# Column dtypes and non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3322 entries, 0 to 3321
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null object
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: float64(10), int64(2), object(3)
memory usage: 389.4+ KB

# Line chart of the closing price over the whole history (x axis is the row
# position, since no date index has been set yet).
plt.figure(figsize=(15, 5))
plt.plot(df['Close'])
plt.title('AdaniPorts Close price.', fontsize=15)
# ADANIPORTS is listed on Indian exchanges, so prices are quoted in rupees,
# not dollars.
plt.ylabel('Price in rupees (INR).')
plt.show()

# Shape of the rows that have a usable closing price. The original compared
# the column with itself (`x == x` is False only for NaN) and was pasted
# twice; `notna()` states the same filter directly.
df[df['Close'].notna()].shape
(3322, 15)
# Per-column count of missing values (the stray notebook prompt that preceded
# this statement was not valid Python and has been dropped).
df.isnull().sum()
Date 0
Symbol 0
Series 0
Prev Close 0
Open 0
High 0
Low 0
Last 0
Close 0
VWAP 0
Volume 0
Turnover 0
Trades 866
Deliverable Volume 0
%Deliverble 0
dtype: int64

# Numeric columns whose distributions are inspected below.
features = ['Prev Close', 'Open', 'High', 'Low', 'Last', 'Close',
            'VWAP', 'Turnover', 'Trades', 'Deliverable Volume']

# plt.figure (rather than plt.subplots) avoids creating an extra full-figure
# axes underneath the grid that plt.subplot builds.
plt.figure(figsize=(20, 10))

for i, col in enumerate(features):
    # 6x4 grid kept from the original layout; subplot indices are 1-based.
    plt.subplot(6, 4, i + 1)
    # histplot with a KDE overlay replaces the deprecated sb.distplot.
    sb.histplot(df[col], kde=True, stat="density")
plt.show()

# One box plot per numeric column to expose outliers.
# plt.figure avoids the extra full-figure axes that plt.subplots would leave
# underneath the grid.
plt.figure(figsize=(20, 10))
for i, col in enumerate(features):
    plt.subplot(6, 4, i + 1)
    # Pass the data by keyword: positional data arguments are deprecated in
    # recent seaborn releases.
    sb.boxplot(x=df[col])
plt.show()

# Dates in this CSV are ISO formatted (YYYY-MM-DD), so splitting on '-' yields
# [year, month, day]. The original read the pieces in reverse order, which
# stored the day of month in 'year' and the year in 'date'.
splitted = df['Date'].str.split('-', expand=True)

df['year'] = splitted[0].astype('int')
df['month'] = splitted[1].astype('int')
df['date'] = splitted[2].astype('int')

df.head()

# Flag rows whose month closes a quarter (March, June, September, December).
df['is_quarter_end'] = np.where(df['month'] % 3 == 0, 1, 0)
df.head()

# numeric_only=True skips the text columns (Date, Symbol, Series); without it
# pandas >= 2.0 raises TypeError when asked to average them.
data_grouped = df.groupby('year').mean(numeric_only=True)

# plt.figure avoids the extra full-figure axes plt.subplots would create.
plt.figure(figsize=(20, 10))

for i, col in enumerate(['Prev Close', 'Open', 'High', 'Low', 'Last', 'Close',
                         'VWAP', 'Turnover', 'Trades', 'Deliverable Volume']):
    plt.subplot(6, 4, i + 1)
    data_grouped[col].plot.bar()
plt.show()

# Compare average values between quarter-end months and the rest.
df.groupby('is_quarter_end').mean(numeric_only=True)

# Intraday spreads used as model features.
df['open-close'] = df['Open'] - df['Close']
df['low-high'] = df['Low'] - df['High']

# 1 when the next row's close is higher than this row's close, else 0.
# The last row has no successor (shift gives NaN), so it is labelled 0.
df['target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)

# value_counts() orders by frequency, not by class value, so take the wedge
# labels from its index instead of hard-coding [0, 1].
target_counts = df['target'].value_counts()
plt.pie(target_counts.values, labels=target_counts.index, autopct='%1.1f%%')
plt.show()

plt.figure(figsize=(10, 10))

# Only strongly correlated pairs matter here, so the heatmap shows the boolean
# mask "correlation > 0.9" rather than the raw coefficients.
# Restrict to numeric columns first: correlating the text columns
# (Date, Symbol, Series) raises an error in pandas >= 2.0.
numeric_df = df.select_dtypes(include='number')
sb.heatmap(numeric_df.corr() > 0.9, annot=True, cbar=False)
plt.show()

# DATA SPLITTING AND ORGANIZATION


features = df[['open-close', 'low-high', 'is_quarter_end']]
target = df['target']

# Split first, then fit the scaler on the training rows only. Fitting it on
# the full table (as before) leaks validation-set statistics into training.
# NOTE(review): this is a shuffled split of time-ordered data; a
# chronological split may be more appropriate -- confirm intent.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    features, target, test_size=0.1,
    random_state=2022)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
print(X_train.shape, X_valid.shape)

(2989, 3) (333, 3)
# Model Development and Evaluation
# Three classifiers trained on the same split and scored on both partitions.
models = [
    LogisticRegression(),
    SVC(kernel='poly', probability=True),
    XGBClassifier(),
]
for model in models:
    model.fit(X_train, Y_train)

    # Probability of the positive class for each partition.
    train_scores = model.predict_proba(X_train)[:, 1]
    valid_scores = model.predict_proba(X_valid)[:, 1]

    print(f'{model} : ')
    # The labels say "Accuracy", but the value printed is the ROC-AUC score.
    print('Training Accuracy : ', metrics.roc_auc_score(Y_train, train_scores))
    print('Validation Accuracy : ', metrics.roc_auc_score(Y_valid, valid_scores))
    print()

ACCURACY:

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,


colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
early_stopping_rounds=None, enable_categorical=False,
eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
grow_policy='depthwise', importance_type=None,
interaction_constraints='', learning_rate=0.300000012,
max_bin=256, max_cat_threshold=64, max_cat_to_onehot=4,
max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
missing=nan, monotone_constraints='()', n_estimators=100,
n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, ...) :
Training Accuracy : 0.9206630526371402
Validation Accuracy : 0.48574800290486564

# Confusion matrix of the first model (logistic regression) on the validation
# set. metrics.plot_confusion_matrix was removed in scikit-learn 1.2;
# ConfusionMatrixDisplay.from_estimator is its replacement.
metrics.ConfusionMatrixDisplay.from_estimator(models[0], X_valid, Y_valid)
plt.show()

USING LSTM TO PREDICT STOCK PRICE:


# RNN (RECURRENT NEURAL NETWORK)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
# Raw string: the original "\A" was an invalid escape sequence (same path).
df = pd.read_csv(r"D:\archive (1)\ADANIPORTS.csv")
df.head()

# Keep only the opening price, as a column vector of shape (n_rows, 1).
df = df['Open'].values
df = df.reshape(-1, 1)

# 80/20 split in row order: first 80% for training, the rest for testing.
split_at = int(df.shape[0] * 0.8)
dataset_train = np.array(df[:split_at])
dataset_test = np.array(df[split_at:])

# The scaler is fitted on the training part only and reused for the test part.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_train = scaler.fit_transform(dataset_train)
dataset_test = scaler.transform(dataset_test)

def create_dataset(df, window=50):
    """Build sliding-window samples for next-step prediction.

    Each sample is ``window`` consecutive values from column 0 of ``df``; its
    target is the value that immediately follows the window.

    Args:
        df: 2-D NumPy array of shape (n_rows, n_cols); only column 0 is read.
        window: Number of past steps per sample. Defaults to 50, the value
            that used to be hard-coded.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape (n_rows - window, window) and
        ``y`` has shape (n_rows - window,). When ``df`` has no more than
        ``window`` rows, ``x`` is returned with shape (0, window) so callers
        can still read ``x.shape[1]``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    n_rows = df.shape[0]
    x = [df[i - window:i, 0] for i in range(window, n_rows)]
    y = [df[i, 0] for i in range(window, n_rows)]
    if not x:
        # Keep the sample matrix 2-D even when there is nothing to return.
        return np.empty((0, window)), np.empty((0,))
    return np.array(x), np.array(y)
# Turn each scaled series into (window, next value) samples.
x_train, y_train = create_dataset(dataset_train)
x_test, y_test = create_dataset(dataset_test)
# Add a trailing axis of size 1: (samples, steps) -> (samples, steps, 1).
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Four stacked 96-unit LSTM layers, each followed by 20% dropout, ending in a
# single-unit dense layer. Every LSTM except the last returns its full
# sequence so the next LSTM receives one vector per time step.
model = Sequential([
    LSTM(units=96, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=96, return_sequences=True),
    Dropout(0.2),
    LSTM(units=96, return_sequences=True),
    Dropout(0.2),
    LSTM(units=96),
    Dropout(0.2),
    Dense(units=1),
])

# Give both sample sets a trailing axis of size 1.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Train with mean-squared-error loss and the Adam optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=50, batch_size=32)
# Save the trained model and load it back from the same file.
model.save('stock_prediction.h5')
model = load_model('stock_prediction.h5')
predictions = model.predict(x_test)
# Undo the MinMax scaling on both series. Despite its name, y_test_scaled
# holds the inverse-transformed (un-scaled) targets.
predictions = scaler.inverse_transform(predictions)
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Actual vs. predicted prices on the test range. Everything is drawn through
# the same Axes object (the original mixed ax.* and plt.* calls), and the
# figure is shown explicitly so it also appears when run as a script.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_facecolor('#000041')
ax.plot(y_test_scaled, color='red', label='Original price')
ax.plot(predictions, color='cyan', label='Predicted price')
ax.legend()
plt.show()

KNN MODEL PREDICTION FOR STOCK PRICE

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Raw string: the original "\A" was an invalid escape sequence (same path).
df = pd.read_csv(r"D:\archive (1)\ADANIPORTS.csv")
df.head()
print(df)

Date Symbol Series Prev Close Open High Low \


0 2007-11-27 MUNDRA PORT EQ 440.00 770.00 1050.00 770.00
1 2007-11-28 MUNDRA PORT EQ 962.90 984.00 990.00 874.00
2 2007-11-29 MUNDRA PORT EQ 893.90 909.00 914.75 841.00
3 2007-11-30 MUNDRA PORT EQ 884.20 890.00 958.00 890.00
4 2007-12-03 MUNDRA PORT EQ 921.55 939.75 995.00 922.00
... ... ... ... ... ... ... ...
3317 2021-04-26 ADANI PORTS EQ 725.35 733.00 739.65 728.90
3318 2021-04-27 ADANI PORTS EQ 730.75 735.00 757.50 727.35
3319 2021-04-28 ADANI PORTS EQ 749.15 755.00 760.00 741.10
3320 2021-04-29 ADANI PORTS EQ 746.25 753.20 765.85 743.40
3321 2021-04-30 ADANIPORTS EQ 746.75 739.00 759.45 724.50

Last Close VWAP Volume Turnover Trades \


0 959.0 962.90 984.72 27294366 2.687719e+15 NaN
1 885.0 893.90 941.38 4581338 4.312765e+14 NaN
2 887.0 884.20 888.09 5124121 4.550658e+14 NaN
3 929.0 921.55 929.17 4609762 4.283257e+14 NaN
4 980.0 969.30 965.65 2977470 2.875200e+14 NaN
... ... ... ... ... ... ...
3317 729.2 730.75 733.25 9390549 6.885658e+14 116457.0
3318 748.6 749.15 747.67 20573107 1.538191e+15 236896.0
3319 743.4 746.25 751.02 11156977 8.379106e+14 130847.0
3320 746.4 746.75 753.06 13851910 1.043139e+15 153293.0
3321 726.4 730.05 743.35 12600934 9.366911e+14 132141.0

Deliverable Volume %Deliverable


0 9859619 0.3612
1 1453278 0.3172
2 1069678 0.2088
3 1260913 0.2735
4 816123 0.2741
... ... ...
3317 838079 0.0892
3318 1779639 0.0865
3319 1342353 0.1203
3320 1304895 0.0942
3321 3514692 0.2789

[3322 rows x 15 columns]

# Parse the ISO date strings and use them as the index, keeping the 'Date'
# column as well.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df = df.set_index("Date", drop=False)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3322 entries, 2007-11-27 to 2021-04-30
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null datetime64[ns]
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: datetime64[ns](1), float64(10), int64(2), object(2)
memory usage: 415.2+ KB

# Discard the columns not used by the KNN model ("%Deliverble" is spelled as
# in the dataset header).
df = df.drop(columns=["Series", "Symbol", "Trades", "Deliverable Volume", "%Deliverble"])

Date Prev Close Open High Low Last Close \


Date
2007-11-27 2007-11-27 440.00 770.00 1050.00 770.00 959.0 962.90
2007-11-28 2007-11-28 962.90 984.00 990.00 874.00 885.0 893.90
2007-11-29 2007-11-29 893.90 909.00 914.75 841.00 887.0 884.20
2007-11-30 2007-11-30 884.20 890.00 958.00 890.00 929.0 921.55
2007-12-03 2007-12-03 921.55 939.75 995.00 922.00 980.0 969.30
... ... ... ... ... ... ... ...
2021-04-26 2021-04-26 725.35 733.00 739.65 728.90 729.2 730.75
2021-04-27 2021-04-27 730.75 735.00 757.50 727.35 748.6 749.15
2021-04-28 2021-04-28 749.15 755.00 760.00 741.10 743.4 746.25
2021-04-29 2021-04-29 746.25 753.20 765.85 743.40 746.4 746.75
2021-04-30 2021-04-30 746.75 739.00 759.45 724.50 726.4 730.05

VWAP Volume Turnover


Date
2007-11-27 984.72 27294366 2.687719e+15
2007-11-28 941.38 4581338 4.312765e+14
2007-11-29 888.09 5124121 4.550658e+14
2007-11-30 929.17 4609762 4.283257e+14
2007-12-03 965.65 2977470 2.875200e+14
... ... ... ...
2021-04-26 733.25 9390549 6.885658e+14
2021-04-27 747.67 20573107 1.538191e+15
2021-04-28 751.02 11156977 8.379106e+14
2021-04-29 753.06 13851910 1.043139e+15
2021-04-30 743.35 12600934 9.366911e+14

[3322 rows x 10 columns]


# Percentage of missing values per column.
100 * df.isnull().sum() / len(df)

Date 0.0
Prev Close 0.0
Open 0.0
High 0.0
Low 0.0
Last 0.0
Close 0.0
VWAP 0.0
Volume 0.0
Turnover 0.0
dtype: float64

# VWAP over time (the index holds the dates).
df.VWAP.plot(figsize=(25, 5))
plt.show()

# Density of VWAP values. `fill=True` replaces the deprecated `shade=True`.
sns.kdeplot(df.VWAP, fill=True)
plt.show()
# Calendar features derived from the Date column.
df["month"] = df.Date.dt.month
df["week"] = df.Date.dt.isocalendar().week
# NOTE(review): isocalendar().day is the ISO weekday (1-7), i.e. the same
# information as day_of_week below. If day of month was intended, this should
# probably be df.Date.dt.day -- confirm before changing, since it alters the
# model inputs.
df["day"] = df.Date.dt.isocalendar().day
df["day_of_week"] = df.Date.dt.dayofweek
# The dates remain available through the index.
df.drop(['Date'], axis=1, inplace=True)
df

# Remember the index and column labels: fit_transform returns a bare array.
indx = df.index
cols = df.columns
(indx, cols)
# Scale every column to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test-set minima/maxima influence the training features.
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(df)
# Rebuild a labelled DataFrame from the scaled array.
data = pd.DataFrame(data, index=indx, columns=cols)
df = data
df

# Target: the scaled closing price. (An earlier variant used VWAP as the
# target instead.)
# NOTE(review): the remaining feature columns (Open, High, Low, Last, ...)
# come from the same row as the Close being predicted -- confirm this is
# intended rather than predicting from prior rows only.
y = df['Close']
y.index = indx
# Remove the target and VWAP from the feature table.
df.drop(['VWAP', 'Close'], axis=1, inplace=True)
df
X = df
print(X)
p=len(X)
print(p)

Prev Close Open High Low Last Volume \


Date
2007-11-27 0.276794 0.550634 0.774216 0.570576 0.709167 0.279227
2007-11-28 0.712743 0.728634 0.724774 0.659896 0.647500 0.046763
2007-11-29 0.655217 0.666251 0.662766 0.631554 0.649167 0.052318
2007-11-30 0.647130 0.650447 0.698406 0.673638 0.684167 0.047054
2007-12-03 0.678269 0.691828 0.728895 0.701121 0.726667 0.030347
... ... ... ... ... ... ...
2021-04-26 0.514694 0.519859 0.518479 0.535277 0.517667 0.095984
2021-04-27 0.519196 0.521522 0.533188 0.533946 0.533833 0.210436
2021-04-28 0.534537 0.538158 0.535248 0.545755 0.529500 0.114063
2021-04-29 0.532119 0.536660 0.540068 0.547730 0.532000 0.141645
2021-04-30 0.532536 0.524849 0.534795 0.531498 0.515333 0.128842

Turnover month week day day_of_week


Date
2007-11-27 0.329318 0.909091 0.903846 0.166667 0.166667
2007-11-28 0.052818 0.909091 0.903846 0.333333 0.333333
2007-11-29 0.055733 0.909091 0.903846 0.500000 0.500000
2007-11-30 0.052456 0.909091 0.903846 0.666667 0.666667
2007-12-03 0.035202 1.000000 0.923077 0.000000 0.000000
... ... ... ... ... ...
2021-04-26 0.084346 0.272727 0.307692 0.000000 0.000000
2021-04-27 0.188457 0.272727 0.307692 0.166667 0.166667
2021-04-28 0.102646 0.272727 0.307692 0.333333 0.333333
2021-04-29 0.127794 0.272727 0.307692 0.500000 0.500000
2021-04-30 0.114750 0.272727 0.307692 0.666667 0.666667
[3322 rows x 11 columns]
3322

# Number of rows in the feature table.
len(X)

3322

# Row-order split: the first 80% of rows train the model, the rest test it.
cut = int(len(X) * 0.8)

X_train, X_test = X.iloc[:cut], X.iloc[cut:]
y_train, y_test = y.iloc[:cut], y.iloc[cut:]
(X_test, y_test)

( Prev Close Open High Low Last Volume \


Date
2018-08-20 0.221852 0.223248 0.221705 0.230601 0.225125 0.012979
2018-08-21 0.225312 0.225036 0.223765 0.231674 0.224625 0.016176
2018-08-23 0.225061 0.225993 0.226731 0.229184 0.230750 0.020474
2018-08-24 0.230522 0.230401 0.228544 0.231331 0.224000 0.021218
2018-08-27 0.224061 0.224579 0.224012 0.231803 0.225042 0.020912
... ... ... ... ... ... ...
2021-04-26 0.514694 0.519859 0.518479 0.535277 0.517667 0.095984
2021-04-27 0.519196 0.521522 0.533188 0.533946 0.533833 0.210436
2021-04-28 0.534537 0.538158 0.535248 0.545755 0.529500 0.114063
2021-04-29 0.532119 0.536660 0.540068 0.547730 0.532000 0.141645
2021-04-30 0.532536 0.524849 0.534795 0.531498 0.515333 0.128842

Turnover month week day day_of_week


Date
2018-08-20 0.005889 0.636364 0.634615 0.000000 0.000000
2018-08-21 0.007355 0.636364 0.634615 0.166667 0.166667
2018-08-23 0.009354 0.636364 0.634615 0.500000 0.500000
2018-08-24 0.009685 0.636364 0.634615 0.666667 0.666667
2018-08-27 0.009511 0.636364 0.653846 0.000000 0.000000
... ... ... ... ... ...
2021-04-26 0.084346 0.272727 0.307692 0.000000 0.000000
2021-04-27 0.188457 0.272727 0.307692 0.166667 0.166667
2021-04-28 0.102646 0.272727 0.307692 0.333333 0.333333
2021-04-29 0.127794 0.272727 0.307692 0.500000 0.500000
2021-04-30 0.114750 0.272727 0.307692 0.666667 0.666667

[665 rows x 11 columns],


Date
2018-08-20 0.225312
2018-08-21 0.225061
2018-08-23 0.230522
2018-08-24 0.224061
2018-08-27 0.225645
...
2021-04-26 0.519196
2021-04-27 0.534537
2021-04-28 0.532119
2021-04-29 0.532536
2021-04-30 0.518613
Name: Close, Length: 665, dtype: float64)

# Sizes of the training and test feature tables.
print(X_train.shape)
print(X_test.shape)

(2657, 11)
(665, 11)

from sklearn.model_selection import GridSearchCV


from sklearn.neighbors import KNeighborsRegressor
# Reference model: scikit-learn KNN regressor with two neighbours.
knn = KNeighborsRegressor(n_neighbors=2)
knn.fit(X_train,y_train)

# Predictions for the test rows (still in scaled units).
y_pred = knn.predict(X_test)
y_pred

array([0.22389428, 0.2221018 , 0.22274793, 0.22689566, 0.22820876,


0.2263329 , 0.23291925, 0.23218975, 0.2231231 , 0.23102255,
0.2348368 , 0.23304431, 0.23960982, 0.23544124, 0.23102255,
0.2348368 , 0.23304431, 0.22787528, 0.23102255, 0.23494101,
0.22783359, 0.20847055, 0.18322981, 0.18170828, 0.17885281,
0.20102964, 0.1860436 , 0.17616408, 0.17635166, 0.1708283 ,
0.17072408, 0.16153237, 0.17076577, 0.17635166, 0.18081204,
0.17768561, 0.17610155, 0.17076577, 0.17635166, 0.17585143,
0.17426737, 0.16888991, 0.1624703 , 0.16219934, 0.16142815,
0.17426737, 0.16888991, 0.1662637 , 0.15288257, 0.15621743,
0.19037892, 0.18149985, 0.1806453 , 0.18675226, 0.19037892,
0.18969111, 0.1806453 , 0.17781066, 0.18675226, 0.19037892,
0.19907041, 0.19938305, 0.22105965, 0.24265288, 0.24250698,
0.19938305, 0.241444 , 0.24575847, 0.23506607, 0.23983909,
0.23500354, 0.23710867, 0.23517029, 0.23506607, 0.23983909,
0.23500354, 0.23710867, 0.23517029, 0.23506607, 0.24121472,
0.23500354, 0.24236108, 0.24125641, 0.23506607, 0.23925549,
0.24086039, 0.2466964 , 0.16376256, 0.24036017, 0.22664555,
0.23633749, 0.21816249, 0.22775022, 0.24036017, 0.2170995 ,
0.21935054, 0.21816249, 0.21532786, 0.23104339, 0.23481596,
0.23633749, 0.24292384, 0.23648339, 0.23131435, 0.22306057,
0.22449873, 0.22262287, 0.18375089, 0.18848222, 0.18866981,
0.19073325, 0.18689816, 0.18806536, 0.17318354, 0.17981158,
0.18041602, 0.18689816, 0.18806536, 0.17910292, 0.16891075,
0.18944099, 0.19408896, 0.18806536, 0.19342199, 0.18591855,
0.18944099, 0.19879945, 0.1731627 , 0.17043228, 0.17195381,
0.10984201, 0.13873025, 0.11182209, 0.11146776, 0.10719496,
0.10984201, 0.10896661, 0.10669474, 0.11146776, 0.10719496,
0.10984201, 0.10896661, 0.10083788, 0.10713244, 0.10165076,
0.10404769, 0.1037142 , 0.10748676, 0.10788278, 0.1037142 ,
0.11050898, 0.10769519, 0.13306099, 0.10465213, 0.14048105,
0.14110634, 0.13412397, 0.13306099, 0.17114094, 0.17514277,
0.10363083, 0.10642378, 0.09450165, 0.14527492, 0.18679395,
0.1961524 , 0.18964942, 0.19285923, 0.18373004, 0.18679395,
0.18806536, 0.19008712, 0.19346367, 0.18798199, 0.1821043 ,
0.19350536, 0.19281754, 0.19081662, 0.18875318, 0.16578432,
0.18864896, 0.20951269, 0.2052399 , 0.21053399, 0.20922089,
0.21045062, 0.21291008, 0.20974197, 0.21053399, 0.20926258,
0.21232648, 0.21291008, 0.21197215, 0.21134687, 0.21497353,
0.21451499, 0.2124307 , 0.21484847, 0.21799575, 0.19017049,
0.21451499, 0.21545292, 0.21632832, 0.22406103, 0.21632832,
0.21776648, 0.1936721 , 0.17760223, 0.17941557, 0.18099962,
0.17614323, 0.17847764, 0.17760223, 0.17941557, 0.17480929,
0.17512193, 0.17641419, 0.1807912 , 0.18045771, 0.16092793,
0.17512193, 0.17641419, 0.1807912 , 0.18045771, 0.18894076,
0.18964942, 0.17382967, 0.19402643, 0.17560132, 0.18894076,
0.18964942, 0.17716453, 0.19402643, 0.20367669, 0.20044604,
0.21805828, 0.20732419, 0.21795406, 0.21807912, 0.19879945,
0.20519822, 0.20732419, 0.21795406, 0.2133061 , 0.19761141,

# Overlay the densities of actual and predicted values.
# `fill=True` replaces the deprecated `shade=True`.
sns.kdeplot(y_test, fill=True)
sns.kdeplot(y_pred, fill=True)

# The variable is `y_test`; `Y_test` is undefined and raises NameError.
y_test
Date
2018-08-20 0.225312
2018-08-21 0.225061
2018-08-23 0.230522
2018-08-24 0.224061
2018-08-27 0.225645
...
2021-04-26 0.519196
2021-04-27 0.534537
2021-04-28 0.532119
2021-04-29 0.532536
2021-04-30 0.518613
Name: Close, Length: 665, dtype: float64

# Root-mean-squared error between actual and predicted values (scaled units).
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))

0.019417248435172934

# Full target series with the test-range predictions drawn on top.
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()

from sklearn.metrics.pairwise import euclidean_distances


# Pairwise Euclidean distances: one row per test sample, one column per
# training sample.
euc_l = euclidean_distances(X_test, X_train)
euc_l
print(X_test.shape)
print(X_train.shape)
print(euc_l.shape)

(665, 11)
(2657, 11)
(665, 2657)

# KNN Implementation
def get_val(x_train, test_r, y_test, n_neighbors):
    """Predict one value as the mean target of the nearest training rows.

    Args:
        x_train: 2-D array-like of training features, one row per sample.
        test_r: 1-D array-like holding the features of the row to predict.
        y_test: Targets aligned by position with the rows of ``x_train``
            (despite the name, the caller passes the training targets).
            May be a list, an array or a pandas Series with any index.
        n_neighbors: Number of nearest rows to average.

    Returns:
        The mean target of the ``n_neighbors`` rows closest to ``test_r`` by
        Euclidean distance. Ties go to the lower row position.

    Raises:
        ValueError: If ``n_neighbors`` is not between 1 and the number of
            training rows.
    """
    train = np.asarray(x_train, dtype=float)
    query = np.asarray(test_r, dtype=float)
    # Plain array => purely positional lookup. Integer indexing into a
    # date-indexed Series relied on pandas' deprecated positional fallback.
    targets = np.asarray(y_test)

    if not 1 <= n_neighbors <= len(train):
        raise ValueError(
            f"n_neighbors must be between 1 and {len(train)}, got {n_neighbors}"
        )

    # All distances in one vectorised pass instead of one library call per
    # training row.
    dists = np.sqrt(((train - query) ** 2).sum(axis=1))
    # Stable sort keeps the original tie-breaking (lowest position first).
    nearest = np.argsort(dists, kind="stable")[:n_neighbors]
    return targets[nearest].sum() / n_neighbors

# Predict every test row with the hand-written KNN (two neighbours), using
# the training targets.
y_pred = [get_val(X_train.values, row, y_train, 2) for row in X_test.values]
y_pred
[0.22389428488057023,
0.222101796656801,
0.22274792613281086,
0.22689566051106758,
0.22820876234941012,
0.2263329025803493,
0.23291925465838506,
0.23218975363708363,
0.22312309808662303,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.23960982116803534,
0.23544124390345572,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.22787527616824377,
0.2310225520030014,
0.2349410146317062,
0.227833590395598,
0.20847054900162573,
0.18322981366459626,
0.18170828296302471,
0.1788528075367877,
0.20102963858435113,
0.18604360331818748,
0.17616407520113386,
]

# Full target series with the hand-written KNN predictions drawn on top.
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()

# Root-mean-squared error of the hand-written KNN predictions (scaled units).
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))

FINAL OUTPUT:

0.019417248435172934

RESULT:

Thus, stock price prediction was implemented using logistic regression, LSTM (a recurrent
neural network), and KNN models, and executed successfully.

You might also like