Professional Documents
Culture Documents
Stock Price Prediction
Stock Price Prediction
NOVEMBER 2022
MACHINE LEARNING
Submitted by
SARAVANAKKUMAR T A
19CSR181
SUDHAN E D
20CSR211
VASANTH V
20CSR230
Train and test the models. Here we use three different models to obtain
better accuracy. First, we use a linear regression model, which belongs
to the category of supervised machine learning and achieves good
accuracy among these models. Second, we use a Long Short-Term Memory
(LSTM) model, which is also well suited to predicting the stock price.
The third and last model we use is KNN (K-Nearest Neighbours).
# Suppress noisy library warnings in the notebook output.
import warnings
warnings.filterwarnings('ignore')

# BUG FIX: DataFrame.shape is a property holding a tuple, not a method —
# the original `df.shape()` raised "TypeError: 'tuple' object is not callable".
df.shape
(3322, 15)
# Summary statistics (count/mean/std/quantiles) for the numeric columns.
df.describe()
# Column dtypes and non-null counts; per the output below only 'Trades'
# has missing values (2456 of 3322 non-null).
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3322 entries, 0 to 3321
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null object
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: float64(10), int64(2), object(3)
memory usage: 389.4+ KB
# Line plot of the closing price over the whole dataset.
plt.figure(figsize=(15,5))
plt.plot(df['Close'])
plt.title('AdaniPorts Close price.', fontsize=15)
# NOTE(review): ADANIPORTS trades on the NSE (Symbol/Series/VWAP columns
# above), so prices are presumably INR, not dollars — confirm the label.
plt.ylabel('Price in dollars.')
plt.show()
# FIX: the original repeated this identical statement twice; one copy removed.
# NOTE(review): comparing 'Close' to itself is True for every non-NaN row,
# so this just reproduces the full shape — it was likely meant to compare
# 'Close' against another column (e.g. an adjusted close); confirm intent.
df[df['Close'] == df['Close']].shape
(3322, 15)
In [22]:
df.isnull().sum()
Date 0
Symbol 0
Series 0
Prev Close 0
Open 0
High 0
Low 0
Last 0
Close 0
VWAP 0
Volume 0
Turnover 0
Trades 866
Deliverable Volume 0
%Deliverble 0
dtype: int64
# Box plots of the feature columns to spot outliers.
# FIX: the original called plt.subplots(figsize=(20,10)) twice in a row,
# creating an extra empty figure — one call removed.
# NOTE(review): `features` is not defined at this point in the visible
# excerpt — presumably a list of numeric column names; confirm upstream.
plt.subplots(figsize=(20,10))
for i, col in enumerate(features):
    plt.subplot(6, 4, i + 1)
    sb.boxplot(df[col])
plt.show()
# Split the Date string into day/month/year integer features.
# NOTE(review): `splitted` is not defined in the visible excerpt —
# presumably df['Date'].str.split(..., expand=True); the component order
# (index 2 = year, index 0 = day) depends on the raw date format — confirm.
df['year'] = splitted[2].astype('int')
df['month'] = splitted[1].astype('int')
df['date'] = splitted[0].astype('int')
df.head()
# Binary flag for quarter-end months (March, June, September, December).
df['is_quarter_end'] = np.where(df['month']%3==0,1,0)
df.head()
# Yearly averages of each column.
data_grouped = df.groupby('year').mean()
plt.subplots(figsize=(20,10))
plt.figure(figsize=(10, 10))
# Standardise the feature matrix to zero mean / unit variance.
# NOTE(review): `features` is re-bound here to a scaled ndarray, so at this
# point it must be a numeric matrix, not the column-name list used for the
# box plots above — verify which object is in scope.
scaler = StandardScaler()
features = scaler.fit_transform(features)
(2989, 3) (333, 3)
# Model Development and Evaluation: fit three classifiers and report
# train/validation scores for each.
models = [
    LogisticRegression(),
    SVC(kernel='poly', probability=True),
    XGBClassifier(),
]
for model in models:
    model.fit(X_train, Y_train)
    # Despite the printed labels, the metric is ROC-AUC on the positive-class
    # probabilities, not plain accuracy.
    print(f'{model} : ')
    print('Training Accuracy : ', metrics.roc_auc_score(Y_train, model.predict_proba(X_train)[:,1]))
    print('Validation Accuracy : ', metrics.roc_auc_score(Y_valid, model.predict_proba(X_valid)[:,1]))
    print()
ACCURACY:
# Univariate LSTM setup: model the 'Open' price series only,
# reshaped to a single-feature column vector.
df = df['Open'].values
df = df.reshape(-1, 1)
# Chronological 80/20 train/test split (no shuffling).
dataset_train = np.array(df[:int(df.shape[0]*0.8)])
dataset_test = np.array(df[int(df.shape[0]*0.8):])
# Scale to [0, 1]; fitted on the training slice only, then applied to the
# test slice, so no test information leaks into the scaler.
scaler = MinMaxScaler(feature_range=(0,1))
dataset_train = scaler.fit_transform(dataset_train)
dataset_test = scaler.transform(dataset_test)
def create_dataset(df):
    """Build sliding-window samples from a (n, 1) series.

    Each sample x is the 50 previous values of column 0 and the target y
    is the value immediately after the window. Returns (x, y) as ndarrays
    of shapes (n - 50, 50) and (n - 50,).
    """
    windows = []
    targets = []
    for end in range(50, df.shape[0]):
        windows.append(df[end - 50:end, 0])
        targets.append(df[end, 0])
    return np.array(windows), np.array(targets)
# Build windowed samples from the scaled train/test series.
x_train, y_train = create_dataset(dataset_train)
x_test, y_test = create_dataset(dataset_test)

# Reshape to (samples, timesteps, 1) as LSTM layers expect.
# FIX: the original reshaped x_test twice (once before and once after the
# model definition) — the second call was a redundant no-op and is removed.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Four stacked LSTM layers (96 units each) with dropout, ending in a
# single-unit regression head.
model = Sequential()
model.add(LSTM(units=96, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Train with MSE loss, then round-trip through disk (save + reload).
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=50, batch_size=32)
model.save('stock_prediction.h5')
model = load_model('stock_prediction.h5')

# Predict on the test windows and invert the MinMax scaling so both the
# predictions and the targets are back in price units.
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot actual vs. predicted prices on a dark background.
fig, ax = plt.subplots(figsize=(16,8))
ax.set_facecolor('#000041')
ax.plot(y_test_scaled, color='red', label='Original price')
plt.plot(predictions, color='cyan', label='Predicted price')
plt.legend()
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# FIX: the original mixed an escaped and an unescaped backslash
# ("D:\\archive (1)\ADANIPORTS.csv"); the bare \A is an invalid escape
# sequence (a SyntaxWarning on modern Python). A raw string yields the
# exact same path bytes without the warning.
df = pd.read_csv(r"D:\archive (1)\ADANIPORTS.csv")
df.head()
print(df)

# Parse Date and use it as the index, keeping it as a column too
# (drop=False) so .dt accessors can be used on it later.
df['Date'] = pd.to_datetime(df.Date,format='%Y-%m-%d')
df.set_index("Date", drop=False, inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3322 entries, 2007-11-27 to 2021-04-30
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null datetime64[ns]
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: datetime64[ns](1), float64(10), int64(2), object(2)
memory usage: 415.2+ KB
Date 0.0
Prev Close 0.0
Open 0.0
High 0.0
Low 0.0
Last 0.0
Close 0.0
VWAP 0.0
Volume 0.0
Turnover 0.0
dtype: float64
# VWAP over time, then its density estimate.
df.VWAP.plot(figsize=(25, 5))
plt.show()
sns.kdeplot(df.VWAP, shade=True)
plt.show()
# Calendar features derived from the Date column.
df["month"] = df.Date.dt.month
df["week"] = df.Date.dt.isocalendar().week
# NOTE(review): isocalendar().day is the ISO weekday (1=Mon..7=Sun), NOT
# the day of the month — so "day" and "day_of_week" are both weekday
# features (off by one from each other); confirm that is intended.
df["day"] = df.Date.dt.isocalendar().day
df["day_of_week"] = df.Date.dt.dayofweek
# Drop the Date column (the DatetimeIndex copy is kept).
df.drop(['Date'], axis=1, inplace=True)
df
# Remember the index and column labels so the scaled ndarray can be
# rebuilt into a DataFrame below.
indx = df.index
cols = df.columns
(indx, cols)
# Min-max scale every column to [0, 1] and rebuild the DataFrame.
# NOTE(review): the scaler is fitted on the FULL dataset (target included,
# before the chronological train/test split below), so test-set information
# leaks into the scaling — fit on the training slice only.
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(df)
data = pd.DataFrame(data, index=indx, columns=cols)
df = data
df
# y = df['VWAP']
# y.index = indx
# df.drop(['VWAP'], axis=1, inplace=True)
# Target is the (scaled) Close; drop Close and VWAP from the features.
y = df['Close']
y.index = indx
df.drop(['VWAP', 'Close'], axis=1, inplace=True)
df
X = df
print(X)
p=len(X)
print(p)
len(X)
3322
# Chronological 80/20 split — no shuffling, so the test set is the most
# recent fifth of the history.
cut = int(len(X) * 0.8)
X_train, X_test = X[:cut], X[cut:]
y_train, y_test = y[:cut], y[cut:]
(X_test, y_test)
print(X_train.shape)
print(X_test.shape)
(2657, 11)
(665, 11)
# NOTE(review): `knn` is never defined or fitted in the visible excerpt —
# presumably a fitted sklearn KNN regressor; confirm upstream.
y_pred = knn.predict(X_test)
y_pred
# Overlay the density of actual vs. predicted values to compare shapes.
sns.kdeplot(y_test, shade=True)
sns.kdeplot(y_pred, shade=True)
y_test
Date
2018-08-20 0.225312
2018-08-21 0.225061
2018-08-23 0.230522
2018-08-24 0.224061
2018-08-27 0.225645
...
2021-04-26 0.519196
2021-04-27 0.534537
2021-04-28 0.532119
2021-04-29 0.532536
2021-04-30 0.518613
Name: Close, Length: 665, dtype: float64
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))
0.019417248435172934
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()
(665, 11)
(2657, 11)
(665, 2657)
# KNN Implementation (brute force, from scratch)
def get_val(x_train, test_r, y_test, n_neighbors):
    """Predict one value as the mean target of the n_neighbors training
    rows nearest (Euclidean distance) to the test row `test_r`.

    NOTE(review): despite its name, `y_test` receives the *training*
    targets at the call site below; it must support positional indexing.
    """
    # Distance from the test row to every training row.
    # np.linalg.norm of the difference equals sklearn's
    # euclidean_distances for a single pair, without the sklearn import
    # (which was never visible in this file).
    distances = []
    for i in range(len(x_train)):
        dist = float(np.linalg.norm(np.asarray(test_r) - np.asarray(x_train[i])))
        distances.append((i, dist))
    distances.sort(key=lambda tup: tup[1])
    # Average the targets of the k closest training rows.
    v = 0
    for i in range(n_neighbors):
        v += y_test[distances[i][0]]
    return v / n_neighbors
# Predict every test row with the hand-rolled KNN (k = 2).
y_pred = [get_val(X_train.values, row, y_train, 2) for row in X_test.values]
y_pred
[0.22389428488057023,
0.222101796656801,
0.22274792613281086,
0.22689566051106758,
0.22820876234941012,
0.2263329025803493,
0.23291925465838506,
0.23218975363708363,
0.22312309808662303,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.23960982116803534,
0.23544124390345572,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.22787527616824377,
0.2310225520030014,
0.2349410146317062,
0.227833590395598,
0.20847054900162573,
0.18322981366459626,
0.18170828296302471,
0.1788528075367877,
0.20102963858435113,
0.18604360331818748,
0.17616407520113386,
]
# Full target series with the KNN predictions overlaid on the test dates.
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()
# RMSE between actual and predicted values (both on the [0, 1]
# MinMax-scaled axis, so this is not in price units).
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))
FINAL OUTPUT:
0.019417248435172934
RESULT:
Thus, Stock Price Prediction was implemented using linear regression, LSTM, and KNN, and executed
successfully.