Professional Documents
Culture Documents
Stock Price Prediction
Stock Price Prediction
NOVEMBER 2022
MACHINE LEARNING
Submitted by
SARAVANAKKUMAR T A
19CSR181
SUDHAN E D
20CSR211
VASANTH V
20CSR230
Train and test the models. Here we use three different models to obtain
better accuracy. First, we use a linear regression model, which belongs
to the category of supervised machine learning and achieves good
accuracy among these models. Second, we use a Long Short-Term Memory
(LSTM) model, which is also well suited to predicting the stock price.
The third and last model we use is KNN (K-Nearest Neighbours).
# Suppress noisy library warnings in the notebook output.
import warnings
warnings.filterwarnings('ignore')

# BUG FIX: DataFrame.shape is a property holding a tuple, not a method —
# the original `df.shape()` raised "TypeError: 'tuple' object is not callable".
df.shape
(3322, 15)
# Summary statistics (count/mean/std/quantiles) for the numeric columns.
df.describe()
# Column dtypes and non-null counts; per the output below only 'Trades'
# has missing values (2456 of 3322 non-null).
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3322 entries, 0 to 3321
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null object
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: float64(10), int64(2), object(3)
memory usage: 389.4+ KB
# Line plot of the closing price over the whole dataset.
plt.figure(figsize=(15,5))
plt.plot(df['Close'])
plt.title('AdaniPorts Close price.', fontsize=15)
# NOTE(review): ADANIPORTS trades on the NSE (Symbol/Series/VWAP columns
# above), so prices are presumably INR, not dollars — confirm the label.
plt.ylabel('Price in dollars.')
plt.show()
# FIX: the original repeated this identical statement twice; one copy removed.
# NOTE(review): comparing 'Close' to itself is True for every non-NaN row,
# so this just reproduces the full shape — it was likely meant to compare
# 'Close' against another column (e.g. an adjusted close); confirm intent.
df[df['Close'] == df['Close']].shape
(3322, 15)
In [22]:
df.isnull().sum()
Date 0
Symbol 0
Series 0
Prev Close 0
Open 0
High 0
Low 0
Last 0
Close 0
VWAP 0
Volume 0
Turnover 0
Trades 866
Deliverable Volume 0
%Deliverble 0
dtype: int64
# Box plots of the feature columns to spot outliers.
# FIX: the original called plt.subplots(figsize=(20,10)) twice in a row,
# creating an extra empty figure — one call removed.
# NOTE(review): `features` is not defined at this point in the visible
# excerpt — presumably a list of numeric column names; confirm upstream.
plt.subplots(figsize=(20,10))
for i, col in enumerate(features):
    plt.subplot(6, 4, i + 1)
    sb.boxplot(df[col])
plt.show()
# Split the Date string into day/month/year integer features.
# NOTE(review): `splitted` is not defined in the visible excerpt —
# presumably df['Date'].str.split(..., expand=True); the component order
# (index 2 = year, index 0 = day) depends on the raw date format — confirm.
df['year'] = splitted[2].astype('int')
df['month'] = splitted[1].astype('int')
df['date'] = splitted[0].astype('int')
df.head()
# Binary flag for quarter-end months (March, June, September, December).
df['is_quarter_end'] = np.where(df['month']%3==0,1,0)
df.head()
# Yearly averages of each column.
data_grouped = df.groupby('year').mean()
plt.subplots(figsize=(20,10))
plt.figure(figsize=(10, 10))
# Standardise the feature matrix to zero mean / unit variance.
# NOTE(review): `features` is re-bound here to a scaled ndarray, so at this
# point it must be a numeric matrix, not the column-name list used for the
# box plots above — verify which object is in scope.
scaler = StandardScaler()
features = scaler.fit_transform(features)
(2989, 3) (333, 3)
# Model Development and Evaluation: fit three classifiers and report
# train/validation scores for each.
models = [
    LogisticRegression(),
    SVC(kernel='poly', probability=True),
    XGBClassifier(),
]
for model in models:
    model.fit(X_train, Y_train)
    # Despite the printed labels, the metric is ROC-AUC on the positive-class
    # probabilities, not plain accuracy.
    print(f'{model} : ')
    print('Training Accuracy : ', metrics.roc_auc_score(Y_train, model.predict_proba(X_train)[:,1]))
    print('Validation Accuracy : ', metrics.roc_auc_score(Y_valid, model.predict_proba(X_valid)[:,1]))
    print()
ACCURACY:
# Univariate LSTM setup: model the 'Open' price series only,
# reshaped to a single-feature column vector.
df = df['Open'].values
df = df.reshape(-1, 1)
# Chronological 80/20 train/test split (no shuffling).
dataset_train = np.array(df[:int(df.shape[0]*0.8)])
dataset_test = np.array(df[int(df.shape[0]*0.8):])
# Scale to [0, 1]; fitted on the training slice only, then applied to the
# test slice, so no test information leaks into the scaler.
scaler = MinMaxScaler(feature_range=(0,1))
dataset_train = scaler.fit_transform(dataset_train)
dataset_test = scaler.transform(dataset_test)
def create_dataset(df):
    """Build sliding-window samples from a (n, 1) series.

    Each sample x is the 50 previous values of column 0 and the target y
    is the value immediately after the window. Returns (x, y) as ndarrays
    of shapes (n - 50, 50) and (n - 50,).
    """
    windows = []
    targets = []
    for end in range(50, df.shape[0]):
        windows.append(df[end - 50:end, 0])
        targets.append(df[end, 0])
    return np.array(windows), np.array(targets)
# Build windowed samples from the scaled train/test series.
x_train, y_train = create_dataset(dataset_train)
x_test, y_test = create_dataset(dataset_test)

# Reshape to (samples, timesteps, 1) as LSTM layers expect.
# FIX: the original reshaped x_test twice (once before and once after the
# model definition) — the second call was a redundant no-op and is removed.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Four stacked LSTM layers (96 units each) with dropout, ending in a
# single-unit regression head.
model = Sequential()
model.add(LSTM(units=96, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Train with MSE loss, then round-trip through disk (save + reload).
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=50, batch_size=32)
model.save('stock_prediction.h5')
model = load_model('stock_prediction.h5')

# Predict on the test windows and invert the MinMax scaling so both the
# predictions and the targets are back in price units.
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot actual vs. predicted prices on a dark background.
fig, ax = plt.subplots(figsize=(16,8))
ax.set_facecolor('#000041')
ax.plot(y_test_scaled, color='red', label='Original price')
plt.plot(predictions, color='cyan', label='Predicted price')
plt.legend()
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# FIX: the original mixed an escaped and an unescaped backslash
# ("D:\\archive (1)\ADANIPORTS.csv"); the bare \A is an invalid escape
# sequence (a SyntaxWarning on modern Python). A raw string yields the
# exact same path bytes without the warning.
df = pd.read_csv(r"D:\archive (1)\ADANIPORTS.csv")
df.head()
print(df)

# Parse Date and use it as the index, keeping it as a column too
# (drop=False) so .dt accessors can be used on it later.
df['Date'] = pd.to_datetime(df.Date,format='%Y-%m-%d')
df.set_index("Date", drop=False, inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3322 entries, 2007-11-27 to 2021-04-30
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 3322 non-null datetime64[ns]
1 Symbol 3322 non-null object
2 Series 3322 non-null object
3 Prev Close 3322 non-null float64
4 Open 3322 non-null float64
5 High 3322 non-null float64
6 Low 3322 non-null float64
7 Last 3322 non-null float64
8 Close 3322 non-null float64
9 VWAP 3322 non-null float64
10 Volume 3322 non-null int64
11 Turnover 3322 non-null float64
12 Trades 2456 non-null float64
13 Deliverable Volume 3322 non-null int64
14 %Deliverble 3322 non-null float64
dtypes: datetime64[ns](1), float64(10), int64(2), object(2)
memory usage: 415.2+ KB
Date 0.0
Prev Close 0.0
Open 0.0
High 0.0
Low 0.0
Last 0.0
Close 0.0
VWAP 0.0
Volume 0.0
Turnover 0.0
dtype: float64
# VWAP over time, then its density estimate.
df.VWAP.plot(figsize=(25, 5))
plt.show()
sns.kdeplot(df.VWAP, shade=True)
plt.show()
# Calendar features derived from the Date column.
df["month"] = df.Date.dt.month
df["week"] = df.Date.dt.isocalendar().week
# NOTE(review): isocalendar().day is the ISO weekday (1=Mon..7=Sun), NOT
# the day of the month — so "day" and "day_of_week" are both weekday
# features (off by one from each other); confirm that is intended.
df["day"] = df.Date.dt.isocalendar().day
df["day_of_week"] = df.Date.dt.dayofweek
# Drop the Date column (the DatetimeIndex copy is kept).
df.drop(['Date'], axis=1, inplace=True)
df
# Remember the index and column labels so the scaled ndarray can be
# rebuilt into a DataFrame below.
indx = df.index
cols = df.columns
(indx, cols)
# Min-max scale every column to [0, 1] and rebuild the DataFrame.
# NOTE(review): the scaler is fitted on the FULL dataset (target included,
# before the chronological train/test split below), so test-set information
# leaks into the scaling — fit on the training slice only.
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(df)
data = pd.DataFrame(data, index=indx, columns=cols)
df = data
df
# y = df['VWAP']
# y.index = indx
# df.drop(['VWAP'], axis=1, inplace=True)
# Target is the (scaled) Close; drop Close and VWAP from the features.
y = df['Close']
y.index = indx
df.drop(['VWAP', 'Close'], axis=1, inplace=True)
df
X = df
print(X)
p=len(X)
print(p)
len(X)
3322
# Chronological 80/20 split — no shuffling, so the test set is the most
# recent fifth of the history.
cut = int(len(X) * 0.8)
X_train, X_test = X[:cut], X[cut:]
y_train, y_test = y[:cut], y[cut:]
(X_test, y_test)
print(X_train.shape)
print(X_test.shape)
(2657, 11)
(665, 11)
# NOTE(review): `knn` is never defined or fitted in the visible excerpt —
# presumably a fitted sklearn KNN regressor; confirm upstream.
y_pred = knn.predict(X_test)
y_pred
# Overlay the density of actual vs. predicted values to compare shapes.
sns.kdeplot(y_test, shade=True)
sns.kdeplot(y_pred, shade=True)
y_test
Date
2018-08-20 0.225312
2018-08-21 0.225061
2018-08-23 0.230522
2018-08-24 0.224061
2018-08-27 0.225645
...
2021-04-26 0.519196
2021-04-27 0.534537
2021-04-28 0.532119
2021-04-29 0.532536
2021-04-30 0.518613
Name: Close, Length: 665, dtype: float64
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))
0.019417248435172934
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()
(665, 11)
(2657, 11)
(665, 2657)
# KNN Implementation (brute force, from scratch)
def get_val(x_train, test_r, y_test, n_neighbors):
    """Predict one value as the mean target of the n_neighbors training
    rows nearest (Euclidean distance) to the test row `test_r`.

    NOTE(review): despite its name, `y_test` receives the *training*
    targets at the call site below; it must support positional indexing.
    """
    # Distance from the test row to every training row.
    # np.linalg.norm of the difference equals sklearn's
    # euclidean_distances for a single pair, without the sklearn import
    # (which was never visible in this file).
    distances = []
    for i in range(len(x_train)):
        dist = float(np.linalg.norm(np.asarray(test_r) - np.asarray(x_train[i])))
        distances.append((i, dist))
    distances.sort(key=lambda tup: tup[1])
    # Average the targets of the k closest training rows.
    v = 0
    for i in range(n_neighbors):
        v += y_test[distances[i][0]]
    return v / n_neighbors
# Predict every test row with the hand-rolled KNN (k = 2).
y_pred = [get_val(X_train.values, row, y_train, 2) for row in X_test.values]
y_pred
[0.22389428488057023,
0.222101796656801,
0.22274792613281086,
0.22689566051106758,
0.22820876234941012,
0.2263329025803493,
0.23291925465838506,
0.23218975363708363,
0.22312309808662303,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.23960982116803534,
0.23544124390345572,
0.2310225520030014,
0.2348368002000917,
0.2330443119763225,
0.22787527616824377,
0.2310225520030014,
0.2349410146317062,
0.227833590395598,
0.20847054900162573,
0.18322981366459626,
0.18170828296302471,
0.1788528075367877,
0.20102963858435113,
0.18604360331818748,
0.17616407520113386,
]
# Full target series with the KNN predictions overlaid on the test dates.
plt.figure(figsize=(20, 5))
plt.plot(y)
plt.plot(y_test.index, y_pred)
plt.show()
# RMSE between actual and predicted values (both on the [0, 1]
# MinMax-scaled axis, so this is not in price units).
np.sqrt(np.mean(np.power((np.array(y_test)-np.array(y_pred)),2)))
FINAL OUTPUT:
0.019417248435172934
RESULT:
Thus, Stock Price Prediction was implemented using linear regression, LSTM, and KNN, and executed
successfully.