Simple Linear Regression

You might also like

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 11

SIMPLE LINEAR REGRESSION

# Simple linear regression: fit a straight line to the salary data and
# compare its predictions with a held-out test split.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[4]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\Sunny115\week2_Salary_Data.csv"
dataset = pd.read_csv(myfile)
X = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> target

# In[5]:
from sklearn.model_selection import train_test_split
# Hold out one third of the rows; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=0
)

# In[6]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# In[7]:
print(X_train)

# In[9]:
# Predict on the held-out rows and show actual vs. predicted targets.
y_pred = regressor.predict(X_test)
print(y_test)
print(y_pred)

# In[10]:
from sklearn.metrics import r2_score
# Printed explicitly so the score is visible when run as a plain script,
# not only as the last expression of a notebook cell.
print(r2_score(y_test, y_pred))

# Visualising the training set results
plt.scatter(X_train, y_train, color='blue')
plt.plot(X_train, regressor.predict(X_train), color='green')
plt.title('Salary vs Expereince (training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualising the test set results
# The scatter shows the held-out points; the line is still drawn from the
# training inputs because it is the same fitted model either way.
plt.scatter(X_test, y_test, color='blue')
plt.plot(X_train, regressor.predict(X_train), color='green')
plt.title('Salary vs Expereince (test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
MULTIPLE LINEAR REGRESSION

# Multiple linear regression on the startups data: one-hot encode the
# categorical column, fit a linear model, and score it on a test split.

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

# In[2]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\Sunny115\week2_50_Startups.csv"
dataset = pd.read_csv(myfile)
X = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> target

# Encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the column at index 3; every other column passes through
# unchanged (encoded columns come first in the transformed output).
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [3])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

# In[8]:
print(X)

# Splitting the dataset
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# In[14]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# In[15]:
# Predict on the held-out rows and show actual vs. predicted targets.
y_pred = regressor.predict(X_test)
print(y_test)
print(y_pred)

# In[16]:
from sklearn.metrics import r2_score
# Printed explicitly so the score is visible when run as a plain script.
print(r2_score(y_test, y_pred))
BINOMIAL LOGISTIC REGRESSION

# Binomial logistic regression on scikit-learn's bundled breast-cancer
# dataset, with features standardised before fitting.

# Import the necessary libraries
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset as a (features, labels) pair
X, y = load_breast_cancer(return_X_y=True)

# Split into train and test data (80/20; random_state makes it repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)
print(y_test)

# Standardize features using StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test data reach the model.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the logistic regression model
model = LogisticRegression()

# Train the model on the scaled training data
model.fit(X_train_scaled, y_train)

# Make predictions on the scaled testing data
y_pred = model.predict(X_test_scaled)
print(y_pred)

# Evaluate model performance
acc = accuracy_score(y_test, y_pred)
print("Logistic Regression model accuarcy (in %):", acc*100)
MULTIPLE LOGISTIC REGRESSION

# Multi-class logistic regression on scikit-learn's bundled digits dataset,
# with features standardised before fitting.

# Import the necessary libraries
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the digits dataset
digits = datasets.load_digits()

# Define feature matrix X and response vector y
X = digits.data
y = digits.target
print(X)
print(y)

# Split into train and test data (80/20; random_state makes it repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)
print(y_test)

# Standardize features using StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test data reach the model.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the logistic regression model
model = LogisticRegression()

# Train the model on the scaled training data
model.fit(X_train_scaled, y_train)

# Make predictions on the scaled testing data
y_pred = model.predict(X_test_scaled)
print(y_pred)

# Evaluate model performance
acc = accuracy_score(y_test, y_pred)
print("Logistic Regression model accuarcy (in %):", acc*100)
KNN CLASSIFIER

# K-nearest-neighbours (KNN) classifier on the social-network-ads data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[9]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\BD_dataset\week3_Social_Network_Ads.csv"
dataset = pd.read_csv(myfile)
x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0
)
print(x_train)

# In[21]:
print(y_test)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse those statistics
# for the test split.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)

# In[23]:
print(x_test)

# Training the KNN model on the training set
from sklearn.neighbors import KNeighborsClassifier
# Minkowski metric with p=2 is the Euclidean distance.
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)
print(y_pred)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Printed explicitly so the score is visible when run as a plain script.
print(accuracy_score(y_test, y_pred))
SVM CLASSIFIER

# Support Vector Machine (SVM) classifier on the social-network-ads data.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[12]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\BD_dataset\week3_Social_Network_Ads.csv"
dataset = pd.read_csv(myfile)
x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0
)
print(x_train)

# In[14]:
print(y_test)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse those statistics
# for the test split.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)

# In[16]:
print(x_test)

# Training the SVM model on the training set.
# This section previously trained a DecisionTreeClassifier (copied from the
# decision-tree notebook), so it never actually exercised an SVM.
# NOTE(review): a linear kernel is assumed here; switch to kernel='rbf'
# (the SVC default) if a non-linear boundary is wanted.
from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)
print(y_pred)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Printed explicitly so the score is visible when run as a plain script.
print(accuracy_score(y_test, y_pred))
DECISION TREE CLASSIFIER

# Decision tree classifier on the social-network-ads data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[20]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\BD_dataset\week3_Social_Network_Ads.csv"
dataset = pd.read_csv(myfile)
x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0
)
print(x_train)

# In[22]:
print(y_test)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse those statistics
# for the test split.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)

# In[24]:
print(x_test)

# Training the DecisionTreeClassifier model on the training set.
# Import the class that is actually instantiated below; the previous
# GaussianNB import left DecisionTreeClassifier undefined in this notebook.
from sklearn.tree import DecisionTreeClassifier
# Splits are chosen by entropy; random_state makes the tree repeatable.
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)
print(y_pred)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Printed explicitly so the score is visible when run as a plain script.
print(accuracy_score(y_test, y_pred))
NAIVE BAYES CLASSIFIER

# Gaussian naive Bayes classifier on the social-network-ads data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[46]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\BD_dataset\week3_Social_Network_Ads.csv"
dataset = pd.read_csv(myfile)
x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0
)
print(x_train)

# In[48]:
print(y_test)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse those statistics
# for the test split.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)

# In[50]:
print(x_test)

# Training the GaussianNB model on the training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)
print(y_pred)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Printed explicitly so the score is visible when run as a plain script.
print(accuracy_score(y_test, y_pred))
RANDOM FOREST CLASSIFIER

# Random forest classifier on the social-network-ads data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# In[2]:
# Raw string: the Windows backslashes must never be read as escape sequences.
myfile = r"D:\BD_dataset\week3_Social_Network_Ads.csv"
dataset = pd.read_csv(myfile)
x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
# 80/20 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0
)
print(x_train)

# In[4]:
print(y_test)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse those statistics
# for the test split.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)

# In[6]:
print(x_test)

# Training the RandomForestClassifier model on the training set
from sklearn.ensemble import RandomForestClassifier
# random_state makes the ensemble repeatable between runs.
classifier = RandomForestClassifier(random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)
print(y_pred)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Printed explicitly so the score is visible when run as a plain script.
print(accuracy_score(y_test, y_pred))
LOGISTIC REGRESSION USING EXTERNAL DATASET
# Logistic regression on an external CSV: scale the features, fit the
# classifier, predict one hand-written sample, then evaluate on a test split.
import numpy as np # type: ignore
import pandas as pd # type: ignore
import matplotlib.pyplot as plt # type: ignore

mydatafile = "week3_social_networks_ads.csv"
dataset = pd.read_csv(mydatafile)

x = dataset.iloc[:, :-1].values  # every column except the last -> features
y = dataset.iloc[:, -1].values   # last column -> class label

from sklearn.model_selection import train_test_split # type: ignore

# 75/25 split; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=0
)

#print(x_train)
#print(y_train)
#print(x_test)
#print(y_test)

from sklearn.preprocessing import StandardScaler # type: ignore

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
# Only transform the test split. Re-fitting here would scale it with its own
# mean/variance, leaving it inconsistent with the data the model was trained
# on and with the single-sample prediction below.
x_test = sc.transform(x_test)

#print(x_train)
#print(x_test)

from sklearn.linear_model import LogisticRegression # type: ignore

classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

# Predict one new sample, scaled with the training statistics.
# NOTE(review): presumably [age, estimated salary] -- confirm against the CSV
# column order.
print(classifier.predict(sc.transform([[30, 87000]])))

# Evaluate the model
# Uses the lowercase x_test defined above; X_test does not exist in this script.
y_pred = classifier.predict(x_test) # type: ignore
#print(y_test)
#print(y_pred)

from sklearn.metrics import confusion_matrix, accuracy_score # type: ignore

cm = confusion_matrix(y_test, y_pred)
print(cm)
print("Accuracy:", accuracy_score(y_test, y_pred))

You might also like