Simple Linear regression-LAB4.ipynb - Colaboratory

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 6

Import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the dataset
# The CSV is read through a relative path, so 'MBA Salary.csv' must be present
# in the notebook's working directory before this cell runs.
df = pd.read_csv('MBA Salary.csv')

# Preview the first rows to confirm the columns loaded as expected.
df.head()

S. No. Percentage in Grade 10 Salary

0 1 62.00 270000

1 2 76.33 200000

2 3 72.00 240000

3 4 60.00 250000

4 5 61.00 180000

# Summarise column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 50 entries, 0 to 49

Data columns (total 3 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 S. No. 50 non-null int64

1 Percentage in Grade 10 50 non-null float64

2 Salary 50 non-null int64

dtypes: float64(1), int64(2)

memory usage: 1.3 KB

# Report the dataset dimensions as (rows, columns).
print(df.shape)

(50, 3)

# View descriptive statistics
# (count, mean, std, min, quartiles and max for every numeric column)

print(df.describe())

S. No. Percentage in Grade 10 Salary

count 50.00000 50.000000 50.000000

mean 25.50000 63.922400 258192.000000

std 14.57738 9.859937 76715.790993

min 1.00000 37.330000 120000.000000

25% 13.25000 57.685000 204500.000000

50% 25.50000 64.700000 250000.000000

75% 37.75000 70.000000 300000.000000

max 50.00000 83.000000 450000.000000

# Declare feature variable and target variable
# X is the single predictor column (Grade 10 percentage) and y is the salary
# to be predicted; both are 1-D pandas Series at this point.

X = df['Percentage in Grade 10']

y = df['Salary']

# Scatter plot of salary (y) against Grade 10 percentage (X)
fig, ax = plt.subplots()
ax.scatter(X, y, color='blue', label='Scatter Plot')
ax.set_title('Relationship between Grades and Salary of a person')
ax.set_xlabel('Percentage in Grade 10')
ax.set_ylabel('Salary')
ax.legend(loc=4)  # location code 4 puts the legend in the lower-right corner
plt.show()

# Show the dimensions of X and y, one shape per line
print(X.shape, y.shape, sep='\n')

(50,)

(50,)

0 62.00

1 76.33

2 72.00

3 60.00

4 61.00

5 55.00

6 70.00

7 68.00

8 82.80

9 59.00

10 58.00

11 60.00

12 66.00

13 83.00

14 68.00

15 37.33

16 79.00

17 68.40

18 70.00

19 59.00

20 63.00

21 50.00

22 69.00

23 52.00

24 49.00

25 64.60

26 50.00

27 74.00

28 58.00

29 67.00

30 75.00

31 60.00

32 55.00

33 78.00

34 50.08

35 56.00

36 68.00

37 52.00

38 54.00

39 52.00

40 76.00

41 64.80

42 74.40

43 74.50

44 73.50

45 57.58

46 68.00

47 69.00

48 66.00

49 60.80

Name: Percentage in Grade 10, dtype: float64

# Convert both pandas Series into NumPy arrays
X, y = np.array(X), np.array(y)

array([62. , 76.33, 72. , 60. , 61. , 55. , 70. , 68. , 82.8 ,

59. , 58. , 60. , 66. , 83. , 68. , 37.33, 79. , 68.4 ,

70. , 59. , 63. , 50. , 69. , 52. , 49. , 64.6 , 50. ,

74. , 58. , 67. , 75. , 60. , 55. , 78. , 50.08, 56. ,

68. , 52. , 54. , 52. , 76. , 64.8 , 74.4 , 74.5 , 73.5 ,

57.58, 68. , 69. , 66. , 60.8 ])

# Turn X and y into column vectors of shape (n_samples, 1);
# -1 lets NumPy infer the number of rows from the array length
X, y = X.reshape(-1, 1), y.reshape(-1, 1)

# Show the dimensions of X and y after reshaping, one shape per line
print(X.shape, y.shape, sep='\n')

(50, 1)

(50, 1)

# Split X and y into training and test data sets
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the same split is produced on every run.
# NOTE(review): with the 50 rows reported by df.info(), test_size=0.30 should
# give a 35/15 split, yet the recorded shapes show 33/17 - the saved output
# looks stale (produced with a different test_size); re-run to confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Show the dimensions of X_train, y_train, X_test and y_test, one per line
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(33, 1)

(33, 1)

(17, 1)

(17, 1)

# Fit the linear model

# Instantiate the linear regression object lm
# (constructed with scikit-learn's default settings)

from sklearn.linear_model import LinearRegression

lm = LinearRegression()

# Train the model using training data sets
# Only the training split is used here; the test split stays unseen.

lm.fit(X_train,y_train)

# Predict on the test data
# y_pred holds the model's salary estimates for the rows in X_test.

y_pred=lm.predict(X_test)

# Visualising the Training set results
# Draw on a dedicated figure, label it the same way as the test-set plot, and
# finish with plt.show() so the cell renders the chart instead of echoing the
# Line2D repr of the last expression.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train, color='red')  # observed training points
ax.plot(X_train, lm.predict(X_train), color='blue')  # fitted regression line
ax.set_title('Training set results')
ax.set_xlabel('Grades')
ax.set_ylabel('Salary')
plt.show()

[<matplotlib.lines.Line2D at 0x22c2d23c430>]

# Visualising the Test set results
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color='red')  # observed test points
ax.plot(X_test, lm.predict(X_test), color='blue')  # model predictions on the test inputs
ax.set_title('Test set results')
ax.set_xlabel('Grades')
ax.set_ylabel('Salary')
plt.show()

# Compute model slope and intercept
# Both attributes are NumPy arrays here because y was reshaped to 2-D before
# fitting, so the slope prints as [[...]] and the intercept as [...].
slope = lm.coef_
# No trailing comma: the original `lm.intercept_,` wrapped the value in a
# 1-tuple, which printed as (array([...]),) and broke any later arithmetic.
intercept = lm.intercept_

print("Estimated model slope:", slope)
print("Estimated model intercept:", intercept)

Estimated model slope: [[1504.41195413]]

Estimated model intercept: (array([152845.01374103]),)

# Predict the salary for a new observation with 80% in Grade 10.
# The input is a nested list so it has the same 2-D (rows, 1 column) layout as
# the X_train array the model was fitted on.
X_new = [[80]]

lm.predict(X_new)

array([[273197.97007155]])
Colab paid products
-
Cancel contracts here

You might also like