ML Record


PROGRAM

import csv

hypo = ['%', '%', '%', '%', '%', '%']

# Open the CSV file and create a CSV reader
with open('ws.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')

    # Print the CSV reader object (optional)
    print(readcsv)

    # Initialize an empty list to store positive examples
    data = []

    # Print the given training examples
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        if row[-1].upper() == "YES":
            data.append(row)

# Print the positive examples
print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

# Initialize variables
TotalExamples = len(data)
d = len(data[0]) - 1

print("The steps of the Find-S algorithm are:\n", hypo)

# Initialize the hypothesis with the first positive example
hypo = data[0][:-1]

# Apply the Find-S algorithm
for i in range(1, TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

# Print the maximally specific Find-S hypothesis
print("\nThe maximally specific Find-S hypothesis for the given training examples is:")
print(hypo)
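For reference, the listing above assumes a six-attribute training file such as the classic EnjoySport data. A minimal sketch of ws.csv (these rows are an assumption for illustration; the file behind the recorded output may differ):

Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes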



OUTPUT



PROGRAM

import numpy as np
import pandas as pd

# Loading Data from a CSV File
data = pd.DataFrame(data=pd.read_csv('trainingdata_candidate elimination.csv'))
print(data)

# Separating concept features from Target
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

# Isolating target into a separate array
# (copying the last column to the target array)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    '''
    learn() implements the learning method of the Candidate Elimination algorithm.
    Arguments:
        concepts - a data frame with all the features
        target - a data frame with corresponding output values
    '''
    # Initialise S0 with the first instance from concepts.
    # .copy() makes sure a new list is created instead of just pointing to the same memory location
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    # The learning iterations
    for i, h in enumerate(concepts):
        # If the instance has a positive target,
        # change values in S & G only where they differ
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        # If the instance has a negative target,
        # change values only in G
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Find indices of rows in general_h that stayed all-'?', meaning they are unchanged
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        # Remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])

    # Return final values
    return specific_h, general_h

s_final, g_final = learn(concepts, target)

print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")



OUTPUT



PROGRAM
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    unique_items = np.unique(data[:, col])
    count = np.zeros((unique_items.shape[0], 1), dtype=np.int32)

    for x in range(unique_items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == unique_items[x]:
                count[x] += 1

    subtables_dict = {}
    for x in range(unique_items.shape[0]):
        subtables_dict[unique_items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == unique_items[x]:
                subtables_dict[unique_items[x]][pos] = data[y]
                pos += 1
        if delete:
            subtables_dict[unique_items[x]] = np.delete(subtables_dict[unique_items[x]], col, 1)
    return unique_items, subtables_dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, subtables_dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))

    for x in range(items.shape[0]):
        ratio = subtables_dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(subtables_dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)

    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)

    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]

    return total_entropy / iv

def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, subtables_dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(subtables_dict[items[x]], metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    # Build an indentation string for pretty-printing the tree
    s = ""
    for x in range(size):
        s += "   "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("Tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
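This program (and the Naive Bayes experiment later in this record) reads the classic Play-Tennis data. A sketch of the assumed Tennis.csv layout, showing the first few of the well-known 14 rows (the recorded file may differ):

outlook,temp,humidity,windy,play
sunny,hot,high,false,no
sunny,hot,high,true,no
overcast,hot,high,false,yes
rainy,mild,high,false,yes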

OUTPUT



PROGRAM

from math import exp
from random import seed
from random import random

# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights': [random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network

# Calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation (sigmoid)
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(neuron['output'] - expected[j])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] -= l_rate * neuron['delta']

# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

# Test training backprop algorithm
seed(1)
dataset = [
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1]
]
n_inputs = len(dataset[0]) - 1
n_outputs = len(set([row[-1] for row in dataset]))

network = initialize_network(n_inputs, 2, n_outputs)
train_network(network, dataset, 0.5, 20, n_outputs)



for layer in network:
    print(layer)
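After training, the same forward pass doubles as a predictor. A minimal sketch (the predict helper below is an addition, not part of the recorded program) that labels each training row with the index of the largest output neuron:

# Predict a class label with the trained network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

for row in dataset:
    print('expected=%d, predicted=%d' % (row[-1], predict(network, row)))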

OUTPUT



PROGRAM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv(r"Tennis.csv")
print("The first 5 values of the data are:\n", data.head())

# Extract features (X) and target variable (y)
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())
y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())

# Encode categorical variables using LabelEncoder
le_outlook = LabelEncoder()
X['outlook'] = le_outlook.fit_transform(X['outlook'])
le_Temperature = LabelEncoder()
X['temp'] = le_Temperature.fit_transform(X['temp'])
le_Humidity = LabelEncoder()
X['humidity'] = le_Humidity.fit_transform(X['humidity'])
le_Windy = LabelEncoder()
X['windy'] = le_Windy.fit_transform(X['windy'])
print("\nNow the Train data is:\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n", y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Create and train the Gaussian Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Evaluate the classifier's accuracy on the test set
y_pred = classifier.predict(X_test)
print("Accuracy is:", accuracy_score(y_test, y_pred))



OUTPUT



PROGRAM

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Define categories for text classification
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

# Fetch the training and testing datasets
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True,
                                  remove=('headers', 'footers', 'quotes'))
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True,
                                 remove=('headers', 'footers', 'quotes'))

# Convert text data to feature vectors using CountVectorizer
count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(twenty_train.data)

# Apply TF-IDF transformation to the feature vectors
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)

# Train a Multinomial Naive Bayes classifier
mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)

# Transform the test data to feature vectors and apply TF-IDF transformation
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)

# Make predictions on the test set
predicted = mod.predict(X_test_tfidf)

# Evaluate the performance of the classifier
print("Accuracy:", accuracy_score(twenty_test.target, predicted))
print("\nClassification Report:\n",
      classification_report(twenty_test.target, predicted, target_names=twenty_test.target_names))
print("\nConfusion Matrix:\n", confusion_matrix(twenty_test.target, predicted))



OUTPUT



PROGRAM

import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

data = pd.read_csv("heartdisease.csv")
heart_disease = pd.DataFrame(data)
print(heart_disease)

model = BayesianModel([
    ('age', 'Lifestyle'),
    ('Gender', 'Lifestyle'),
    ('Family', 'heartdisease'),
    ('Lifestyle', 'diet'),
    ('diet', 'cholestrol'),
    ('cholestrol', 'heartdisease')
])

model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

HeartDisease_infer = VariableElimination(model)

print('For Age enter SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4')
print('For Gender enter Male:0, Female:1')
print('For Family History enter Yes:1, No:0')
print('For Diet enter High:0, Medium:1')
print('for LifeStyle enter Athlete:0, Active:1, Moderate:2, Sedentary:3')
print('for Cholesterol enter High:0, BorderLine:1, Normal:2')

q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age': int(input('Enter Age: ')),
    'Gender': int(input('Enter Gender: ')),
    'Family': int(input('Enter Family History: ')),
    'diet': int(input('Enter Diet: ')),
    'Lifestyle': int(input('Enter Lifestyle: ')),
    'cholestrol': int(input('Enter Cholestrol: '))
})

print(q)

probability_heart_disease = q.values[1] # Assuming 1 is the index for heart disease being present
print(f'Probability of Heart Disease: {probability_heart_disease}')
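For reference, the program expects heartdisease.csv with integer-coded columns matching the prompts above. A sketch of the assumed layout (rows invented for illustration; the recorded file may differ):

age,Gender,Family,diet,Lifestyle,cholestrol,heartdisease
0,0,1,1,3,0,1
0,1,1,1,3,0,1
1,0,0,0,2,1,1
4,0,1,1,3,2,0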



OUTPUT



PROGRAM

from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = load_iris()

X = pd.DataFrame(dataset.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(dataset.target)
y.columns = ['Targets']

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3, n_init=10) # Explicitly set n_init
model.fit(X)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
plt.title('KMeans')

# GMM PLOT
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')

# Display the plots
plt.tight_layout()
plt.show()
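The sklearn.metrics module is imported as sm but never used in the listing. A hedged sketch of how it could quantify agreement between the clusterings and the true labels (cluster ids are arbitrary, so the confusion matrix is more telling than raw accuracy):

# Compare each clustering against the true iris labels
print("KMeans confusion matrix:\n", sm.confusion_matrix(y.Targets, predY))
print("GMM confusion matrix:\n", sm.confusion_matrix(y.Targets, y_cluster_gmm))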



OUTPUT



PROGRAM

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np

dataset = load_iris()
#print(dataset)
X_train, X_test, y_train, y_test = train_test_split(dataset["data"], dataset["target"], random_state=0)
kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test, y_test))
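A single unseen measurement can be classified the same way. A sketch with an invented flower (the four values are sepal length/width and petal length/width in cm, chosen arbitrarily):

# Classify one new flower
x_new = np.array([[5.0, 2.9, 1.0, 0.2]])
print("Predicted class:", dataset["target_names"][kn.predict(x_new)][0])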

OUTPUT



PROGRAM

from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]

    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)

    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

import math

n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3

yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()
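The fraction f sets how much of the data each local fit uses. A quick illustrative sketch (the two settings are chosen arbitrarily, not part of the recorded program) comparing smoothing levels on the same noisy sine:

# Larger f -> smoother estimate; smaller f -> closer tracking of the noise
for frac in (0.1, 0.5):
    plt.plot(x, lowess(x, y, frac, iterations), label="f=%.1f" % frac)
plt.plot(x, y, "r.")
plt.legend()
plt.show()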



OUTPUT

