ML Record


PROGRAM

import csv

hypo = ['%', '%', '%', '%', '%', '%']

# Open the CSV file and create a CSV reader
with open('ws.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')

    # Print the CSV reader object (optional)
    print(readcsv)

    # Initialize an empty list to store positive examples
    data = []

    # Print the given training examples
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        if row[-1].upper() == "YES":
            data.append(row)

# Print the positive examples
print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

# Initialize variables
TotalExamples = len(data)
d = len(data[0]) - 1

print("The steps of the Find-S algorithm are:\n", hypo)

# Initialize the hypothesis with the first positive example
hypo = data[0][:-1]

# Apply the Find-S algorithm
for i in range(1, TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

# Print the maximally specific Find-S hypothesis
print("\nThe maximally specific Find-S hypothesis for the given training examples is:")
print(hypo)
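For reference, the listing above assumes a six-attribute training file such as the classic EnjoySport data. A minimal sketch of ws.csv (these rows are an assumption for illustration; the file behind the recorded output may differ):

Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes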



OUTPUT



PROGRAM

import numpy as np
import pandas as pd

# Loading Data from a CSV File
data = pd.DataFrame(data=pd.read_csv('trainingdata_candidate elimination.csv'))
print(data)

# Separating concept features from Target
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

# Isolating target into a separate array
# (copying the last column to the target array)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    '''
    learn() implements the learning method of the Candidate Elimination algorithm.
    Arguments:
        concepts - a data frame with all the features
        target - a data frame with corresponding output values
    '''
    # Initialise S0 with the first instance from concepts.
    # .copy() makes sure a new list is created instead of just pointing to the same memory location
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    # The learning iterations
    for i, h in enumerate(concepts):
        # If the instance has a positive target,
        # change values in S & G only where they differ
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        # If the instance has a negative target,
        # change values only in G
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Find indices of rows in general_h that stayed all-'?', meaning they are unchanged
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        # Remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])

    # Return final values
    return specific_h, general_h

s_final, g_final = learn(concepts, target)

print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")



OUTPUT



PROGRAM
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    unique_items = np.unique(data[:, col])
    count = np.zeros((unique_items.shape[0], 1), dtype=np.int32)

    for x in range(unique_items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == unique_items[x]:
                count[x] += 1

    subtables_dict = {}
    for x in range(unique_items.shape[0]):
        subtables_dict[unique_items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == unique_items[x]:
                subtables_dict[unique_items[x]][pos] = data[y]
                pos += 1
        if delete:
            subtables_dict[unique_items[x]] = np.delete(subtables_dict[unique_items[x]], col, 1)
    return unique_items, subtables_dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, subtables_dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))

    for x in range(items.shape[0]):
        ratio = subtables_dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(subtables_dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)

    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)

    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]

    return total_entropy / iv

def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, subtables_dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(subtables_dict[items[x]], metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    # Build an indentation string for pretty-printing the tree
    s = ""
    for x in range(size):
        s += "   "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("Tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
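This program (and the Naive Bayes experiment later in this record) reads the classic Play-Tennis data. A sketch of the assumed Tennis.csv layout, showing the first few of the well-known 14 rows (the recorded file may differ):

outlook,temp,humidity,windy,play
sunny,hot,high,false,no
sunny,hot,high,true,no
overcast,hot,high,false,yes
rainy,mild,high,false,yes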

OUTPUT



PROGRAM

from math import exp
from random import seed
from random import random

# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights': [random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network

# Calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation (sigmoid)
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(neuron['output'] - expected[j])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] -= l_rate * neuron['delta']

# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

# Test training backprop algorithm
seed(1)
dataset = [
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1]
]
n_inputs = len(dataset[0]) - 1
n_outputs = len(set([row[-1] for row in dataset]))

network = initialize_network(n_inputs, 2, n_outputs)
train_network(network, dataset, 0.5, 20, n_outputs)



for layer in network:
    print(layer)
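After training, the same forward pass doubles as a predictor. A minimal sketch (the predict helper below is an addition, not part of the recorded program) that labels each training row with the index of the largest output neuron:

# Predict a class label with the trained network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

for row in dataset:
    print('expected=%d, predicted=%d' % (row[-1], predict(network, row)))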

OUTPUT



PROGRAM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv(r"Tennis.csv")
print("The first 5 values of the data are:\n", data.head())

# Extract features (X) and target variable (y)
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())
y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())

# Encode categorical variables using LabelEncoder
le_outlook = LabelEncoder()
X['outlook'] = le_outlook.fit_transform(X['outlook'])
le_Temperature = LabelEncoder()
X['temp'] = le_Temperature.fit_transform(X['temp'])
le_Humidity = LabelEncoder()
X['humidity'] = le_Humidity.fit_transform(X['humidity'])
le_Windy = LabelEncoder()
X['windy'] = le_Windy.fit_transform(X['windy'])
print("\nNow the Train data is:\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n", y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Create and train the Gaussian Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Evaluate the classifier's accuracy on the test set
y_pred = classifier.predict(X_test)
print("Accuracy is:", accuracy_score(y_test, y_pred))



OUTPUT



PROGRAM

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Define categories for text classification
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

# Fetch the training and testing datasets
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True,
                                  remove=('headers', 'footers', 'quotes'))
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True,
                                 remove=('headers', 'footers', 'quotes'))

# Convert text data to feature vectors using CountVectorizer
count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(twenty_train.data)

# Apply TF-IDF transformation to the feature vectors
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)

# Train a Multinomial Naive Bayes classifier
mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)

# Transform the test data to feature vectors and apply TF-IDF transformation
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)

# Make predictions on the test set
predicted = mod.predict(X_test_tfidf)

# Evaluate the performance of the classifier
print("Accuracy:", accuracy_score(twenty_test.target, predicted))
print("\nClassification Report:\n",
      classification_report(twenty_test.target, predicted, target_names=twenty_test.target_names))
print("\nConfusion Matrix:\n", confusion_matrix(twenty_test.target, predicted))



OUTPUT



PROGRAM

import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

data = pd.read_csv("heartdisease.csv")
heart_disease = pd.DataFrame(data)
print(heart_disease)

model = BayesianModel([
    ('age', 'Lifestyle'),
    ('Gender', 'Lifestyle'),
    ('Family', 'heartdisease'),
    ('Lifestyle', 'diet'),
    ('diet', 'cholestrol'),
    ('cholestrol', 'heartdisease')
])

model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

HeartDisease_infer = VariableElimination(model)

print('For Age enter SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4')
print('For Gender enter Male:0, Female:1')
print('For Family History enter Yes:1, No:0')
print('For Diet enter High:0, Medium:1')
print('for LifeStyle enter Athlete:0, Active:1, Moderate:2, Sedentary:3')
print('for Cholesterol enter High:0, BorderLine:1, Normal:2')

q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age': int(input('Enter Age: ')),
    'Gender': int(input('Enter Gender: ')),
    'Family': int(input('Enter Family History: ')),
    'diet': int(input('Enter Diet: ')),
    'Lifestyle': int(input('Enter Lifestyle: ')),
    'cholestrol': int(input('Enter Cholestrol: '))
})

print(q)

probability_heart_disease = q.values[1] # Assuming 1 is the index for heart disease being present
print(f'Probability of Heart Disease: {probability_heart_disease}')
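For reference, the program expects heartdisease.csv with integer-coded columns matching the prompts above. A sketch of the assumed layout (rows invented for illustration; the recorded file may differ):

age,Gender,Family,diet,Lifestyle,cholestrol,heartdisease
0,0,1,1,3,0,1
0,1,1,1,3,0,1
1,0,0,0,2,1,1
4,0,1,1,3,2,0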



OUTPUT



PROGRAM

from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = load_iris()

X = pd.DataFrame(dataset.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(dataset.target)
y.columns = ['Targets']

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3, n_init=10) # Explicitly set n_init
model.fit(X)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
plt.title('KMeans')

# GMM PLOT
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')

# Display the plots
plt.tight_layout()
plt.show()
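The sklearn.metrics module is imported as sm but never used in the listing. A hedged sketch of how it could quantify agreement between the clusterings and the true labels (cluster ids are arbitrary, so the confusion matrix is more telling than raw accuracy):

# Compare each clustering against the true iris labels
print("KMeans confusion matrix:\n", sm.confusion_matrix(y.Targets, predY))
print("GMM confusion matrix:\n", sm.confusion_matrix(y.Targets, y_cluster_gmm))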



OUTPUT



PROGRAM

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np

dataset = load_iris()
#print(dataset)
X_train, X_test, y_train, y_test = train_test_split(dataset["data"], dataset["target"], random_state=0)
kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test, y_test))
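A single unseen measurement can be classified the same way. A sketch with an invented flower (the four values are sepal length/width and petal length/width in cm, chosen arbitrarily):

# Classify one new flower
x_new = np.array([[5.0, 2.9, 1.0, 0.2]])
print("Predicted class:", dataset["target_names"][kn.predict(x_new)][0])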

OUTPUT



PROGRAM

from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]

    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)

    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

import math

n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3

yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()
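The fraction f sets how much of the data each local fit uses. A quick illustrative sketch (the two settings are chosen arbitrarily, not part of the recorded program) comparing smoothing levels on the same noisy sine:

# Larger f -> smoother estimate; smaller f -> closer tracking of the noise
for frac in (0.1, 0.5):
    plt.plot(x, lowess(x, y, frac, iterations), label="f=%.1f" % frac)
plt.plot(x, y, "r.")
plt.legend()
plt.show()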



OUTPUT

