DAV Prac BHR

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 22

210020116055

Exercise: 1
1. Write a program to find correlation between gender and Semester marks.

Input:

import random

import numpy as np
import pandas as pd

# Synthetic records for 100 students. Gender, marks, difficulty and SPI are
# drawn at random, so the printed correlations change from run to run.
data = {
    'Enrollment_number': [f'E{i}' for i in range(1, 101)],
    'Name': [f'Student {i}' for i in range(1, 101)],
    'Gender': np.random.choice(['Male', 'Female'], 100),
    'Semester_marks': [random.randint(30, 100) for _ in range(100)],
    'Subject_wise_marks': [random.randint(30, 100) for _ in range(100)],
    'Difficulty_level': np.random.choice(['Easy', 'Medium', 'Hard'], 100),
    'SPI': [random.uniform(0.5, 10.0) for _ in range(100)],
    'Address': [f'Address {i}' for i in range(1, 101)],
    'Geographical_location': [f'Location {i}' for i in range(1, 101)],
}

df = pd.DataFrame(data)

# Gender is categorical, so one-hot encode it and correlate each indicator
# column with the semester marks.
corr_gender_semester = (
    df['Gender'].str.get_dummies().corrwith(df['Semester_marks'])
)

print("Correlation between gender and semester marks:")
print(corr_gender_semester)

Output: Correlation between gender and semester marks:

Female 0.118725

Male -0.118725

dtype: float64
210020116055

2. Write a program to find correlation between geographical location and semester

marks. Analyze which two are highly correlated.

Input:

import random

import numpy as np
import pandas as pd

# Synthetic records for 100 students. Gender, marks, difficulty and SPI are
# drawn at random, so the printed correlations change from run to run.
data = {
    'Enrollment_number': [f'E{i}' for i in range(1, 101)],
    'Name': [f'Student {i}' for i in range(1, 101)],
    'Gender': np.random.choice(['Male', 'Female'], 100),
    'Semester_marks': [random.randint(30, 100) for _ in range(100)],
    'Subject_wise_marks': [random.randint(30, 100) for _ in range(100)],
    'Difficulty_level': np.random.choice(['Easy', 'Medium', 'Hard'], 100),
    'SPI': [random.uniform(0.5, 10.0) for _ in range(100)],
    'Address': [f'Address {i}' for i in range(1, 101)],
    'Geographical_location': [f'Location {i}' for i in range(1, 101)],
}

df = pd.DataFrame(data)

# Computed here so this script runs on its own; it is printed below for
# comparison with the location result.
corr_gender_semester = (
    df['Gender'].str.get_dummies().corrwith(df['Semester_marks'])
)

# One indicator column per location, correlated with the semester marks.
# dtype=int keeps the indicators numeric regardless of the pandas default.
corr_location_semester = pd.get_dummies(
    df['Geographical_location'], dtype=int
).corrwith(df['Semester_marks'])

print("Correlation between geographical location and semester marks:")
print(corr_location_semester)

print("Correlation between gender and semester marks:")
print(corr_gender_semester)

print("\nCorrelation between geographical location and semester marks:")
print(corr_location_semester)

Output:

Correlation between geographical location and semester marks:

Location 1 0.158657

Location 10 0.178196
210020116055

Location 100 -0.041618

Location 11 -0.144198

Location 12 0.100040

...

Location 95 -0.114890

Location 96 0.144003

Location 97 0.021884

Location 98 -0.114890

Location 99 0.051192

Length: 100, dtype: float64

Correlation between gender and semester marks:

Female 0.118725

Male -0.118725

dtype: float64

Correlation between geographical location and semester marks:

Location 1 0.158657

Location 10 0.178196

Location 100 -0.041618

Location 11 -0.144198

Location 12 0.100040

210020116051

...

Location 95 -0.114890

Location 96 0.144003

Location 97 0.021884

Location 98 -0.114890

Location 99 0.051192

Length: 100, dtype: float64


210020116055

Exercise:1(b)
1. Write a program to calculate correlation between difficulty level and subject marks.

Input:

# Encode the difficulty labels as ordered integers so they can be correlated.
# NOTE: this overwrites the column in place; running it a second time on the
# already-encoded column maps every value to NaN, because the integer codes
# are not keys of difficulty_map.
difficulty_map = {'Easy': 1, 'Medium': 2, 'Hard': 3}
df['Difficulty_level'] = df['Difficulty_level'].map(difficulty_map)

# Correlation between the encoded difficulty level and the subject marks.
corr_difficulty_subject = df['Difficulty_level'].corr(df['Subject_wise_marks'])

print("Correlation between gender and semester marks:")
print(corr_gender_semester)

print("\nCorrelation between geographical location and semester marks:")
print(corr_location_semester)

if corr_difficulty_subject < 0:
    print("The two variables are negatively correlated.")
else:
    print("The two variables are not negatively correlated.")

Output:

Correlation between gender and semester marks:

Female 0.118725

Male -0.118725

dtype: float64

Correlation between geographical location and semester marks:

Location 1 0.158657

Location 10 0.178196

Location 100 -0.041618

Location 11 -0.144198

Location 12 0.100040

...

Location 95 -0.114890

Location 96 0.144003

Location 97 0.021884

Location 98 -0.114890

Location 99 0.051192

Length: 100, dtype: float64

210020116055

The two variables are negatively correlated.


210020116055

Exercise:2
1. Write a program to find out mean and standard deviation.

Input:

import random

import numpy as np
import pandas as pd

# Randomly generated exam scores for 50 students of a single college.
college_data = {
    'College': ['College A'] * 50,
    'University_exam_score': [random.randint(30, 100) for _ in range(50)],
}
college_df = pd.DataFrame(college_data)

college_mean = college_df['University_exam_score'].mean()
college_std = college_df['University_exam_score'].std()

print(f"Mean university exam score for College A: {college_mean:.2f}")
print(f"Standard deviation of university exam scores for College A: {college_std:.2f}")

# Randomly generated exam scores for four other colleges, 10 students each.
gujarat_data = {
    'College': ['College B', 'College C', 'College D', 'College E'] * 10,
    'University_exam_score': [random.randint(30, 100) for _ in range(40)],
}
gujarat_df = pd.DataFrame(gujarat_data)

gujarat_mean = gujarat_df.groupby('College')['University_exam_score'].mean()
gujarat_std = gujarat_df.groupby('College')['University_exam_score'].std()

print("\nMean university exam score for each college in Gujarat:")
print(gujarat_mean)

print("\nStandard deviation of university exam scores for each college in Gujarat:")
print(gujarat_std)

# The scores are random, so the observations are derived from the computed
# statistics instead of being asserted unconditionally.
mean_is_highest = college_mean > gujarat_mean.max()
std_is_lowest = college_std < gujarat_std.min()

print("\nObservations:")
if mean_is_highest:
    print("- The mean university exam score for College A is higher than the mean score "
          "for any college in Gujarat.")
else:
    print("- The mean university exam score for College A is not higher than the mean score "
          "for every college in Gujarat.")

if std_is_lowest:
    print("- The standard deviation of university exam scores for College A is lower than "
          "the standard deviation for any college in Gujarat.")
else:
    print("- The standard deviation of university exam scores for College A is not lower than "
          "the standard deviation for every college in Gujarat.")

if mean_is_highest and std_is_lowest:
    print("- This suggests that the performance of students in College A is generally better "
          "than those in other colleges in Gujarat.")
else:
    print("- This does not show that the performance of students in College A is generally "
          "better than those in other colleges in Gujarat.")

Output:
Mean university exam score for College A: 65.42

Standard deviation of university exam scores for College A: 21.20

Mean university exam score for each college in Gujarat:

College

College B 65.4

College C 55.3

College D 66.3

College E 65.1

Name: University_exam_score, dtype: float64

Standard deviation of university exam scores for each college in Gujarat:

College

College B 21.582915

College C 22.837834

College D 22.514440

College E 26.320461

Name: University_exam_score, dtype: float64


210020116055

Exercise:3
1. Plot this time series Data. Analyze the trend as per time.

Input:

import random

import matplotlib.pyplot as plt
import pandas as pd

# Randomly generated monthly case counts for four cities.
data = {
    'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'Ahmedabad': [random.randint(100, 500) for _ in range(12)],
    'Vadodara': [random.randint(100, 500) for _ in range(12)],
    'Rajkot': [random.randint(100, 500) for _ in range(12)],
    'Surat': [random.randint(100, 500) for _ in range(12)],
}

df = pd.DataFrame(data)

# One line per city on a shared month axis.
plt.figure(figsize=(12, 6))
for city in ['Ahmedabad', 'Vadodara', 'Rajkot', 'Surat']:
    plt.plot(df['Month'], df[city], label=city)

plt.xlabel('Month')
plt.ylabel('COVID Cases')
plt.title('Month wise COVID Cases for Cities')
plt.legend()
plt.show()

# NOTE(review): the observations below are fixed text; the counts above are
# random, so these statements are not derived from the plotted data.
print("Observations:")
print("- The trend for all cities shows an increasing trend in COVID cases over time.")
print("- The peak COVID cases occur in the months of May and June for all cities.")
print("- The trend for each city may vary based on local factors such as population size, "
      "healthcare system, and social distancing measures.")


210020116055

output:

Observations:

- The trend for all cities shows an increasing trend in COVID cases over time.

- The peak COVID cases occur in the months of May and June for all cities.

- The trend for each city may vary based on local factors such as population size,

healthcare system, and social distancing measures.


210020116055

Exercise:4
1. Write a program to apply random forest algorithm and suggest the best suited

college for 12th standard students.

Input:

import random

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

features = [
    'Student_SAT_score', 'Student_TOEFL_score', 'Student_GPA',
    'Student_Research_Experience', 'Student_Internship_Experience',
    'Student_Scholarship',
]

# Generate many synthetic students per college. With only one row per college
# the held-out row's label is never seen in training, so accuracy is always 0.
COLLEGES = ['College A', 'College B', 'College C', 'College D']
N_STUDENTS = 200

college_data = {
    # Cycle through the colleges so every class has the same number of rows.
    'College': [COLLEGES[i % len(COLLEGES)] for i in range(N_STUDENTS)],
    'Student_SAT_score': [random.randint(1000, 2400) for _ in range(N_STUDENTS)],
    'Student_TOEFL_score': [random.randint(50, 120) for _ in range(N_STUDENTS)],
    'Student_GPA': [random.uniform(3.0, 4.0) for _ in range(N_STUDENTS)],
    'Student_Research_Experience': [random.randint(0, 1) for _ in range(N_STUDENTS)],
    'Student_Internship_Experience': [random.randint(0, 1) for _ in range(N_STUDENTS)],
    'Student_Scholarship': [random.randint(0, 1) for _ in range(N_STUDENTS)],
}

college_df = pd.DataFrame(college_data)

X = college_df[features]
y = college_df['College']

# Stratify so that every college appears in both the train and the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

predictions = rf_model.predict(X_test)

# Features and labels are generated independently of each other, so the
# accuracy is expected to stay near chance on this synthetic data.
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy:.2f}")

# Assume we have a student's data
student_data = {
    'Student_SAT_score': [1800],
    'Student_TOEFL_score': [80],
    'Student_GPA': [3.5],
    'Student_Research_Experience': [1],
    'Student_Internship_Experience': [1],
    'Student_Scholarship': [1],
}

student_df = pd.DataFrame(student_data)

# Make a prediction for the student
predicted_college = rf_model.predict(student_df[features])[0]
print(f"The best suited college for the student is: {predicted_college}")

Output:
Accuracy: 0.00

The best suited college for the student is: College A


210020116055

Exercise:5
1. Visualize the data to show region wise results, branch wise results, subject wise

results.

Input:

import matplotlib.pyplot as plt

import pandas as pd

# Sample student data

# One row per student: (name, region, branch, subject, score).
student_data = pd.DataFrame(
    [
        ("Alice", "North", "Computer Science", "Math", 85),
        ("Bob", "South", "Electrical Engineering", "Physics", 75),
        ("Charlie", "West", "Mechanical Engineering", "Chemistry", 90),
        ("David", "East", "Civil Engineering", "Biology", 80),
    ],
    columns=["name", "region", "branch", "subject", "score"],
)

# Function to calculate average score by region, branch, or subject

def calculate_average(data, key):
    """Return the mean ``score`` for each distinct value of ``key``.

    Args:
        data: DataFrame containing a ``score`` column and the ``key`` column.
        key: Name of the column to group by (e.g. "region").

    Returns:
        DataFrame with two columns, ``key`` and ``score``, one row per group,
        where ``score`` holds the group mean.
    """
    grouped_data = data.groupby(key)
    averages = grouped_data['score'].mean().reset_index()
    # The original returned the undefined name `average` (NameError).
    return averages

def _show_average_bar(averages, key, axis_label, title, color, rotate_labels=False):
    """Draw and show one bar chart of the average score per ``key`` value."""
    plt.figure(figsize=(10, 6))
    plt.bar(averages[key], averages['score'], color=color)
    plt.xlabel(axis_label)
    plt.ylabel('Average Score')
    plt.title(title)
    if rotate_labels:
        # Long category names overlap unless the tick labels are slanted.
        plt.xticks(rotation=45, ha='right')
    plt.show()


# Region-wise Results
region_averages = calculate_average(student_data, "region")
_show_average_bar(region_averages, 'region', 'Region', 'Region-wise Results', 'skyblue')

# Branch-wise Results
branch_averages = calculate_average(student_data, "branch")
_show_average_bar(branch_averages, 'branch', 'Branch', 'Branch-wise Results', 'lightgreen',
                  rotate_labels=True)

# Subject-wise Results
subject_averages = calculate_average(student_data, "subject")
_show_average_bar(subject_averages, 'subject', 'Subject', 'Subject-wise Results', 'lightcoral',
                  rotate_labels=True)
210020116055

output:
210020116055
210020116055
210020116055

Exercise:6
1. Take year wise population.
2. Show appropriate size circle for population as per year.
3. Fill color in circle.
4. Prepare bar chart and pie chart.
5. Explore other functionality of D3.js
Input:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>D3.js Population Visualization</title>
  <script src="https://d3js.org/d3.v7.min.js"></script>
  <style>
    .bar {
      fill: steelblue;
    }
    /* Highlight the bar under the pointer. */
    .bar:hover {
      fill: orange;
    }
  </style>
</head>
<body>
  <h2>Year-wise Population</h2>
  <svg id="population-svg" width="800" height="400"></svg>
  <h2>Bar Chart</h2>
  <svg id="bar-chart" width="800" height="400"></svg>
  <h2>Pie Chart</h2>
  <svg id="pie-chart" width="800" height="400"></svg>

  <script>
    // Sample data
    const populationData = [
      { year: 2020, population: 1000 },
      { year: 2021, population: 1200 },
      { year: 2022, population: 1500 },
      { year: 2023, population: 1800 },
      { year: 2024, population: 2000 }
    ];

    // SVG for population circles.
    // A square-root scale makes each circle's area proportional to its
    // population; the largest circle gets a 40px radius so neighbours,
    // which are 100px apart, never overlap.
    const radiusScale = d3.scaleSqrt()
      .domain([0, d3.max(populationData, d => d.population)])
      .range([0, 40]);

    const svgPopulation = d3.select("#population-svg");
    svgPopulation.selectAll("circle")
      .data(populationData)
      .enter().append("circle")
      .attr("cx", (d, i) => 100 + i * 100)
      .attr("cy", 100)
      .attr("r", d => radiusScale(d.population))
      .attr("fill", "steelblue");

    // Bar Chart: one band per year, bar height proportional to population.
    const svgBar = d3.select("#bar-chart");
    const xScale = d3.scaleBand()
      .domain(populationData.map(d => d.year))
      .range([0, 800])
      .padding(0.1);
    // The range is inverted because SVG y grows downward.
    const yScale = d3.scaleLinear()
      .domain([0, d3.max(populationData, d => d.population)])
      .range([400, 0]);

    svgBar.selectAll("rect")
      .data(populationData)
      .enter().append("rect")
      .attr("class", "bar")
      .attr("x", d => xScale(d.year))
      .attr("y", d => yScale(d.population))
      .attr("width", xScale.bandwidth())
      .attr("height", d => 400 - yScale(d.population));

    // Pie Chart: slice angle proportional to population, in data order.
    const svgPie = d3.select("#pie-chart");
    const pie = d3.pie()
      .value(d => d.population)
      .sort(null);
    const arc = d3.arc()
      .innerRadius(0)
      .outerRadius(200);

    // Select by the class that is actually assigned below ("g.arc"), not by
    // a non-existent <arc> element name.
    const arcs = svgPie.selectAll("g.arc")
      .data(pie(populationData))
      .enter()
      .append("g")
      .attr("class", "arc")
      .attr("transform", "translate(400,200)");

    arcs.append("path")
      .attr("fill", (d, i) => d3.schemeCategory10[i])
      .attr("d", arc);
  </script>
</body>
</html>
210020116055

Output:
210020116055
210020116055

You might also like