Download as pdf or txt
Download as pdf or txt
You are on page 1of 42

Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 1

Create a database and execute sql queries using sqlite3

1) Creation of Database

First we create a database by typing the command: sqlite3 AnyName.db where


AnyName can be substituted with the name of your choice.
Then we check to see if the database we created is attached by typing the command:
sqlite> .databases

2) Queries for performing DDL commands.

sqlite> CREATE TABLE COMPANY(
   ID      INT PRIMARY KEY NOT NULL,
   NAME    TEXT NOT NULL,
   AGE     INT NOT NULL,
   ADDRESS CHAR(50),
   SALARY  REAL
);

sqlite> CREATE TABLE DEPARTMENT(
   ID     INT PRIMARY KEY NOT NULL,
   DEPT   CHAR(50) NOT NULL,
   EMP_ID INT NOT NULL
);
You can verify that your tables have been created successfully by using the
SQLite .tables command:
sqlite>.tables

1
Data Analysis And Tools MScIT Part 1 | Sem 1

3) Queries for performing DML commands

Insertion of Values into the Company and Department Table

INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (1, 'Paul', 32, 'California', 20000.00 );
INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (2, 'Allen', 25, 'Texas', 15000.00 );
INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (3, 'Teddy', 23 , 'Norway', 20000.00 );
INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (4, 'Mark', 25, 'Rich-Mond ', 65000.00 );
INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (5, 'David', 27 , 'Texas', 85000.00 );
INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (6, 'Kim', 22, 'South-Hall', 45000.00 );
INSERT INTO COMPANY VALUES (7, 'James', 24, 'Houston', 10000.00 );

INSERT INTO DEPARTMENT (ID, DEPT, EMP_ID) VALUES (1, 'IT Billing', 1 );
INSERT INTO DEPARTMENT (ID, DEPT, EMP_ID) VALUES (2, 'Engineering', 2 );

2
Data Analysis And Tools MScIT Part 1 | Sem 1

INSERT INTO DEPARTMENT (ID, DEPT, EMP_ID) VALUES (3, 'Finance', 7 );

Using the select statement to retrieve data

Select clause is a data manipulation command used for retrieving the data in the
desired format from the database objects. The syntax of the various select clause
and its purpose is given below:

3
Data Analysis And Tools MScIT Part 1 | Sem 1

select * from COMPANY

4
Data Analysis And Tools MScIT Part 1 | Sem 1

1. List down all the records where AGE is greater than or equal to 25 AND salary
is greater than or equal to 65000.00:

2. List down all the records where AGE is greater than or equal to 25 OR salary is
greater than or equal to 65000.00:

3. List down all the records where AGE is not NULL which means all the records
because none of the record is having AGE equal to NULL.

4. List down all the records where NAME starts with 'Ki', does not matter what
comes after 'Ki'.

5
Data Analysis And Tools MScIT Part 1 | Sem 1

5. List down all the records where AGE value is either 25 or 27.

6. List down all the records where AGE value is neither 25 nor 27.

7. List down all the records where AGE value is in BETWEEN 25 AND 27.

8. Find all the records with AGE field having SALARY > 65000

6
Data Analysis And Tools MScIT Part 1 | Sem 1

9. Find the total amount of salary for each customer.

10. List the name of the person and find the sum of their salaries. Group and Order
their names.

11. List all records from the company table where name count is less than 2.

12. List all records from the company table where name count is greater than 2.

7
Data Analysis And Tools MScIT Part 1 | Sem 1

13. List all records from the company table in the descending order of the names of
the people.

14. Fetch only 6 rows from the company table.

15. Fetch only 3 rows from the company table starting from the 3rd record.

8
Data Analysis And Tools MScIT Part 1 | Sem 1

16. Perform a cross join between the company table and the department table.

17. Perform an inner join between the company table and the department table.

9
Data Analysis And Tools MScIT Part 1 | Sem 1

18. Perform an inner join between the company table and the department table.

10
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical 2

Part 1:-

Write a program to print a multiplication table

Code:-

#include < apop.h >


int main()
{
int i=0;
int r,c;
printf("Enter the numbers of rows and columns \n") ;
scanf("%d %d",&r,&c);
gsl_matrix *mul_table=gsl_matrix_alloc(r,c);
gsl_matrix_set_all( mul_table,1);
for(i=0;i<mul_table->size1;i++)
{
APOP_MATRIX_ROW(mul_table,i,row_mul);
gsl_vector_scale(row_mul,i+1);
}
for(i=0;i<mul_table->size2;i++)
{
APOP_MATRIX_COL(mul_table,i,col_mul);
gsl_vector_scale(col_mul,i+1);
}
printf("\nThe generated multiplication table is shown below : \n") ;
apop_matrix_show(mul_table);
gsl_matrix_free(mul_table);
return 0;
}

11
Data Analysis And Tools MScIT Part 1 | Sem 1

Output:-

12
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 2:-

The function below takes a double indicating taxable income and returns the US
income taxes owed, assuming a head of household with two dependents taking
the standard deduction.

Code:-

#include <apop.h>
/*
 * Compute the tax owed on `income` using a table of marginal brackets.
 *
 * Two fixed amounts (7850 and 3 * 3400) are subtracted from `income` first;
 * whatever remains is taxed bracket by bracket, each slice at its own rate.
 *
 * income: income before the two subtractions below.
 * Returns the tax owed, or 0 when nothing remains after the subtractions.
 */
double tax_cal(double income)
{
    /* range[k] is the upper bound of bracket k; rate[k] is its marginal rate. */
    const double range[] = {0, 11200, 42650, 110100, 178350, 349700, INFINITY};
    const double rate[] = {0, .10, .15, .25, .28, .33, .35};
    double tax = 0;

    income -= 7850;
    income -= 3400 * 3;

    /*
     * The last bracket has infinite width, so the remaining income always
     * drops to or below zero before `bracket` can run past the tables.
     */
    for (int bracket = 1; income > 0; bracket++) {
        double width = range[bracket] - range[bracket - 1];
        double taxed = income < width ? income : width;

        tax += rate[bracket] * taxed;
        /* Consume only this bracket's width, not its cumulative upper bound. */
        income -= width;
    }
    return tax;
}

int main()
{
apop_db_open("data-census.db");
char column[]="geo_name";.
strncpy(apop_opts.db_name_column,column,sizeof(column));
apop_data *data=apop_query_to_data("select
geo_name,household_median_in”
"as Income from income where sumlevel='040' order by household_median_in
desc");
Apop_col_t(data,"Income",income);
13
Data Analysis And Tools MScIT Part 1 | Sem 1

data->vector=apop_vector_map(income,tax_cal);
apop_name_add(data->names,"Tax Owed",'v');
apop_data_show(data);
return 0;
}
Output:-

14
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical 3

Part 1:-

Write a code to plot a vector.

Code:-

#include <apop.h>
/*
 * Plot the contents of `g_data` by piping them to a gnuplot process.
 *
 * g_data: matrix written to gnuplot as inline data for "plot '-'".
 * If gnuplot cannot be started, an error is reported and nothing is plotted.
 */
void plot_graph(gsl_matrix *g_data)
{
    FILE *terminal = popen("gnuplot -persist", "w");
    if (terminal == NULL) {
        perror("popen gnuplot");
        return;
    }

    fprintf(terminal, "reset;plot '-' \n");
    apop_matrix_print(g_data, .output_pipe = terminal);

    /* Closing the pipe flushes it and signals end of the inline data. */
    pclose(terminal);
}
int main(int argc, char **argv)
{
apop_db_open("data-climate.db");
plot_graph(apop_query_to_matrix("select year,temp from temp"));
return 0;
}

15
Data Analysis And Tools MScIT Part 1 | Sem 1

Output:-

16
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 2:-

Write a code to plot a lattice i.e. Eigen Vector

Code:-

#include <apop.h>
/*
 * Open the census database and return one data set joining the geography,
 * income, demos and postcodes tables (postcode is selected as row_names).
 */
apop_data *query_data()
{
    apop_data *result;

    apop_db_open("data-census.db");
    result = apop_query_to_data(
        "select postcode as row_names, m_per_100_f, population/1e6 as population, "
        "median_age from geography g, income i,demos d,postcodes p "
        "where i.sumlevel= '040' "
        "and g.geo_id = d.geo_id and i.geo_name = p.state "
        "and g.geo_id = i.geo_id");
    return result;
}
/*
 * Write a lattice plot script for the queried data to the file "eigen_txt",
 * then run gnuplot on that file.
 *
 * Returns 0 on success, 1 if gnuplot could not be started.
 */
int main(void)
{
    apop_plot_lattice(query_data(), "eigen_txt");

    /* gnuplot reads the script through shell redirection; nothing is written to the pipe. */
    FILE *gnuplot = popen("gnuplot -persist <eigen_txt", "w");
    if (gnuplot == NULL) {
        perror("popen gnuplot");
        return 1;
    }
    /* Wait for gnuplot to finish instead of abandoning the child process. */
    pclose(gnuplot);
    return 0;
}

17
Data Analysis And Tools MScIT Part 1 | Sem 1

Output:-

18
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 4

Part 1:- Discrete Distributions

1) Bernoulli distribution

Code:-

#include <apop.h>
/*
 * Tabulate the Bernoulli probability mass for outcomes 0 and 1 (p = 0.6)
 * together with the running cumulative total.
 */
int main(int argc, char **argv)
{
    const double p = 0.6;
    float cumulative = 0;

    printf("random variable||probability||cumulative prob \n");
    for (int outcome = 0; outcome < 2; ++outcome) {
        float mass = gsl_ran_bernoulli_pdf(outcome, p);

        cumulative += mass;
        printf("%d \t\t %f \t\t %f \t \n", outcome, mass, cumulative);
    }
}

Output:-

19
Data Analysis And Tools MScIT Part 1 | Sem 1

2) Binomial Distribution

Code:-

#include <apop.h>
/*
 * Tabulate the binomial probability mass for 0..n successes (p = 0.6, n = 5)
 * together with the running cumulative total.
 */
int main(int argc, char **argv)
{
    const double p = 0.6;
    const int n = 5;
    float cumulative = 0;

    printf("random variable||probability||cumulative prob \n");
    for (int successes = 0; successes < n + 1; ++successes) {
        float mass = gsl_ran_binomial_pdf(successes, p, n);

        cumulative += mass;
        printf("%d \t\t %f \t %f \t \n", successes, mass, cumulative);
    }
    printf("\n");

    return 0;
}

Output:-

20
Data Analysis And Tools MScIT Part 1 | Sem 1

3) Poisson Distribution

Code:-

#include <apop.h>
/*
 * Tabulate the Poisson probability mass for counts 0..10 (mu = 3.0)
 * together with the running cumulative total.
 */
int main(int argc, char **argv)
{
    const double mu = 3.0;
    const int n = 10;
    float cumulative = 0;

    printf("random variable||probability||cumulative prob \n");
    for (int count = 0; count < n + 1; ++count) {
        float mass = gsl_ran_poisson_pdf(count, mu);

        cumulative += mass;
        printf("%d \t\t %f \t %f \t \n", count, mass, cumulative);
    }
    printf("\n");

    return 0;
}

Output:-

21
Data Analysis And Tools MScIT Part 1 | Sem 1

4) Uniform Distribution

Code:-

#include <apop.h>
/*
 * Read a point x and interval bounds a, b from standard input and print the
 * flat (uniform) density at x.
 *
 * Returns 0 on success, 1 if the input is malformed or a is not below b.
 */
int main(int argc, char **argv)
{
    float x;
    int a, b;

    printf("Enter value for x,a and b\n");
    if (scanf("%f %d %d", &x, &a, &b) != 3) {
        fprintf(stderr, "Expected a number x followed by integers a and b.\n");
        return 1;
    }
    /* An empty or reversed interval has no well-defined density. */
    if (a >= b) {
        fprintf(stderr, "a must be smaller than b.\n");
        return 1;
    }

    printf("random variable||probability \n");
    float k = gsl_ran_flat_pdf(x, a, b);
    printf("%f \t\t %f \n", x, k);
    return 0;
}
Output:-

22
Data Analysis And Tools MScIT Part 1 | Sem 1

5) Multinomial Distribution

Code:-

#include <apop.h>
/*
 * Print the multinomial probability of observing counts {2, 3, 4} across
 * three categories with probabilities {0.2, 0.4, 0.4}.
 */
int main(void)
{
    const double p[] = { 0.2, 0.4, 0.4 };
    const unsigned int n[] = { 2, 3, 4 };
    const int categories = 3;
    double pmf;

    printf("probability \n");
    printf("----------------\n");

    pmf = gsl_ran_multinomial_pdf(categories, p, n);
    printf("%3.9f\n", pmf);

    return 0;
}

Output:-

23
Data Analysis And Tools MScIT Part 1 | Sem 1

6) Hypergeometric Distribution

Code:-

#include <stdio.h>
#include <gsl/gsl_randist.h>
/*
 * Print the hypergeometric probability of x = 2 with parameters
 * s = 13, f = 39 and n = 6, passed to GSL in the order (x, s, f, n).
 */
int main (void)
{
    int x = 2;
    int s = 13;
    int f = 39;
    int n = 6;

    printf("random variable |||probability\n");
    printf("-----------------\n");

    double pmf = gsl_ran_hypergeometric_pdf(x, s, f, n);
    /* Separate the two columns; previously x and pmf were printed fused together. */
    printf("%d \t\t %3.6f\n", x, pmf);
    return 0;
}

Output:-

24
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 2:- Continuous Distributions

1) Normal Distribution

Code:-

#include <apop.h>
/*
 * Print the Gaussian density, lower and upper tail probabilities at x = 10
 * (sigma = 5), then recover x from each tail with the inverse CDFs.
 */
int main(void)
{
    double x = 10;
    double sigma = 5;

    printf("Normal distribution :x=%f sigma=%f\n", x, sigma);

    double pdf = gsl_ran_gaussian_pdf(x, sigma);
    printf("prob(x = %f) = %f\n", x, pdf);

    double P = gsl_cdf_gaussian_P(x, sigma);
    printf("prob(x < %f) = %f\n", x, P);

    double Q = gsl_cdf_gaussian_Q(x, sigma);
    printf("prob(x > %f) = %f\n", x, Q);

    /* Round-trip through the inverse CDFs. */
    x = gsl_cdf_gaussian_Pinv(P, sigma);
    printf("Pinv(%f) = %f\n", P, x);

    x = gsl_cdf_gaussian_Qinv(Q, sigma);
    printf("Qinv(%f) = %f\n", Q, x);
    return 0;
}

Output:-

25
Data Analysis And Tools MScIT Part 1 | Sem 1

2) Gamma Distribution

Code:-

#include <apop.h>
/*
 * Print the gamma density, lower and upper tail probabilities at x = 1.5
 * (a = 1, b = 2), then recover x from each tail with the inverse CDFs.
 */
int main(void)
{
    double x = 1.5;
    double a = 1;
    double b = 2;

    printf("Gamma distribution :x=%f a=%f b=%f\n", x, a, b);

    double pdf = gsl_ran_gamma_pdf(x, a, b);
    printf("prob(x = %f) = %f\n", x, pdf);

    double P = gsl_cdf_gamma_P(x, a, b);
    printf("prob(x < %f) = %f\n", x, P);

    double Q = gsl_cdf_gamma_Q(x, a, b);
    printf("prob(x > %f) = %f\n", x, Q);

    /* Round-trip through the inverse CDFs. */
    x = gsl_cdf_gamma_Pinv(P, a, b);
    printf("Pinv(%f) = %f\n", P, x);

    x = gsl_cdf_gamma_Qinv(Q, a, b);
    printf("Qinv(%f) = %f\n", Q, x);
    return 0;
}

Output:-

26
Data Analysis And Tools MScIT Part 1 | Sem 1

3) Exponential Distribution

Code:-

#include <apop.h>
/*
 * Print the exponential density, lower and upper tail probabilities at
 * x = 0.05, then recover x from each tail with the inverse CDFs.
 *
 * NOTE(review): GSL documents the second argument of its exponential
 * functions as the mean mu, not a rate; the name `lambda` and the printed
 * label may be misleading. Confirm which parameterisation is intended.
 */
int main(void)
{
    double x = 0.05;
    double lambda = 2;

    printf("Exponential distribution :x=%f lambda=%f\n", x, lambda);

    double pdf = gsl_ran_exponential_pdf(x, lambda);
    printf("prob(x = %f) = %f\n", x, pdf);

    double P = gsl_cdf_exponential_P(x, lambda);
    printf("prob(x < %f) = %f\n", x, P);

    double Q = gsl_cdf_exponential_Q(x, lambda);
    printf("prob(x > %f) = %f\n", x, Q);

    /* Round-trip through the inverse CDFs. */
    x = gsl_cdf_exponential_Pinv(P, lambda);
    printf("Pinv(%f) = %f\n", P, x);

    x = gsl_cdf_exponential_Qinv(Q, lambda);
    printf("Qinv(%f) = %f\n", Q, x);
    return 0;
}

Output:-

27
Data Analysis And Tools MScIT Part 1 | Sem 1

4) Beta Distribution

Code:-

#include <apop.h>
/*
 * Print the beta density, lower and upper tail probabilities at x = 0.8
 * (a = 0.5, b = 0.5), then recover x from each tail with the inverse CDFs.
 */
int main(void)
{
    double x = 0.8;
    double a = 0.5;
    double b = 0.5;

    printf("Beta distribution :x=%f a=%f b=%f\n", x, a, b);

    double pdf = gsl_ran_beta_pdf(x, a, b);
    printf("prob(x = %f) = %f\n", x, pdf);

    double P = gsl_cdf_beta_P(x, a, b);
    printf("prob(x < %f) = %f\n", x, P);

    double Q = gsl_cdf_beta_Q(x, a, b);
    printf("prob(x > %f) = %f\n", x, Q);

    /* Round-trip through the inverse CDFs. */
    x = gsl_cdf_beta_Pinv(P, a, b);
    printf("Pinv(%f) = %f\n", P, x);

    x = gsl_cdf_beta_Qinv(Q, a, b);
    printf("Qinv(%f) = %f\n", Q, x);
    return 0;
}

Output:-

28
Data Analysis And Tools MScIT Part 1 | Sem 1

5) Lognormal Distribution

Code:-

#include <apop.h>
/*
 * Print the lognormal density, lower and upper tail probabilities at x = 4
 * (zeta = 2, sigma = 1.5), then recover x from each tail with the inverse CDFs.
 */
int main(void)
{
    double x = 4;
    double zeta = 2;
    double sigma = 1.5;

    printf("Lognormal distribution :x=%f zeta=%f sigma=%f\n", x, zeta,
           sigma);

    double pdf = gsl_ran_lognormal_pdf(x, zeta, sigma);
    printf("prob(x = %f) = %f\n", x, pdf);

    double P = gsl_cdf_lognormal_P(x, zeta, sigma);
    printf("prob(x < %f) = %f\n", x, P);

    double Q = gsl_cdf_lognormal_Q(x, zeta, sigma);
    printf("prob(x > %f) = %f\n", x, Q);

    /* Round-trip through the inverse CDFs. */
    x = gsl_cdf_lognormal_Pinv(P, zeta, sigma);
    printf("Pinv(%f) = %f\n", P, x);

    x = gsl_cdf_lognormal_Qinv(Q, zeta, sigma);
    printf("Qinv(%f) = %f\n", Q, x);
    return 0;
}

Output:-

29
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 5

Implement Regression and Goodness of Fit

Code:-

#include <stdio.h>
#include <gsl/gsl_fit.h>
/*
 * Fit a weighted straight line Y = c0 + c1 X to four data points, print the
 * coefficients, covariance matrix and chi-squared, then print the fitted
 * line with an upper and lower band over an extended range of X.
 */
int main(void)
{
    int i, n = 4;
    double x[4] = { 1970, 1980, 1990, 2000 };
    double y[4] = { 12, 11, 14, 13 };
    double w[4] = { 0.1, 0.2, 0.3, 0.4 };
    double c0, c1, cov00, cov01, cov11, chisq;

    gsl_fit_wlinear(x, 1, w, 1, y, 1, n, &c0, &c1, &cov00, &cov01, &cov11, &chisq);

    printf("# best fit: Y = %g + %g X\n", c0, c1);
    printf("# covariance matrix:\n");
    printf("# [ %g, %g\n# %g, %g]\n", cov00, cov01, cov01, cov11);
    printf("# chisq = %g\n", chisq);

    /* Echo the input points; the third column is 1/sqrt(weight). */
    for (i = 0; i < n; i++) {
        printf("data: %g %g %g\n", x[i], y[i], 1 / sqrt(w[i]));
        printf("\n");
    }

    /* Evaluate the fit from 30% before the first X to 30% past the last X. */
    for (i = -30; i < 130; i++) {
        double xf = x[0] + (i / 100.0) * (x[n - 1] - x[0]);
        double yf, yf_err;

        gsl_fit_linear_est(xf, c0, c1, cov00, cov01, cov11, &yf, &yf_err);
        printf("fit: %g %g\n", xf, yf);
        printf("hi : %g %g\n", xf, yf + yf_err);
        printf("lo : %g %g\n", xf, yf - yf_err);
    }
    return 0;
}

30
Data Analysis And Tools MScIT Part 1 | Sem 1

31
Data Analysis And Tools MScIT Part 1 | Sem 1

Output:-

32
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 6

Part 1:-

Generate a Random Number using the Default algorithm and Default Seed Value

Code:-

#include <apop.h>
gsl_rng * r;

/*
 * Allocate the default GSL generator (after reading any environment
 * overrides) and print its name, the default seed and the first value drawn.
 */
int main(void)
{
    const gsl_rng_type *T;

    /* Must run before reading gsl_rng_default so environment overrides apply. */
    gsl_rng_env_setup();
    T = gsl_rng_default;
    r = gsl_rng_alloc(T);

    printf("generator type: %s\n", gsl_rng_name(r));
    printf("seed = %lu\n", gsl_rng_default_seed);
    printf("first value = %lu\n", gsl_rng_get(r));

    gsl_rng_free(r);
    return 0;
}

Output:-

We can override the default generator and seed value by setting environment
variables when running the program:
GSL_RNG_TYPE="taus" GSL_RNG_SEED=123 ./out.o

33
Data Analysis And Tools MScIT Part 1 | Sem 1

Write a program which uses taus generation algorithm and a seed value of 123.

Code:-

#include <apop.h>
gsl_rng * r;

/*
 * Allocate a "taus" generator with the default seed forced to 123 and print
 * its name, the seed and the first value drawn.
 */
int main(void)
{
    const gsl_rng_type *T;

    gsl_rng_env_setup();
    /* Assigned after the environment setup so that 123 is the value in effect. */
    gsl_rng_default_seed = 123;
    T = gsl_rng_taus;
    r = gsl_rng_alloc(T);

    printf("generator type: %s\n", gsl_rng_name(r));
    printf("seed = %lu\n", gsl_rng_default_seed);
    printf("first value = %lu\n", gsl_rng_get(r));

    gsl_rng_free(r);
    return 0;
}

Output:-

34
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 2:-

Generate random numbers using Exponential Distribution

Code:-

#include <apop.h>
/*
 * Read n and alpha from standard input and print n values of the recurrence
 * x = alpha * x + e, where each e is an exponential variate from GSL.
 *
 * Returns 0 on success, 1 if the input could not be parsed.
 */
int main(int argc, char *argv[])
{
    int n;
    float alpha;
    gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);

    printf("Enter the value of n and alpha\n");
    if (scanf("%d %f", &n, &alpha) != 2) {
        fprintf(stderr, "Expected an integer n and a number alpha.\n");
        gsl_rng_free(r);
        return 1;
    }

    float x = 0;
    printf("\nThe Random Numbers are:-\n");
    for (int i = 0; i < n; i++) {
        x = alpha * x + gsl_ran_exponential(r, 1);
        printf(" %2.4f \n", x);
    }

    /* Release the generator; the original never freed it. */
    gsl_rng_free(r);
    return (0);
}

Output:-

35
Data Analysis And Tools MScIT Part 1 | Sem 1

36
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 3:-
Generating uniform random numbers in the range [0.0, 1.0] using Uniform
Distribution

Code:-

#include <apop.h>
/*
 * Draw ten values with gsl_rng_uniform from the default generator and print
 * each one on its own line.
 */
int main(void)
{
    const int draws = 10;
    const gsl_rng_type *kind;
    gsl_rng *gen;
    int done = 0;

    /* Environment setup must precede reading gsl_rng_default. */
    gsl_rng_env_setup();
    kind = gsl_rng_default;
    gen = gsl_rng_alloc(kind);

    while (done < draws) {
        double u = gsl_rng_uniform(gen);

        printf("%.5f\n", u);
        ++done;
    }

    gsl_rng_free(gen);
    return 0;
}

Output:-

37
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 4:-

Generating random numbers using Binomial Distribution

Code:-

#include <apop.h>
/*
 * Draw ten binomial variates (p = 0.3, 10 trials each) from the default
 * generator and print them on one line.
 */
int main(void)
{
    const int n = 10;
    const gsl_rng_type *kind;
    gsl_rng *gen;
    float p;
    int done = 0;

    /* Environment setup must precede reading gsl_rng_default. */
    gsl_rng_env_setup();
    kind = gsl_rng_default;
    gen = gsl_rng_alloc(kind);
    p = 0.3;

    while (done < n) {
        unsigned int k = gsl_ran_binomial(gen, p, n);

        printf(" %u", k);
        ++done;
    }
    printf("\n");

    gsl_rng_free(gen);
    return 0;
}

Output:-

38
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 7

Part 1:-

Implement the T-Test.

Code:-

#include <apop.h>
/*
 * Run a t-test comparing per-capita income between North Dakota and
 * South Dakota rows of the census database and print the result table plus
 * the two-tailed confidence value.
 */
int main(void)
{
    apop_db_open("data-census.db");

    /*
     * Adjacent string literals are concatenated into one query. A comma
     * between them would pass the WHERE clause as an ignored extra argument
     * and select every row.
     */
    gsl_vector *n = apop_query_to_vector(
        "select in_per_capita from income "
        "where state = (select state from geography where name = 'North Dakota')");
    gsl_vector *s = apop_query_to_vector(
        "select in_per_capita from income "
        "where state = (select state from geography where name = 'South Dakota')");

    apop_data *t = apop_t_test(n, s);
    apop_data_show(t);
    /* The row name is matched as a pattern against the test's output rows. */
    printf("\n confidence: %g\n", apop_data_get(t, .rowname = "conf.*2.tail"));
    return 0;
}

Output:-

39
Data Analysis And Tools MScIT Part 1 | Sem 1

Part 2:-

Implement the F-Test.

Code:-

#include <apop.h>
/*
 * Open the census database and return one data set joining the geography,
 * income, demos and postcodes tables (postcode is selected as row_names).
 */
apop_data *query_data()
{
    apop_data *result;

    apop_db_open("data-census.db");
    result = apop_query_to_data(
        "select postcode as row_names, m_per_100_f, population/1e6 as population, "
        "median_age from geography g, income i,demos d,postcodes p "
        "where i.sumlevel= '040' "
        "and g.geo_id = d.geo_id and i.geo_name = p.state "
        "and g.geo_id = i.geo_id");
    return result;
}
/*
 * Estimate an OLS model on the queried data, print it, then run an F-test
 * against a one-row constraint and print the test result.
 */
int main(void)
{
    /* One constraint: a 1-element vector (0) and a 1x3 matrix row (0, 0, 1). */
    apop_data *constr = apop_data_falloc((1, 1, 3), 0, 0, 0, 1);
    apop_data *d = query_data();
    apop_model *est = apop_estimate(d, apop_ols);

    apop_model_show(est);
    apop_data_show(apop_f_test(est, constr));
    return 0;
}

40
Data Analysis And Tools MScIT Part 1 | Sem 1

Output:-

41
Data Analysis And Tools MScIT Part 1 | Sem 1

Practical No 8

Implement Non-parametric Testing

Code:-

#include <apop.h>
/*
 * Run an ANOVA on ridership from the metro database, using a sub-select that
 * joins riders with lines by station as the input table, and print the result.
 */
int main(int argc, char **argv)
{
    apop_db_open("data-metro.db");

    /* Split with adjacent literals; a raw line break inside a string does not compile. */
    char *joinedtab = "(select year,riders,line from riders,lines where "
                      "riders.station=lines.station)";

    apop_data_show(apop_anova(joinedtab, "riders", "line", "year"));
    return 0;
}

Output:-

42

You might also like