
Step 1 ------ Download WinSCP (Windows) or FileZilla (Mac)

Windows: download WinSCP and install it

https://winscp.net/download/WinSCP-5.19.6-Setup.exe

Mac: download FileZilla

https://dl3.cdn.filezilla-project.org/client/FileZilla_3.60.2_macosx-x86.app.tar.bz2?h=34t8FN97h2LIqhxo_qJKgA&x=1659190792

*Cloudera Folks*

Step 2 ------ Cloudera Folks: use WinSCP to copy prodata.txt to /home/cloudera/
Lab Folks: this step is not required.

Step 3 ------ Cloudera Folks: go to MySQL and create the tables

mysql -uroot -pcloudera

create database prodb;


use prodb;

create table customer_total(
  id int(10), username varchar(100), sub_port varchar(100), host varchar(100),
  date_time varchar(100), hit_count_val_1 varchar(100),
  hit_count_val_2 varchar(100), hit_count_val_3 varchar(100),
  timezone varchar(100), method varchar(100), `procedure` varchar(100),
  value varchar(100), sub_product varchar(100), web_info varchar(100),
  status_code varchar(100)
);

load data infile '/home/cloudera/prodata.txt' into table customer_total
fields terminated by ',';
select * from customer_total;
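
If MySQL refuses to read the file (for example, a secure-file-priv or file-access
error), the LOCAL variant, which streams the file from the client instead of the
server, is a common workaround (offered as an alternative, not part of the
original lab):

load data local infile '/home/cloudera/prodata.txt' into table customer_total
fields terminated by ',';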

create table customer_src(
  id int(10), username varchar(100), sub_port varchar(100), host varchar(100),
  date_time varchar(100), hit_count_val_1 varchar(100),
  hit_count_val_2 varchar(100), hit_count_val_3 varchar(100),
  timezone varchar(100), method varchar(100), `procedure` varchar(100),
  value varchar(100), sub_product varchar(100), web_info varchar(100),
  status_code varchar(100)
);

insert into customer_src select * from customer_total where id>0 and id<101;
quit

=============================
Edge Node
=============================
rm -rf /home/cloudera/avsrcdir
mkdir /home/cloudera/avsrcdir
cd /home/cloudera/avsrcdir
echo -n cloudera > /home/cloudera/passfile
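
Note: the -n flag matters here. Sqoop uses the entire contents of the password
file, so a trailing newline would be treated as part of the password. The Sqoop
user guide also recommends locking the file down; a minimal precaution:

chmod 400 /home/cloudera/passfile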

sqoop job --delete inpjob

sqoop job --create inpjob -- import --connect jdbc:mysql://localhost/prodb \
  --username root --password-file file:///home/cloudera/passfile -m 1 \
  --table customer_src --target-dir /user/cloudera/customer_stage_loc \
  --incremental append --check-column id --last-value 0 --as-avrodatafile

sqoop job --list
sqoop job --exec inpjob
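
Before moving on, it can help to confirm the import landed in HDFS and that
Sqoop wrote the generated Avro schema file into the current directory (the
reason we cd'd into avsrcdir first). A quick sanity check, not part of the
original steps:

hadoop fs -ls /user/cloudera/customer_stage_loc
ls /home/cloudera/avsrcdir/*.avsc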

hadoop fs -mkdir /user/cloudera/avscdirpro
hadoop fs -put /home/cloudera/avsrcdir/customer_src.avsc /user/cloudera/avscdirpro
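
Because the Hive tables below resolve their columns through avro.schema.url at
query time, it is worth confirming the schema file actually landed in HDFS. One
way to check:

hadoop fs -cat /user/cloudera/avscdirpro/customer_src.avsc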

====================================
Hive shell
====================================

hive
create database prodb;
use prodb;

create table customer_src
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS AVRO
LOCATION '/user/cloudera/customer_stage_loc'
TBLPROPERTIES ('avro.schema.url'='/user/cloudera/avscdirpro/customer_src.avsc');

select * from customer_src; === You should see the data

create external table customer_target_tab
partitioned by (current_day string, year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS AVRO
LOCATION '/user/cloudera/customer_target_tab'
TBLPROPERTIES ('avro.schema.url'='/user/cloudera/avscdirpro/customer_src.avsc');

select * from customer_target_tab; === You will not see any data yet
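
The table is empty because creating a partitioned external table registers no
partitions; rows only appear after partitions are loaded. A minimal sketch of
how it could be populated with dynamic partitioning (the date derivation below
is hypothetical and assumes date_time is in a format Hive's date functions
understand):

SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
insert into table customer_target_tab partition (current_day, year, month, day)
select s.*,
       to_date(s.date_time) as current_day,
       year(s.date_time) as year,
       month(s.date_time) as month,
       day(s.date_time) as day
from customer_src s;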

*Lab Folks*

===============
Go to Mysql
===============

mysql -u nyse_user -h ms.itversity.com -pitversity

use nyse_export;

create table customer_src_<LABUSER>(
  id int(10), username varchar(100), sub_port varchar(100), host varchar(100),
  date_time varchar(100), hit_count_val_1 varchar(100),
  hit_count_val_2 varchar(100), hit_count_val_3 varchar(100),
  timezone varchar(100), method varchar(100), `procedure` varchar(100),
  value varchar(100), sub_product varchar(100), web_info varchar(100),
  status_code varchar(100)
);

insert into customer_src_<LABUSER> select * from customer_total where id>0 and id<101;

quit
=============================
Edge Node
=============================
rm -rf /home/<LABUSER>/avsrcdir
mkdir /home/<LABUSER>/avsrcdir
cd /home/<LABUSER>/avsrcdir
echo -n itversity > /home/<LABUSER>/passfile

sqoop import -Dmapreduce.job.user.classpath.first=true \
  --connect jdbc:mysql://ms.itversity.com/nyse_export \
  --username nyse_user --password-file file:///home/<LABUSER>/passfile -m 1 \
  --table customer_src_<LABUSER> --target-dir /user/<LABUSER>/customer_stage_loc \
  --incremental append --check-column id --last-value 0 --as-avrodatafile
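
As in the Cloudera section, a quick sanity check that the import landed and
that the generated schema file exists locally (substitute your own <LABUSER>):

hadoop fs -ls /user/<LABUSER>/customer_stage_loc
ls /home/<LABUSER>/avsrcdir/*.avsc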

hadoop fs -mkdir /user/<LABUSER>/avscdirpro
hadoop fs -put /home/<LABUSER>/avsrcdir/customer_src_<LABUSER>.avsc /user/<LABUSER>/avscdirpro

====================================
Hive shell
====================================

hive
SET hive.metastore.warehouse.dir=/user/<LABUSER>/warehouse;
create database prodb_<LABUSER>;
use prodb_<LABUSER>;

create table customer_src
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS AVRO
LOCATION '/user/<LABUSER>/customer_stage_loc'
TBLPROPERTIES ('avro.schema.url'='/user/<LABUSER>/avscdirpro/customer_src_<LABUSER>.avsc');

select * from customer_src; === You should see the data

create external table customer_target_tab
partitioned by (current_day string, year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS AVRO
LOCATION '/user/<LABUSER>/customer_target_tab'
TBLPROPERTIES ('avro.schema.url'='/user/<LABUSER>/avscdirpro/customer_src_<LABUSER>.avsc');

select * from customer_target_tab; === You will not see any data yet
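
As in the Cloudera section, the table stays empty until partitions are loaded;
the same dynamic-partition insert sketch shown there applies here, using the
/user/<LABUSER>/ paths.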
