HDFS Notes


=============TABLE IMPORT================================

sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --table employees
--username root --password hr --target-dir /sqoop/sunny/1
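
Side note: the password does not have to sit in clear text on the command line; Sqoop also
accepts -P (prompt at run time) or --password-file. A minimal sketch of the same import with
an interactive prompt:

sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --table employees
--username root -P --target-dir /sqoop/sunny/1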

==============TABLE IMPORT + COLUMNS + WHERE ===============


sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --table employees
--columns 'FIRST_NAME,SALARY' --where 'SALARY >12000' --username root --password
hr --target-dir /sqoop/sunny/3

==============COLUMN IMPORT============================
sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --columns 'FIRST_NAME'
--table employees --password hr --username root --target-dir /sqoop/sunny/6

==============COLUMN IMPORT + MAP PARAMETER m=1=========


sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --table employees
--columns 'FIRST_NAME,SALARY' --where 'SALARY >12000' --username root --password
hr --target-dir /sqoop/sunny/3 -m 1
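
-m 1 forces the whole import through a single mapper, so the target directory ends up with one
part file instead of the default four (the default of 4 mappers also shows up later in the
job --show output). A quick way to check, assuming the HDFS client is available:

hdfs dfs -ls /sqoop/sunny/3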

================TABLE IMPORT + JUMBLED UP PARAMETERS========


sqoop import --target-dir /sqoop/sunny/13 --password hr --table employees
--username root --connect jdbc:mysql://localhost:3306/vaibhav

================QUERY IMPORT + JOIN ======================


sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --query 'SELECT * FROM
employees JOIN DEPT on (employees.DEPARTMENT_ID = DEPT.ID) AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/13 --split-by 'EMPLOYEE_ID'

=================QUERY IMPORT + JOIN + WHERE=============


sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --query 'SELECT * FROM
employees JOIN DEPT ON (employees.DEPARTMENT_ID = DEPT.ID) WHERE $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/13 --split-by 'EMPLOYEE_ID'

=================QUERY IMPORT + JOIN + WHERE + AND====


sqoop import --connect jdbc:mysql://localhost:3306/vaibhav --query 'SELECT * FROM
employees JOIN DEPT ON (employees.DEPARTMENT_ID = DEPT.ID) WHERE SALARY > 12000 AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/13 --split-by 'EMPLOYEE_ID'

=================QUERY IMPORT + SINGLE QUERY + WHERE + JUMBLED=========


sqoop import --query 'SELECT * FROM employees where SALARY >12000 AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/17 --connect
jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID'

================QUERY IMPORT + SINGLE QUERY + WHERE (1>0) + JUMBLED===


sqoop import --query 'SELECT * FROM employees WHERE 1>0 AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/172 --connect
jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID'
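
Worth noting: --split-by is needed in these query imports because the work is split across the
default four mappers, and Sqoop runs MIN/MAX over that column to cut the ranges. With a single
mapper it can be dropped; a rough sketch (the target dir /sqoop/sunny/173 is just a made-up example):

sqoop import --query 'SELECT * FROM employees WHERE 1>0 AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/173 --connect
jdbc:mysql://localhost:3306/vaibhav -m 1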

====== Delete Target Dir =================================


Note: --delete-target-dir deletes the output directory first and lets the import create a fresh one
sqoop import --query 'SELECT * FROM employees WHERE 1>0 AND $CONDITIONS'
--username root --password hr --target-dir /sqoop/sunny/172 --connect
jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID' --delete-target-dir

================QUERY IMPORT + SINGLE QUERY + NO WHERE > Gives Error ===


sqoop import --query 'SELECT * FROM employees AND $CONDITIONS' --username root
--password hr --target-dir /sqoop/sunny/172 --connect
jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID'

**output > Error - wrong SQL syntax ($CONDITIONS needs a valid place to sit, such as a WHERE clause; FROM employees AND ... is not valid SQL)

*******************************************************************

============ SQOOP IMPORT - INCREMENTAL - APPEND ================


sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/user_detail_output --incremental
append --last-value 5 --check-column user_id

output : No new rows detected since last import (the table had 5 rows in total at this point
and --last-value was 5, so there was nothing newer to pull)

sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/user_detail_output --incremental
append --last-value 6 --check-column user_id

output :
18/02/03 02:59:31 INFO tool.ImportTool: Maximal id query for free form incremental
import: SELECT MAX(`user_id`) FROM user_detail
18/02/03 02:59:31 INFO tool.ImportTool: Incremental import based on column
`user_id`
18/02/03 02:59:31 INFO tool.ImportTool: Lower bound value: 6
18/02/03 02:59:31 INFO tool.ImportTool: Upper bound value: 7

*imp - not good practice to use a string column as the --check-column, because the incremental
logic depends on sorting and MIN/MAX calculations over that column
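
Tip (see the SQOOP JOBS section further down): if the incremental import is saved as a sqoop job,
the metastore remembers the last imported value and updates it after every run, so --last-value
does not have to be tracked by hand. A rough sketch with a hypothetical job name user_detail_incr:

sqoop job --create user_detail_incr -- import --connect jdbc:mysql://localhost/vaibhav
--username root --password hr --table user_detail --target-dir
/user/sunny/user_detail_output --incremental append --check-column user_id --last-value 0

sqoop job --exec user_detail_incr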

======== Last modified ===============


sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/user_detail_increment_output
--incremental lastmodified --last-value "2017-02-03 00:00:00" --check-column
reg_date

output:
18/02/03 03:14:42 INFO tool.ImportTool: Lower bound value: '2017-02-03 00:00:00'
18/02/03 03:14:42 INFO tool.ImportTool: Upper bound value: '2018-02-03 03:14:42.0'

18/02/03 03:14:44 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT


MIN(`user_id`), MAX(`user_id`) FROM `user_detail` WHERE ( `reg_date` >= '2017-02-03
00:00:00' AND `reg_date` < '2018-02-03 03:14:42.0' )

## query 2 - current contents of user_detail before the next lastmodified import

mysql> select * from user_detail;
+---------+-----------+------+---------------------+
| user_id | user_name | age  | reg_date            |
+---------+-----------+------+---------------------+
|       1 | A1        |   30 | 2018-02-03 00:00:00 |
|       2 | A2        |   31 | 2018-02-03 00:00:00 |
|       3 | A3        |   32 | 2018-02-03 00:00:00 |
|       4 | A4        |   33 | 2018-02-03 00:00:00 |
|       5 | A5        |   34 | 2018-02-03 00:00:00 |
|       6 | A6        |   35 | 2018-02-03 00:00:00 |
|       7 | A7        |   36 | 2018-02-03 00:00:00 |
|       8 | A8        |   37 | 2018-02-03 10:00:00 |
|       9 | A9        |   38 | 2018-02-02 10:00:00 |
|      10 | A10       |   39 | 2018-02-04 00:00:00 |
|      11 | A10       |   39 | 2018-02-05 00:00:00 |
|      12 | A12       |   42 | 2018-02-06 00:00:00 |
+---------+-----------+------+---------------------+
12 rows in set (0.00 sec)

sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/last_modified --incremental
lastmodified --last-value "2018-02-04 00:00:00" --check-column reg_date

18/02/03 07:27:02 INFO mapreduce.ImportJobBase: Beginning import of user_detail


18/02/03 07:27:05 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT
MIN(`user_id`), MAX(`user_id`) FROM `user_detail` WHERE ( `reg_date` >= '2018-02-04
00:00:00' AND `reg_date` < '2018-02-03 07:27:02.0' )

??why is it taking reg_date >= what I gave and < the system time?
(the BoundingValsQuery above shows the upper bound is always the job start time, so this particular run matches nothing)
in production a future date generally won't be there,
but what about columns where a future date is there? rows 10-12 above are "in the future" relative
to the run time, so they only get picked up once the clock passes their reg_date

sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/last_modified2 --incremental
lastmodified --last-value "2018-02-02 00:00:00" --check-column reg_date

output:
18/02/03 07:47:17 INFO tool.ImportTool: Lower bound value: '2018-02-02 00:00:00'
18/02/03 07:47:17 INFO tool.ImportTool: Upper bound value: '2018-02-03 07:47:17.0'

18/02/03 07:47:46 INFO mapreduce.ImportJobBase: Retrieved 8 records.

1,A1,30,2018-02-03 00:00:00.0
2,A2,31,2018-02-03 00:00:00.0
3,A3,32,2018-02-03 00:00:00.0
4,A4,33,2018-02-03 00:00:00.0
5,A5,34,2018-02-03 00:00:00.0
6,A6,35,2018-02-03 00:00:00.0
7,A7,36,2018-02-03 00:00:00.0
9,A9,38,2018-02-02 10:00:00.0
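
One thing these notes do not cover: every lastmodified run above writes to a fresh target dir.
To keep re-importing into the same directory, Sqoop needs either --append or --merge-key (the
merge key collapses a re-imported row with its older copy). A rough sketch, reusing the directory
above, user_id as the merge key, and the upper bound printed by this run as the next --last-value:

sqoop import --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--table user_detail --target-dir /user/sunny/last_modified2 --incremental
lastmodified --last-value "2018-02-03 07:47:17" --check-column reg_date --merge-key user_id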

********************* SQOOP JOBS ***************************


=======================CREATE JOB===============================
sqoop job --create myjob1 -- import --query 'SELECT * FROM employees WHERE 1>0
AND $CONDITIONS' --username root --password hr --target-dir /sqoop/sunny/172
--connect jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID'
--delete-target-dir

Note:
syntax: sqoop job --create <jobname> -- <import/export> <tool arguments>  (the space after the standalone -- is mandatory)
output : no specific output; verify with the job list (next command)
================== LIST All JOB=================================
sqoop job --list

output:
Available jobs:
myjob1

================ SHOW DETAILS OF A JOB=========================


sqoop job --show myjob1

output: (it asks for the password and prints all the job's parameters)
Enter password:
Job: myjob1
Tool: import
Options:
----------------------------
verbose = false
db.connect.string = jdbc:mysql://localhost:3306/vaibhav
codegen.output.delimiters.escape = 0
codegen.output.delimiters.enclose.required = false
codegen.input.delimiters.field = 0
hbase.create.table = false
db.require.password = true
hdfs.append.dir = false
import.fetch.size = null
codegen.input.delimiters.escape = 0
codegen.input.delimiters.enclose.required = false
db.username = root
codegen.output.delimiters.record = 10
import.max.inline.lob.size = 16777216
hcatalog.create.table = false
db.clear.staging.table = false
codegen.input.delimiters.record = 0
enable.compression = false
hive.overwrite.table = false
hive.import = false
codegen.input.delimiters.enclose = 0
hive.drop.delims = false
codegen.output.delimiters.enclose = 0
hdfs.delete-target.dir = true
codegen.output.dir = .
codegen.auto.compile.dir = true
db.query = SELECT * FROM employees WHERE 1>0 AND $CONDITIONS
mapreduce.num.mappers = 4
import.direct.split.size = 0
export.new.update = UpdateOnly
codegen.output.delimiters.field = 44
incremental.mode = None
hdfs.file.format = TextFile
codegen.compile.dir = /tmp/sqoop-
itelligence/compile/c43e0f1df78448d09edc5b36e35ee5ea
direct.import = false
db.split.column = EMPLOYEE_ID
hdfs.target.dir = /sqoop/sunny/172
hive.fail.table.exists = false
db.batch = false

=========== Execute Sqoop job ====================


sqoop job --exec myjob1

note : it will ask for the DB password when the job runs (unless the metastore is configured to store passwords via sqoop.metastore.client.record.password in sqoop-site.xml)

**IMP - you cannot edit a saved job; delete it and create a new one
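
So "editing" a job is really delete-and-recreate; a quick sketch reusing myjob1, with -m 1
standing in for whatever option actually changed:

sqoop job --delete myjob1
sqoop job --create myjob1 -- import --query 'SELECT * FROM employees WHERE 1>0
AND $CONDITIONS' --username root --password hr --target-dir /sqoop/sunny/172
--connect jdbc:mysql://localhost:3306/vaibhav --split-by 'EMPLOYEE_ID'
--delete-target-dir -m 1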

========== Delete Sqoop job- =======================


sqoop job --delete myjob1

=============== SQOOP EVAL ================================


sqoop eval --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--query "SELECT * from user_detail limit 5"

sqoop eval --connect jdbc:mysql://localhost/vaibhav --username root --password hr
--query "SELECT * from user_detail"
