-
Notifications
You must be signed in to change notification settings - Fork 6
/
mysql_to_hive
executable file
·91 lines (84 loc) · 5.78 KB
/
mysql_to_hive
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/python
# pluo, Apr 1, 2013
# PATH=$PATH:/home/dataproc/bi/lib/python # add python lib to path
# mysql_to_hive --mysql_host='bidbs' --mysql_db='bi_staging' --mysql_table='userstatsreport' --hive_create_table='Y'
# mysql_to_hive --mysql_db='bi' --mysql_table='lkp_state' --hive_create_table='Y'
# sqoop-import --connect jdbc:mysql://bidbs/bi_staging -username root -password *** --table userstatsreport --hive-table userstatsreport --create-hive-table --hive-import --hive-home /usr/lib/hive
# sqoop-import --connect jdbc:mysql://maildb-slave/Email -username root -password *** --table FE_EmailRecord --hive-table FE_EmailRecord --where "time_stamp < '2013-02-02'" --create-hive-table --hive-import --hive-home /usr/lib/hive
# time mysql_to_hive --mysql_host='maildb-slave' --mysql_db='Email' --mysql_table='FE_EmailRecord' --mysql_query="select * from Email.FE_EmailRecord where time_stamp < '2013-02-02'" --hive_create_table='Y' --remove_carriage_return='Y'
# mysql_to_hive --mysql_host='bidbs' --mysql_db='bi' --mysql_table='dim_time' --hive_create_table='Y'
# add relative path
import os, sys, inspect
# realpath() with make your script run, even if you symlink it :)
cmd_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile( inspect.currentframe() ))[0]))
if cmd_folder not in sys.path:
sys.path.insert(0, cmd_folder)
import legoo
import sys
from optparse import OptionParser
def main():
parser = OptionParser()
parser.add_option("--mysql_ini", dest="mysql_ini",
help="mysql initial file for user, password and default db, default: [mysql.ini]", default='mysql.ini')
parser.add_option("--mysql_host", dest="mysql_host",
help="mysql host for source data, default: [bidbs]", default='bidbs')
parser.add_option("--mysql_db", dest="mysql_db",
help="mysql database for source data, default: [bi]", default='bi')
parser.add_option("--mysql_user", dest="mysql_user",
help="MANDATORY: mysql user, if not specified, get user from mysql_ini")
parser.add_option("--mysql_password", dest="mysql_password",
help="MANDATORY OPTIONAL: mysql password, if not specified, get password from mysql_ini")
parser.add_option("--mysql_quick", dest="mysql_quick",
help="--quick option for mysql client, default:[N]", default='N')
parser.add_option("--mysql_table", dest="mysql_table",
help="mysql table to be exported")
parser.add_option("--mysql_query", dest="mysql_query",
help="query results to be exported")
parser.add_option("--hive_node", dest="hive_node",
help="target hive node. default: [namenode1]", default='namenode1')
parser.add_option("--hive_port", dest="hive_port",
help="target hive port. default: 10000", default='10000')
parser.add_option("--hive_db", dest="hive_db",
help="target hive db. default: [staging]", default='staging')
parser.add_option("--hive_ddl", dest="hive_ddl",
help="hive DDL for target hive table. default created from source mysql table")
parser.add_option("--hive_create_table", dest="hive_create_table",
help="CREATE_TABLE flag for hive table DDL. default: [N]", default='N')
parser.add_option("--hive_overwrite", dest="hive_overwrite",
help="OVERWRITE flag for hive table loading. default to [N] as append mode", default='N')
parser.add_option("--mapred_job_priority", dest="mapred_job_priority",
help="map reduce job priority [VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW]", default='NORMAL')
parser.add_option("--hive_table", dest="hive_table",
help="hive table name. default: created from csv file name")
parser.add_option("--hive_partition", dest="hive_partition",
help="partition name i.e. date_int=20130428")
parser.add_option("--remove_carriage_return", dest="remove_carriage_return",
help="remove carriage return from mysql source table. default: [N]", default='N')
parser.add_option("-q", "--quiet", "--silent", dest="quiet",
help="suppress messages to stdout. default: [N]", default='N')
parser.add_option("-d", "--debug", dest="debug", help="OPTIONAL: debug flag [Y|N], default: [N]", default='N')
(options, args) = parser.parse_args()
legoo.mysql_to_hive(mysql_ini = options.mysql_ini, \
mysql_host = options.mysql_host, \
mysql_db = options.mysql_db, \
mysql_user = options.mysql_user, \
mysql_password = options.mysql_password, \
mysql_quick = options.mysql_quick, \
mysql_table = options.mysql_table, \
mysql_query = options.mysql_query, \
hive_node = options.hive_node, \
hive_port = options.hive_port, \
hive_db = options.hive_db, \
hive_create_table = options.hive_create_table, \
hive_table = options.hive_table, \
hive_partition = options.hive_partition, \
hive_ddl = options.hive_ddl, \
hive_overwrite = options.hive_overwrite, \
mapred_job_priority = options.mapred_job_priority, \
remove_carriage_return = options.remove_carriage_return, \
quiet = options.quiet, \
debug = options.debug
)
# pass
if __name__ == '__main__':
main()