Skip to content

Commit

Permalink
Reflecting tables with sqlalchemy populates table indices with partit…
Browse files Browse the repository at this point in the history
…ions like PyHive does
  • Loading branch information
Ivan Korhner committed Dec 11, 2016
1 parent c0b06cf commit 283aed4
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
24 changes: 21 additions & 3 deletions impala/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,27 @@ def get_foreign_keys(self, connection, table_name, schema=None, **kw):
return []

def get_indexes(self, connection, table_name, schema=None, **kw):
    """Report a table's partition columns as a single pseudo-index.

    Impala has no real indexes, so (like PyHive) the partition columns
    listed by ``DESCRIBE FORMATTED`` are exposed as one non-unique index
    named ``partition``.

    :param connection: object whose ``execute(query)`` returns a cursor
        supporting ``fetchall()``.
    :param table_name: name of the table to inspect.
    :param schema: optional schema name; when given the table is addressed
        as ``schema.table_name``.
    :param kw: accepted for interface compatibility; unused here.
    :returns: ``[]`` when no partition columns are found, otherwise a
        one-element list holding a dict with the keys ``name``,
        ``column_names`` and ``unique``.
    """
    name = table_name
    if schema is not None:
        name = '%s.%s' % (schema, name)
    query = 'DESCRIBE FORMATTED %s' % name
    cursor = connection.execute(query)

    # Only the first field of each row matters here. Strip whitespace and
    # drop the blank separator rows.
    labels = [row[0].strip() if row[0] else None for row in cursor.fetchall()]
    labels = [label for label in labels if label]

    # No partition section (or no rows at all) means there is nothing to
    # report; this also avoids relying on a leftover loop index.
    try:
        start = labels.index('# Partition Information') + 1
    except ValueError:
        return []

    col_names = []
    for label in labels[start:]:
        if label == '# col_name':
            # Header row repeated at the top of the partition section.
            continue
        if label.startswith('#'):
            # Presumably the next section of the DESCRIBE FORMATTED output
            # (e.g. table details) -- its rows are not partition columns.
            break
        col_names.append(label)

    if not col_names:
        return []
    return [{'name': 'partition', 'column_names': col_names, 'unique': False}]

def do_rollback(self, dbapi_connection):
# no transactions in impala
Expand Down
36 changes: 36 additions & 0 deletions impala/tests/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@

from impala.sqlalchemy import STRING, INT, DOUBLE, TINYINT

def table_metadata_from_ddl_template(con, ddl, table_name):
    """
    Create a table from a DDL template and return its reflected metadata.

    ``ddl`` is formatted with ``table=table_name`` and executed on ``con``;
    the resulting table is then autoloaded through an engine whose creator
    hands back that same connection.
    """
    cursor = con.cursor()
    statement = ddl.format(table=table_name)
    cursor.execute(statement)
    cursor.close()

    # Reflection goes through the connection that just ran the DDL.
    reflecting_engine = create_engine('impala://', creator=lambda x: con)
    return Table(
        table_name,
        MetaData(),
        autoload=True,
        autoload_with=reflecting_engine,
    )

def test_no_partitions_no_indexes(con):
    """
    An unpartitioned table must reflect with an empty set of indexes.
    """
    reflected = table_metadata_from_ddl_template(
        con,
        'CREATE TABLE {table} (a STRING)',
        'no_partitions',
    )
    index_count = len(reflected.indexes)
    assert index_count == 0

def test_one_partitions_indexes(con):
    """
    A table partitioned by one column must reflect exactly one index,
    and that index must cover only the partition column.
    """
    reflected = table_metadata_from_ddl_template(
        con,
        'CREATE TABLE {table} (a STRING) PARTITIONED BY (b INT);',
        'one_partition',
    )
    assert len(reflected.indexes) == 1
    partition_index = list(reflected.indexes)[0]
    rendered_columns = str(partition_index.columns)
    assert rendered_columns == "['one_partition.b']"

def test_two_partitions_indexes(con):
    """
    A table partitioned by two columns must reflect exactly one index,
    and that index must cover both partition columns.
    """
    reflected = table_metadata_from_ddl_template(
        con,
        'CREATE TABLE {table} (a STRING) PARTITIONED BY (b INT, c INT);',
        'two_partitions',
    )
    assert len(reflected.indexes) == 1
    partition_index = list(reflected.indexes)[0]
    rendered_columns = str(partition_index.columns)
    assert rendered_columns == "['two_partitions.b', 'two_partitions.c']"

def test_sqlalchemy_compilation():
engine = create_engine('impala://localhost')
Expand Down

0 comments on commit 283aed4

Please sign in to comment.