def get_indexes(self, connection, table_name, schema=None, **kw):
    """Report a table's partition columns as a single pseudo-index.

    Impala has no real indexes, so the partition columns listed by
    ``DESCRIBE FORMATTED`` are exposed as one non-unique index named
    ``'partition'``.

    :param connection: object whose ``execute(query)`` returns a cursor
        supporting ``fetchall()``.
    :param table_name: name of the table to inspect.
    :param schema: optional schema (database) name used to qualify
        ``table_name``.
    :param kw: accepted for interface compatibility; unused here.
    :returns: ``[]`` when no partition columns are found, otherwise a
        one-element list
        ``[{'name': 'partition', 'column_names': [...], 'unique': False}]``.
    """
    name = table_name
    if schema is not None:
        name = '%s.%s' % (schema, name)
    # NOTE(review): the name is interpolated unquoted, exactly as before;
    # callers are expected to pass a trusted identifier.
    query = 'DESCRIBE FORMATTED %s' % name
    rows = connection.execute(query).fetchall()

    col_names = []
    in_partition_section = False
    for row in rows:
        # Only the first field (the column/section name) matters here.
        first = row[0].strip() if row[0] else None
        if not first:
            # Blank separator rows carry no information.
            continue
        if not in_partition_section:
            # Skip everything up to and including the section marker.
            in_partition_section = first == '# Partition Information'
            continue
        if first == '# col_name':
            # Header row repeated at the top of the partition section.
            continue
        if first.startswith('#'):
            # Next section (e.g. '# Detailed Table Information'): the
            # partition column list is over.
            break
        col_names.append(first)

    if col_names:
        return [{'name': 'partition', 'column_names': col_names, 'unique': False}]
    return []
def table_metadata_from_ddl_template(con, ddl, table_name):
    """Create a table from a DDL template and return its reflected metadata.

    ``ddl`` is formatted with ``table=table_name`` and executed on ``con``;
    the resulting table is then reflected through an engine that hands back
    ``con`` as its connection.
    """
    create_statement = ddl.format(table=table_name)
    cursor = con.cursor()
    cursor.execute(create_statement)
    cursor.close()
    # The creator always returns the connection passed in by the caller.
    engine = create_engine('impala://', creator=lambda x: con)
    return Table(table_name, MetaData(), autoload=True, autoload_with=engine)


def test_no_partitions_no_indexes(con):
    """A table created without partitions reflects with zero indexes."""
    reflected = table_metadata_from_ddl_template(
        con, 'CREATE TABLE {table} (a STRING)', 'no_partitions')
    assert len(reflected.indexes) == 0


def test_one_partitions_indexes(con):
    """A table with one partition column reflects one single-column index."""
    reflected = table_metadata_from_ddl_template(
        con,
        'CREATE TABLE {table} (a STRING) PARTITIONED BY (b INT);',
        'one_partition')
    assert len(reflected.indexes) == 1
    first_index = list(reflected.indexes)[0]
    assert str(first_index.columns) == "['one_partition.b']"


def test_two_partitions_indexes(con):
    """A table with two partition columns reflects one two-column index."""
    reflected = table_metadata_from_ddl_template(
        con,
        'CREATE TABLE {table} (a STRING) PARTITIONED BY (b INT, c INT);',
        'two_partitions')
    assert len(reflected.indexes) == 1
    first_index = list(reflected.indexes)[0]
    assert str(first_index.columns) == "['two_partitions.b', 'two_partitions.c']"