forked from chdb-io/chdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
to_pandas.py
31 lines (25 loc) · 884 Bytes
/
to_pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!python3
import os
import pyarrow as pa
import chdb
# Locate the sample parquet file relative to the directory of this script.
script_path = os.path.abspath(__file__)
current_dir = os.path.dirname(script_path)
test_parquet = f"{current_dir}/../tests/data/alltypes_dictionary.parquet"

# Run SQL directly against the parquet file, requesting Arrow-format output.
sql = f"select * from file('{test_parquet}', Parquet)"
res = chdb.query(sql, "Arrow")

# Show the raw result bytes before any conversion.
print("\nresult from chdb:")
print(res.bytes())
def to_arrowTable(res):
    """Convert an Arrow-format query result into a pyarrow table.

    The raw payload from ``res.bytes()`` is opened with
    ``pa.RecordBatchFileReader`` and read in full via ``read_all()``.
    """
    reader = pa.RecordBatchFileReader(res.bytes())
    return reader.read_all()
def to_df(res):
    """Convert an Arrow-format query result into a pandas DataFrame.

    The result is first turned into an arrow table with ``to_arrowTable``
    and then converted with ``to_pandas(use_threads=True)``.
    """
    return to_arrowTable(res).to_pandas(use_threads=True)
# Demonstrate both conversions of the same query result: first as a pyarrow
# table, then as a pandas dataframe. Each heading is printed before its
# conversion runs, so the output order matches the conversion order.
for heading, convert in (
    ("\nresult from chdb to pyarrow:", to_arrowTable),
    ("\nresult from chdb to pandas:", to_df),
):
    print(heading)
    print(convert(res))