Skip to content

Commit

Permalink
black applied.
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Nov 30, 2024
1 parent 2b5cc7f commit 71784a6
Showing 1 changed file with 24 additions and 20 deletions.
44 changes: 24 additions & 20 deletions quantmsutils/mzml/mzml_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,32 @@ def batch_write_bruker_d(file_name: str, output_path: str, batch_size: int = 100
"SELECT Value FROM GlobalMetadata WHERE key='AcquisitionDateTime'"
).fetchone()[0]

# Check which optional columns exist
columns = column_exists(conn, "frames")

# Hard-coded allow-list: output column name -> SQL expression that produces it
allowed_columns = {
'Id': 'Id',
'MsMsType': 'CASE WHEN MsMsType IN (8, 9) THEN 2 WHEN MsMsType = 0 THEN 1 ELSE NULL END',
'NumPeaks': 'NumPeaks',
'MaxIntensity': 'MaxIntensity',
'SummedIntensities': 'SummedIntensities',
'Time': 'Time',
'Charge': 'Charge',
'MonoisotopicMz': 'MonoisotopicMz'
}

# Construct safe column list
safe_columns = []
column_mapping = {}
for schema_col_name, sql_expr in allowed_columns.items():
if schema_col_name in columns or schema_col_name == 'Id':
safe_columns.append(sql_expr)
column_mapping[schema_col_name] = sql_expr

# Build the query by interpolating only the allow-listed column expressions (no bound parameters are used)
query = f"""SELECT {', '.join(safe_columns)} FROM frames"""

schema = pa.schema(
[
pa.field("Id", pa.int32(), nullable=False),
Expand All @@ -302,26 +326,6 @@ def batch_write_bruker_d(file_name: str, output_path: str, batch_size: int = 100
# Set up parquet writer
parquet_writer = pq.ParquetWriter(output_path, schema=schema, compression="gzip")

base_columns = [
"Id",
"CASE WHEN MsMsType IN (8, 9) THEN 2 WHEN MsMsType = 0 THEN 1 ELSE NULL END as MsMsType",
"NumPeaks",
"MaxIntensity",
"SummedIntensities",
"Time",
]

if "Charge" in columns:
base_columns.insert(-1, "Charge") # Add before the last column for logical flow

if "MonoisotopicMz" in columns:
base_columns.insert(-1, "MonoisotopicMz")

safe_columns = [
col for col in base_columns if col.replace(" ", "").isalnum()
] # Remove spaces
query = f"""SELECT {', '.join(safe_columns)} FROM frames """

try:
# Stream data in batches
for chunk in pd.read_sql_query(query, conn, chunksize=batch_size):
Expand Down

0 comments on commit 71784a6

Please sign in to comment.