Skip to content

Commit

Permalink
add filename validation
Browse files (browse the repository at this point in the history)
  • Loading branch information
amakunin committed Aug 18, 2023
1 parent 5946725 commit 6eb7f13
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
6 changes: 4 additions & 2 deletions work/validate_bioscan.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
"\n",
" # orange cols\n",
" validate_regex('CATCH_LOT', df, na_values=[])\n",
" df, gal = validate_plates_wells(df, contrib_df, 'RACK_OR_PLATE_ID', 'TUBE_OR_WELL_ID', bioscan=True)\n",
" df, gal, partner_code = validate_plates_wells(\n",
" df, contrib_df, 'RACK_OR_PLATE_ID', 'TUBE_OR_WELL_ID', bioscan=True)\n",
" # check and exclude blanks\n",
" is_blank = check_blanks(df)\n",
" if df[~is_blank].shape[0] == 0:\n",
Expand Down Expand Up @@ -106,6 +107,7 @@
" validate_freetext('OTHER_INFORMATION', df)\n",
" validate_freetext('MISC_METADATA', df)\n",
" validate_identifier('IDENTIFIED_BY', df, contrib_df, na_values=[''])\n",
" validate_input_filename(fn, partner_code, v)\n",
" \n",
" df = expand_plate_only(df)\n",
" df = add_sts_cols(df, contrib_df, gal, bioscan=True, v=v)\n",
Expand Down Expand Up @@ -166,7 +168,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9259b231",
"id": "5ae435ce",
"metadata": {},
"outputs": [],
"source": []
Expand Down
29 changes: 28 additions & 1 deletion work/validate_partner_manifest_dev.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@
" else:\n",
" # anospp\n",
" unknown_prefixes = (~plate_prefixes.isin(contrib_df['PARTNER_CODE']))\n",
" selected_partner_code = 'SANG'\n",
" gal = \"Sanger Institute\"\n",
" if unknown_prefixes.any():\n",
" logging.error(f'plate ID prefixes not recognised for {plates[unknown_prefixes].to_list()}')\n",
Expand Down Expand Up @@ -532,7 +533,7 @@
" logging.info(f'{df.shape[0]} samples found across {df[plate_col].nunique()} plates')\n",
" \n",
"\n",
" return df, gal\n",
" return df, gal, selected_partner_code\n",
" \n",
"# df = validate_plates_wells(df, contrib_df, 'RACK_OR_PLATE_ID', 'TUBE_OR_WELL_ID')"
]
Expand Down Expand Up @@ -1144,6 +1145,25 @@
"# validate_freetext('IDENTIFIED_HOW', df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def validate_input_filename(input_fn, partner_code, bioscan_version):\n",
" \n",
" # expected name: <partner_code>_YYMM_BIOSCAN_Manifest_V<version>*xlsx\n",
" v = bioscan_version.strip('v')\n",
" fn_regex = f'^{partner_code}_(2[0-3])(0[1-9]|1[0-2])_BIOSCAN_Manifest_V{v}.*xlsx$'\n",
" \n",
" fn_basename = os.path.basename(input_fn)\n",
" \n",
" if not re.match(fn_regex, fn_basename):\n",
" logging.warning(f'input filename {fn_basename} does not match '\n",
" f'{partner_code}_YYMM_BIOSCAN_Manifest_V{v}*xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -1195,6 +1215,13 @@
"# add_sts_cols(df, contrib_df, gal='Sanger Institute');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 6eb7f13

Please sign in to comment.