Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/COG-UK/civet
Browse files Browse the repository at this point in the history
  • Loading branch information
aineniamh committed Sep 15, 2020
2 parents 1e816b0 + 63656ce commit 8b1738b
Show file tree
Hide file tree
Showing 16 changed files with 520 additions and 184 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ optional arguments:
--label-fields LABEL_FIELDS
Comma separated string of fields to add to tree report
labels.
--date-fields DATE_FIELDS
Comma separated string of date information to include.
Default = "sample_date"
--node-summary COLUMN Choose which metadata column to summarise collapsed nodes by.
--search-field SEARCH_FIELD
Option to search COG database for a different id type.
Expand Down
65 changes: 45 additions & 20 deletions civet/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def main(sysargs = sys.argv[1:]):
parser.add_argument('--fields', action="store",help="Comma separated string of fields to display in the trees in the report. Default: country")
parser.add_argument('--display', action="store", help="Comma separated string of fields to display as coloured dots rather than text in report trees. Optionally add colour scheme eg adm1=viridis", dest="display")
parser.add_argument('--label-fields', action="store", help="Comma separated string of fields to add to tree report labels.", dest="label_fields")
parser.add_argument("--date-fields", action="store", help="Comma separated string of metadata headers containing date information.", dest="date_fields")
parser.add_argument("--node-summary", action="store", help="Column to summarise collapsed nodes by. Default = Global lineage", dest="node_summary")
parser.add_argument('--search-field', action="store",help="Option to search COG database for a different id type. Default: COG-UK ID", dest="search_field",default="central_sample_id")
parser.add_argument('--distance', action="store",help="Extraction from large tree radius. Default: 2", dest="distance",default=2)
Expand All @@ -68,9 +69,8 @@ def main(sysargs = sys.argv[1:]):
parser.add_argument('--date-window',action="store",default=7, type=int, dest="date_window",help="Define the window +- either side of cluster sample collection date-range. Default is 7 days.")
parser.add_argument("-v","--version", action='version', version=f"civet {__version__}")

parser.add_argument("--map-sequences", action="store_true", dest="map_sequences", help="Map the coordinate points of sequences, coloured by a triat.")
parser.add_argument("--x-col", required=False, dest="x_col", help="column containing x coordinate for mapping sequences")
parser.add_argument("--y-col", required=False, dest="y_col", help="column containing y coordinate for mapping sequences")
parser.add_argument("--map-sequences", action="store_true", dest="map_sequences", help="Map the coordinate points of sequences, coloured by a trait.")
parser.add_argument("--map-inputs", required=False, dest="map_inputs", help="columns containing EITHER x and y coordinates as a comma separated string OR outer postcodes for mapping sequences")
parser.add_argument("--input-crs", required=False, dest="input_crs", help="Coordinate reference system of sequence coordinates")
parser.add_argument("--mapping-trait", required=False, dest="mapping_trait", help="Column to colour mapped sequences by")

Expand All @@ -91,6 +91,9 @@ def main(sysargs = sys.argv[1:]):
'gnuplot', 'gnuplot2', 'CMRmap', 'cubehelix', 'brg',
'gist_rainbow', 'rainbow', 'jet', 'nipy_spectral', 'gist_ncar']

full_metadata_headers = ["central_sample_id", "biosample_source_id","sequence_name","secondary_identifier","sample_date","epi_week","country","adm1","adm2","outer_postcode","is_surveillance","is_community","is_hcw","is_travel_history","travel_history","lineage","lineage_support","uk_lineage","acc_lineage","del_lineage","phylotype"]


# Exit with help menu if no args supplied
if len(sysargs)<1:
parser.print_help()
Expand Down Expand Up @@ -178,6 +181,7 @@ def main(sysargs = sys.argv[1:]):
labels = []
queries = []
graphics_list = []
date_lst = []

with open(query, newline="") as f:
reader = csv.DictReader(f)
Expand All @@ -192,10 +196,10 @@ def main(sysargs = sys.argv[1:]):
else:
desired_fields = args.fields.split(",")
for field in desired_fields:
if field in reader.fieldnames:
if field in reader.fieldnames or field in full_metadata_headers:
fields.append(field)
else:
sys.stderr.write(f"Error: {field} field not found in metadata file")
sys.stderr.write(f"Error: {field} field not found in metadata file or full metadata file")
sys.exit(-1)


Expand All @@ -204,10 +208,21 @@ def main(sysargs = sys.argv[1:]):
else:
label_fields = args.label_fields.split(",")
for label_f in label_fields:
if label_f in reader.fieldnames:
if label_f in reader.fieldnames or label_f in full_metadata_headers:
labels.append(label_f)
else:
sys.stderr.write(f"Error: {label_f} field not found in metadata file")
sys.stderr.write(f"Error: {label_f} field not found in metadata file or full metadata file")
sys.exit(-1)

if not args.date_fields:
date_lst.append("NONE")
else:
date_fields = args.date_fields.split(",")
for date_f in date_fields:
if date_f in reader.fieldnames or date_f in full_metadata_headers:
date_lst.append(date_f)
else:
sys.stderr.write(f"Error: {date_f} field not found in query metadata file or full metadata file")
sys.exit(-1)

if args.display:
Expand Down Expand Up @@ -251,6 +266,7 @@ def main(sysargs = sys.argv[1:]):
"query":query,
"fields":",".join(fields),
"label_fields":",".join(labels),
"date_fields":",".join(date_lst),
"outdir":outdir,
"tempdir":tempdir,
"trim_start":265, # where to pad to using datafunk
Expand All @@ -269,26 +285,34 @@ def main(sysargs = sys.argv[1:]):
config["add_boxplots"]= True
else:
config["add_boxplots"]= False

if args.map_sequences:
config["map_sequences"] = True
if not args.x_col or not args.y_col:
sys.stderr.write('Error: coordinates not supplied for mapping sequences. Please provide --x-col and --y-col')
sys.exit(-1)
elif not args.input_crs:
sys.stderr.write('Error: input coordinate system not provided for mapping. Please provide --input-crs eg EPSG:3395')
if not args.map_inputs:
sys.stderr.write('Error: coordinates or outer postcode not supplied for mapping sequences. Please provide either x and y columns as a comma separated string, or column header containing outer postcode.')
sys.exit(-1)
if len(args.map_inputs.split(",")) == 2:
if not args.input_crs:
sys.stderr.write('Error: input coordinate system not provided for mapping. Please provide --input-crs eg EPSG:3395')
sys.exit(-1)
else:
input_crs = args.input_crs
else:
config["x_col"] = args.x_col
config["y_col"] = args.y_col
config["input_crs"] = args.input_crs
input_crs = "EPSG:4326"

config["map_cols"] = args.map_inputs
config["input_crs"] = input_crs

relevant_cols = []
with open(query, newline="") as f:
reader = csv.DictReader(f)
column_names = reader.fieldnames
relevant_cols = [args.x_col, args.y_col, args.mapping_trait]
map_cols = args.map_inputs.split(",")
for i in map_cols:
relevant_cols.append(i)
relevant_cols.append(args.mapping_trait)

for map_arg in relevant_cols:

if map_arg and map_arg not in reader.fieldnames:
sys.stderr.write(f"Error: {map_arg} field not found in metadata file")
sys.exit(-1)
Expand All @@ -300,8 +324,7 @@ def main(sysargs = sys.argv[1:]):

else:
config["map_sequences"] = False
config["x_col"] = False
config["y_col"] = False
config["map_cols"] = False
config["input_crs"] = False
config["mapping_trait"] = False

Expand Down Expand Up @@ -516,6 +539,7 @@ def main(sysargs = sys.argv[1:]):
map_input_3 = pkg_resources.resource_filename('civet', 'data/mapping_files/NI_counties.geojson')
map_input_4 = pkg_resources.resource_filename('civet', 'data/mapping_files/Mainland_HBs_gapclosed_mapshaped_d3.json')
map_input_5 = pkg_resources.resource_filename('civet', 'data/mapping_files/urban_areas_UK.geojson')
map_input_6 = pkg_resources.resource_filename('civet', 'data/mapping_files/UK_outPC_coords.csv')
spatial_translations_1 = pkg_resources.resource_filename('civet', 'data/mapping_files/HB_Translation.pkl')
spatial_translations_2 = pkg_resources.resource_filename('civet', 'data/mapping_files/adm2_regions_to_coords.csv')

Expand Down Expand Up @@ -555,6 +579,7 @@ def main(sysargs = sys.argv[1:]):
config["ni_map"] = map_input_3
config["uk_map_d3"] = map_input_4
config["urban_centres"] = map_input_5
config["pc_file"] = map_input_6
config["HB_translations"] = spatial_translations_1
config["PC_translations"] = spatial_translations_2

Expand Down
2 changes: 1 addition & 1 deletion civet/scripts/COG_template.pmd
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ else:
Their location is shown on the following map, with dots sized by the number of sequences from each location.

```python, name="make_map", echo=False, include=False, results='tex'
map = mapping.run_map_functions(query_dict, clean_locs_file, mapping_json_files)
map = mapping.map_adm2(query_dict, clean_locs_file, mapping_json_files)
```

```python, name="show_map", echo=False, results='raw'
Expand Down
21 changes: 12 additions & 9 deletions civet/scripts/assess_input_file.smk
Original file line number Diff line number Diff line change
Expand Up @@ -285,16 +285,17 @@ rule find_snps:
tempdir= config["tempdir"],
path = workflow.current_basedir,
threshold = config["threshold"],

fasta = config["fasta"],
tree_dir = os.path.join(config["outdir"],"local_trees"),

cores = workflow.cores,
force = config["force"],
quiet_mode = config["quiet_mode"]

output:
genome_graph = os.path.join(config["outdir"],"figures","genome_graph.png"),
report = os.path.join(config["outdir"],"snp_reports","snp_reports.txt")
genome_graphs = os.path.join(config["outdir"],"snp_reports","tree_subtree_1.snps.txt"), #this obviously isn't ideal because it's not flexible to name stem changes
reports = os.path.join(config["outdir"],"figures","genome_graph_tree_subtree_1.png")
run:
local_trees = []
for r,d,f in os.walk(params.tree_dir):
Expand Down Expand Up @@ -410,9 +411,10 @@ rule make_report:
uk_map = config["uk_map"],
channels_map = config["channels_map"],
ni_map = config["ni_map"],
pc_file = config["pc_file"],
urban_centres = config["urban_centres"],
genome_graph = rules.find_snps.output.genome_graph,
snp_report = rules.find_snps.output.report,
genome_graph = rules.find_snps.output.genome_graphs, #do these two arguments need to be here?
snp_report = rules.find_snps.output.reports,
central = os.path.join(config["outdir"], 'figures', "central_map_ukLin.png"),
neighboring = os.path.join(config["outdir"], 'figures', "neighboring_map_ukLin.png"),
region = os.path.join(config["outdir"], 'figures', "region_map_ukLin.png")
Expand All @@ -421,6 +423,7 @@ rule make_report:
outdir = config["rel_outdir"],
fields = config["fields"],
label_fields = config["label_fields"],
date_fields = config["date_fields"],
node_summary = config["node_summary"],
sc_source = config["sequencing_centre"],
sc = config["sequencing_centre_file"],
Expand All @@ -430,8 +433,7 @@ rule make_report:
figdir = os.path.join(config["outdir"],"figures"),
failure = config["qc_fail_report"],
map_sequences = config["map_sequences"],
x_col = config["x_col"],
y_col = config["y_col"],
map_cols = config["map_cols"],
input_crs = config["input_crs"],
mapping_trait = config["mapping_trait"],
add_boxplots = config["add_boxplots"],
Expand Down Expand Up @@ -473,6 +475,7 @@ rule make_report:
"-f {params.fields:q} "
"--graphic_dict {params.graphic_dict:q} "
"--label-fields {params.label_fields:q} "
"--date-fields {params.date_fields:q} "
"--node-summary {params.node_summary} "
"--figdir {params.rel_figdir:q} "
"{params.sc_flag} "
Expand All @@ -485,12 +488,12 @@ rule make_report:
"--uk-map {input.uk_map:q} "
"--channels-map {input.channels_map:q} "
"--ni-map {input.ni_map:q} "
"--pc-file {input.pc_file:q} "
"--outfile {output.outfile:q} "
"--outdir {params.outdir:q} "
"--map-sequences {params.map_sequences} "
"--snp-report {input.snp_report:q} "
"--x-col {params.x_col} "
"--y-col {params.y_col} "
"--map-cols {params.map_cols} "
"--input-crs {params.input_crs} "
"--mapping-trait {params.mapping_trait} "
"--urban-centres {input.urban_centres} "
Expand Down
Loading

0 comments on commit 8b1738b

Please sign in to comment.