Skip to content

Commit

Permalink
Write a JSON file of GO-CAM info for Chado loading
Browse files Browse the repository at this point in the history
We now parse the GO-CAM JSON files and pull out the data needed for
Chado into a single JSON file.

Refs #1174
  • Loading branch information
kimrutherford committed Jan 9, 2025
1 parent 15e53dd commit a52564e
Showing 1 changed file with 37 additions and 10 deletions.
47 changes: 37 additions & 10 deletions etc/generate_gocam_data_files.pl
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@

my $gene_mapping_filename = shift;
my $term_mapping_filename = shift;
my $go_cam_json_filename = shift;
my $model_directory = shift;

my $ua = LWP::UserAgent->new();
my $ua = LWP::UserAgent->new(keep_alive => 1);

my $request = HTTP::Request->new(GET => "https://live-go-cam.geneontology.io/product/json/provider-to-model.json");
$request->header("user-agent" => "Evil");
Expand All @@ -37,7 +38,7 @@
die "no contents\n";
}

my $metadata_result = decode_json $contents;;
my $metadata_result = decode_json $contents;

my %all_details = ();

Expand Down Expand Up @@ -92,11 +93,9 @@ sub get_process_terms_and_genes

my $term_count = 0;

my @api_failed_ids = ();
my @failed_ids = ();

for my $gocam_id (keys %all_details) {
my $model_title;

print "requesting details of $gocam_id from API\n";

$request = HTTP::Request->new(GET => "https://live-go-cam.geneontology.io/product/json/low-level/$gocam_id.json");
Expand All @@ -106,7 +105,7 @@ sub get_process_terms_and_genes

if (!$response->is_success()) {
print " request failed: ", $response->status_line(), " - skipping\n";
push @api_failed_ids, $gocam_id;
push @failed_ids, $gocam_id;
next;
}

Expand All @@ -121,27 +120,51 @@ sub get_process_terms_and_genes

my $api_model = decode_json $content;

my %model_annotations = ();

map {
if ($_->{key} && $_->{key} eq 'title') {
$model_title = $_->{value};
}
push @{$model_annotations{$_->{key}}}, $_->{value};
} @{$api_model->{annotations} // []};

my $model_title = undef;

if (exists $model_annotations{title}) {
$model_title = $model_annotations{title}->[0];
}

my @contributors = ();

if (exists $model_annotations{contributor}) {
@contributors = map {
s|.*orcid.org/||;
$_
} @{$model_annotations{contributor}};
}

if ($model_title) {
$model_title =~ s/\n/ /g;
$model_title =~ s/[\t ]+/ /g;
$model_title =~ s/^\s+//;
$model_title =~ s/\s+$//;
$all_details{$gocam_id}->{title} = $model_title;
}

my ($process_terms, $genes) = get_process_terms_and_genes($api_model);

if (!@$genes) {
print "$gocam_id has no pombe genes, skipping\n";
push @failed_ids, $gocam_id;
next;
}

$term_count += scalar(@$process_terms);

$all_details{$gocam_id}->{process_terms} = $process_terms;
$all_details{$gocam_id}->{genes} = $genes;
$all_details{$gocam_id}->{contributors} = \@contributors;
}

for my $gocam_id (@api_failed_ids) {
for my $gocam_id (@failed_ids) {
delete $all_details{$gocam_id};
}

Expand Down Expand Up @@ -177,3 +200,7 @@ sub get_process_terms_and_genes

close $gene_output_file;
close $term_output_file;

# Write the details collected in %all_details (keyed by GO-CAM model ID) to
# a single JSON file.  Every step is checked: buffered write errors may only
# surface at close(), and a truncated file would otherwise go unnoticed.
open my $go_cam_json_file, '>', $go_cam_json_filename
  or die "can't open $go_cam_json_filename for writing: $!\n";
# Explicit parentheses make the newline an argument of print() rather than
# relying on encode_json's prototype to end its argument list.
print {$go_cam_json_file} encode_json(\%all_details), "\n"
  or die "can't write to $go_cam_json_filename: $!\n";
close $go_cam_json_file
  or die "can't close $go_cam_json_filename: $!\n";

0 comments on commit a52564e

Please sign in to comment.