Skip to content

Commit

Permalink
Merge branch 'main' of github.com:DIAGNijmegen/Literature
Browse files Browse the repository at this point in the history
  • Loading branch information
keelinm committed Nov 7, 2023
2 parents 3bdf9a8 + 552bc5d commit 3002ae3
Showing 1 changed file with 196 additions and 0 deletions.
196 changes: 196 additions & 0 deletions scripts/MergeChanges.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d377a755",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"from bib_handling_code.processbib import read_bibfile\n",
"from bib_handling_code.processbib import save_to_file"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6cf2984",
"metadata": {},
"outputs": [],
"source": [
"def from_bib_to_csv(diag_bib_raw):\n",
"    \"\"\"Flatten parsed bib entries into a DataFrame with one row per entry.\n",
"\n",
"    Entries whose ``type`` is 'string' are skipped. Each exported field has\n",
"    leading and trailing '{' / '}' characters removed; a field that is absent\n",
"    from an entry becomes an empty string.\n",
"\n",
"    Args:\n",
"        diag_bib_raw: iterable of entries exposing ``key``, ``type`` and a\n",
"            dict-like ``fields`` attribute.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame with the columns bibkey, type, title, authors, doi,\n",
"        gs_citations, journal, year, all_ss_ids and pmid.\n",
"    \"\"\"\n",
"    # (output column, bib field name) pairs, listed in output column order.\n",
"    column_to_field = (\n",
"        ('title', 'title'),\n",
"        ('authors', 'author'),\n",
"        ('doi', 'doi'),\n",
"        ('gs_citations', 'gscites'),\n",
"        ('journal', 'journal'),\n",
"        ('year', 'year'),\n",
"        ('all_ss_ids', 'all_ss_ids'),\n",
"        ('pmid', 'pmid'),\n",
"    )\n",
"    header = ['bibkey', 'type'] + [column for column, _ in column_to_field]\n",
"\n",
"    rows = [\n",
"        [item.key, item.type]\n",
"        + [item.fields.get(field, '').strip('{}') for _, field in column_to_field]\n",
"        for item in diag_bib_raw\n",
"        if item.type != 'string'\n",
"    ]\n",
"\n",
"    return pd.DataFrame(rows, columns=header)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b5596e1",
"metadata": {},
"outputs": [],
"source": [
"# Resolve both input locations first, then parse and tabulate each file.\n",
"path_diag_bib = os.path.join('..', 'diag.bib')\n",
"path_new_bib = os.path.join('script_data/', 'diag_ss_new.bib')\n",
"\n",
"# Current bib file: raw entries plus their DataFrame view.\n",
"diag_bib_raw = read_bibfile(None, path_diag_bib)\n",
"bibfile = from_bib_to_csv(diag_bib_raw)\n",
"\n",
"# New bib file, same treatment.\n",
"updated_bib_raw = read_bibfile(None, path_new_bib)\n",
"newbibfile = from_bib_to_csv(updated_bib_raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98489bba",
"metadata": {},
"outputs": [],
"source": [
"# Keys of every non-'string' entry in the new bib file, as a plain list.\n",
"new_bibkeys = newbibfile['bibkey'].to_list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2374dcc9",
"metadata": {},
"outputs": [],
"source": [
"# Keys of every non-'string' entry in the current bib file, as a plain list.\n",
"old_bibkeys = bibfile['bibkey'].to_list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f38e2c4c",
"metadata": {},
"outputs": [],
"source": [
"def get_entry(bibfile, bibkey_toupdate):\n",
"    \"\"\"Return the first entry of ``bibfile`` whose ``key`` equals ``bibkey_toupdate``.\n",
"\n",
"    Args:\n",
"        bibfile: iterable of entries exposing a ``key`` attribute.\n",
"        bibkey_toupdate: key to look for.\n",
"\n",
"    Returns:\n",
"        The first matching entry, or None when no entry has that key.\n",
"    \"\"\"\n",
"    matches = (candidate for candidate in bibfile if candidate.key == bibkey_toupdate)\n",
"    return next(matches, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "366debbe",
"metadata": {},
"outputs": [],
"source": [
"# Update existing bibitems\n",
"# Index the updated entries by key once (first occurrence wins, which matches\n",
"# a front-to-back scan) so each lookup is a dict access instead of a linear\n",
"# search through the whole updated file for every existing entry.\n",
"updated_by_key = {}\n",
"for updated_entry in updated_bib_raw:\n",
"    updated_by_key.setdefault(updated_entry.key, updated_entry)\n",
"new_bibkey_set = set(new_bibkeys)\n",
"\n",
"# Only these two fields are copied from the updated entry onto the existing one.\n",
"fields_to_copy = ('all_ss_ids', 'pmid')\n",
"\n",
"for entry in diag_bib_raw:\n",
"    if entry.type == 'string':\n",
"        continue\n",
"    bibkey = entry.key\n",
"    if bibkey not in new_bibkey_set:\n",
"        continue\n",
"    corresponding_entry = updated_by_key.get(bibkey)\n",
"    if corresponding_entry is None:\n",
"        # new_bibkeys is out of sync with updated_bib_raw (stale kernel state);\n",
"        # skip instead of failing on a missing entry.\n",
"        continue\n",
"    for field_name in fields_to_copy:\n",
"        if field_name in corresponding_entry.fields:\n",
"            entry.fields[field_name] = corresponding_entry.fields[field_name]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21522e57",
"metadata": {},
"outputs": [],
"source": [
"# Keys present in the new bib file but missing from the current one.\n",
"bibkeys_toadd = set(new_bibkeys).difference(old_bibkeys)\n",
"\n",
"# Rows of the new table that belong to those keys.\n",
"is_new_key = newbibfile['bibkey'].isin(bibkeys_toadd)\n",
"df_to_add = newbibfile[is_new_key]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c023620",
"metadata": {},
"outputs": [],
"source": [
"# Display the keys that exist only in the new bib file.\n",
"bibkeys_toadd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82d6aeeb",
"metadata": {},
"outputs": [],
"source": [
"# Append the entries that exist only in the new bib file.\n",
"# Keys already in diag_bib_raw are tracked so that re-running this cell, or a\n",
"# key that occurs twice in the new file, cannot append the same entry again.\n",
"present_keys = {existing.key for existing in diag_bib_raw if existing.type != 'string'}\n",
"\n",
"for entry in updated_bib_raw:\n",
"    # bibkeys_toadd was derived from non-'string' entries only, so 'string'\n",
"    # entries are never candidates for appending.\n",
"    if entry.type == 'string':\n",
"        continue\n",
"    bibkey = entry.key\n",
"    if bibkey in bibkeys_toadd and bibkey not in present_keys:\n",
"        diag_bib_raw.append(entry)\n",
"        present_keys.add(bibkey)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6224df11",
"metadata": {},
"outputs": [],
"source": [
"# DataFrame view of diag_bib_raw after the update and append cells above.\n",
"# NOTE(review): `csv` is not read by the remaining cell, and the name would\n",
"# shadow the stdlib `csv` module if that were ever imported here.\n",
"csv=from_bib_to_csv(diag_bib_raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf941ddf",
"metadata": {},
"outputs": [],
"source": [
"# Pass the merged entries to save_to_file with a new target path\n",
"# ('diag_latest_try.bib'), which differs from the 'diag.bib' that was read.\n",
"# Note that path_diag_bib is reassigned here: it held the input path before.\n",
"path_diag_bib = os.path.join('..', 'diag_latest_try.bib')\n",
"save_to_file(diag_bib_raw, path_diag_bib)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 3002ae3

Please sign in to comment.