featured.bib

% GottBERT preprint on arXiv (2012.02110). The arXiv identifier and subject
% class live in the eprint fields so the style can format them; they are not
% repeated in a journal field.
@article{scheible_gottbert_2020,
 abstract = {Lately, pre-trained language models advanced the field of natural language processing (NLP). The introduction of Bidirectional Encoders for Transformers (BERT) and its optimized version RoBERTa have had significant impact and increased the relevance of pre-trained models. First, research in this field mainly started on English data followed by models trained with multilingual text corpora. However, current research shows that multilingual models are inferior to monolingual models. Currently, no German single language RoBERTa model is yet published, which we introduce in this work (GottBERT). The German portion of the OSCAR data set was used as text corpus. In an evaluation we compare its performance on the two Named Entity Recognition (NER) tasks Conll 2003 and GermEval 2014 as well as on the text classification tasks GermEval 2018 (fine and coarse) and GNAD with existing German single language BERT models and two multilingual ones. GottBERT was pre-trained related to the original RoBERTa model using fairseq. All downstream tasks were trained using hyperparameter presets taken from the benchmark of German BERT. The experiments were setup utilizing FARM. Performance was measured by the $F_{1}$ score. GottBERT was successfully pre-trained on a 256 core TPU pod using the RoBERTa BASE architecture. Even without extensive hyper-parameter optimization, in all NER and one text classification task, GottBERT already outperformed all other tested German and multilingual models. In order to support the German NLP field, we publish GottBERT under the AGPLv3 license.},
 author = {Scheible, Raphael and Thomczyk, Fabian and Tippmann, Patric and Jaravine, Victor and Boeker, Martin},
 date = {2020-12-03},
 eprint = {2012.02110},
 eprintclass = {cs.CL},
 eprinttype = {arxiv},
 keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
 shorttitle = {{GottBERT}},
 title = {{GottBERT}: a pure {German} Language Model},
 url = {https://arxiv.org/abs/2012.02110}
}

% JAMIA Open article on row level security in i2b2 (publisher export,
% normalised to the biblatex field names used by the other entries).
@article{10.1093/jamiaopen/ooad068,
    author = {Scheible, Raphael and Thomczyk, Fabian and Blum, Marco and Rautenberg, Micha and Prunotto, Andrea and Yazijy, Suhail and Boeker, Martin},
    title = {Integrating row level security in {i2b2}: segregation of medical records into data marts without data replication and synchronization},
    journaltitle = {JAMIA Open},
    volume = {6},
    number = {3},
    pages = {ooad068},
    date = {2023-08},
    abstract = {i2b2 offers the possibility to store biomedical data of different projects in subject oriented data marts of the data warehouse, which potentially requires data replication between different projects and also data synchronization in case of data changes. We present an approach that can save this effort and assess its query performance in a case study that reflects real-world scenarios. For data segregation, we used PostgreSQL’s row level security (RLS) feature, the unit test framework pgTAP for validation and testing as well as the i2b2 application. No change of the i2b2 code was required. Instead, to leverage orchestration and deployment, we additionally implemented a command line interface (CLI). We evaluated performance using 3 different queries generated by i2b2, which we performed on an enlarged Harvard demo dataset. We introduce the open source Python CLI i2b2rls, which orchestrates and manages security roles to implement data marts so that they do not need to be replicated and synchronized as different i2b2 projects. Our evaluation showed that our approach is on average 3.55 and on median 2.71 times slower compared to classic i2b2 data marts, but has more flexibility and easier setup. The RLS-based approach is particularly useful in a scenario with many projects, where data is constantly updated, user and group requirements change frequently or complex user authorization requirements have to be defined. The approach applies to both the i2b2 interface and direct database access. Today, data warehouses form the foundation of modern data science in all areas where insights are gained from data. In the medical and healthcare sector, these warehouses require data access rules. i2b2 is a prominent and widespread data warehouse software providing project specific data segregation and thus access management. However, i2b2’s current approach often involves data replication and synchronization, leading to significant effort.
Based on the main components shipped within i2b2 and their features, we present a command line tool which orchestrates a new data segregation approach without the requirement of data replication which we released under MIT open source license. Additionally, we evaluated our approach’s performance using 3 different queries on a real-world warehouse scenario based on a pre-existent enlarged demo dataset of i2b2. Our evaluation showed that our approach is on average 3.55 and on median 2.71 times slower compared to classic i2b2 approach but has more flexibility and easier setup. Our approach is particularly useful in a scenario with many projects, where data is constantly updated, user and group requirements change frequently or complex user authorization requirements have to be defined. The approach applies to both the i2b2 interface and direct database access.},
    issn = {2574-2531},
    doi = {10.1093/jamiaopen/ooad068},
    url = {https://doi.org/10.1093/jamiaopen/ooad068},
}

% Conference paper on a FHIR DataProvider for ReactAdmin (IOS Press, 2023).
% Fields are ordered: who, what, where, when, identifiers, local attachment.
@inproceedings{scheible_fhir_2023,
	author = {Scheible, Raphael and Alkier, David and Wendroth, Justus and Mayer, Julian and Boeker, Martin},
	title = {{FHIR} {DataProvider} for {ReactAdmin}: Leveraging User Interface Creation for Medical Web Applications},
	shorttitle = {{FHIR} {DataProvider} for {ReactAdmin}},
	booktitle = {Healthcare Transformation with Informatics and Artificial Intelligence},
	publisher = {{IOS} Press},
	pages = {110--114},
	date = {2023},
	doi = {10.3233/SHTI230436},
	url = {https://ebooks.iospress.nl/doi/10.3233/SHTI230436},
	urldate = {2023-07-03},
	file = {Full Text PDF:C\:\\Users\\scheible\\Zotero\\storage\\LN69PWWR\\Scheible et al. - 2023 - FHIR DataProvider for ReactAdmin Leveraging User .pdf:application/pdf},
}

% Cancers 15(13) article describing the MIRACUM-Pipe NGS pipeline.
% NOTE(review): the exported month (2023-01) conflicted with issue 13 and the
% 2023-07-01 access date, so only the year is kept - confirm the exact
% publication date against the publisher page before adding a month.
@article{metzger_miracum-pipe_2023,
	title = {{MIRACUM-Pipe}: An Adaptable Pipeline for Next-Generation Sequencing Analysis, Reporting, and Visualization for Clinical Decision Making},
	volume = {15},
	rights = {http://creativecommons.org/licenses/by/3.0/},
	issn = {2072-6694},
	url = {https://www.mdpi.com/2072-6694/15/13/3456},
	doi = {10.3390/cancers15133456},
	shorttitle = {{MIRACUM-Pipe}},
	abstract = {(1) Background: Next-generation sequencing ({NGS}) of patients with advanced tumors is becoming an established method in Molecular Tumor Boards. However, somatic variant detection, interpretation, and report generation, require in-depth knowledge of both bioinformatics and oncology. (2) Methods: {MIRACUM}-Pipe combines many individual tools into a seamless workflow for comprehensive analyses and annotation of {NGS} data including quality control, alignment, variant calling, copy number variation estimation, evaluation of complex biomarkers, and {RNA} fusion detection. (3) Results: {MIRACUM}-Pipe offers an easy-to-use, one-prompt standardized solution to analyze {NGS} data, including quality control, variant calling, copy number estimation, annotation, visualization, and report generation. (4) Conclusions: {MIRACUM}-Pipe, a versatile pipeline for {NGS}, can be customized according to bioinformatics and clinical needs and to support clinical decision-making with visual processing and interactive reporting.},
	pages = {3456},
	number = {13},
	journaltitle = {Cancers},
	publisher = {Multidisciplinary Digital Publishing Institute},
	author = {Metzger, Patrick and Hess, Maria Elena and Blaumeiser, Andreas and Pauli, Thomas and Schipperges, Vincent and Mertes, Ralf and Christoph, Jan and Unberath, Philipp and Reimer, Niklas and Scheible, Raphael and Illert, Anna L. and Busch, Hauke and Andrieux, Geoffroy and Boerries, Melanie},
	urldate = {2023-07-01},
	date = {2023},
	langid = {english},
	keywords = {bioinformatics, computational biology, molecular tumor board, next-generation sequencing, pipeline, precision oncology, software, somatic variant calling, workflow},
	file = {Full Text PDF:C\:\\Users\\scheible\\Zotero\\storage\\DLEYMIQ5\\Metzger et al. - 2023 - MIRACUM-Pipe An Adaptable Pipeline for Next-Gener.pdf:application/pdf},
}

% JMIR Medical Informatics 8(10) article on connecting the ESID registry to
% the OSSE bridgehead. Reference-manager residue that would otherwise print
% in the bibliography has been removed; the publisher is kept in its own field.
% NOTE(review): the rights text is truncated in the export - restore the full
% licence statement from the publisher page if it is needed.
@article{scheible_enabling_2020,
 abstract = {Background: The German Network on Primary Immunodeficiency Diseases (PID-NET) utilizes the European Society for Immunodeficiencies (ESID) registry as a platform for collecting data. In the context of PID-NET data, we show how registries based on custom software can be made interoperable for better collaborative access to precollected data. The Open Source Registry System for Rare Diseases (Open-Source-Registersystem für Seltene Erkrankungen [OSSE], in German) provides patient organizations, physicians, scientists, and other parties with open source software for the creation of patient registries. In addition, the necessary interoperability between different registries based on the OSSE, as well as existing registries, is supported, which allows those registries to be confederated at both the national and international levels.
Objective: Data from the PID-NET registry should be made available in an interoperable manner without losing data sovereignty by extending the existing custom software of the registry using the OSSE registry framework.
Methods: This paper describes the following: (1) the installation and configuration of the OSSE bridgehead, (2) an approach using a free toolchain to set up the required interfaces to connect a registry with the OSSE bridgehead, and (3) the decentralized search, which allows the formulation of inquiries that are sent to a selected set of registries of interest.
Results: PID-NET uses the established and highly customized ESID registry software. By setting up a so-called OSSE bridgehead, PID-NET data are made interoperable according to a federated approach, and centrally formulated inquiries for data can be received. As the first registry to use the OSSE bridgehead, the authors introduce an approach using a free toolchain to efficiently implement and maintain the required interfaces. Finally, to test and demonstrate the system, two inquiries are realized using the graphical query builder. By establishing and interconnecting an OSSE bridgehead with the underlying ESID registry, confederated queries for data can be received and, if desired, the inquirer can be contacted to further discuss any requirements for cooperation.
Conclusions: The OSSE offers an infrastructure that provides the possibility of more collaborative and transparent research. The decentralized search functionality includes registries into one search application while still maintaining data sovereignty. The OSSE bridgehead enables any registry software to be integrated into the OSSE network. The proposed toolchain to set up the required interfaces consists of freely available software components that are well documented. The use of the decentralized search is uncomplicated to use and offers a well-structured, yet still improvable, graphical user interface to formulate queries.},
 author = {Scheible, Raphael and Kadioglu, Dennis and Ehl, Stephan and Blum, Marco and Boeker, Martin and Folz, Michael and Grimbacher, Bodo and Göbel, Jens and Klein, Christoph and Nieters, Alexandra and Rusch, Stephan and Kindle, Gerhard and Storf, Holger},
 date = {2020},
 doi = {10.2196/17420},
 journaltitle = {JMIR Medical Informatics},
 langid = {english},
 number = {10},
 pages = {e17420},
 publisher = {JMIR Publications Inc., Toronto, Canada},
 rights = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (},
 shorttitle = {Enabling External Inquiries to an Existing Patient Registry by Using the {Open Source Registry System for Rare Diseases}},
 title = {Enabling External Inquiries to an Existing Patient Registry by Using the {Open Source Registry System for Rare Diseases}: Demonstration of the System Using the {European Society for Immunodeficiencies} Registry},
 url = {https://medinform.jmir.org/2020/10/e17420/},
 volume = {8}
}