diff --git a/docs/nbs/tutorial_HLA_prediction.rst b/docs/nbs/tutorial_HLA_prediction.rst
deleted file mode 100644
index 544073bf..00000000
--- a/docs/nbs/tutorial_HLA_prediction.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Tutorial: HLA prediction
-==========================
-
-Check `HLA1_Classifier.ipynb <https://github.com/MannLabs/PeptDeep-HLA/blob/master/nbs/HLA1_Classifier.ipynb>`_
-in `PeptDeep-HLA <https://github.com/MannLabs/PeptDeep-HLA>`_ repo.
diff --git a/docs/notebooks.rst b/docs/notebooks.rst
index 1ba96c8a..701040d6 100644
--- a/docs/notebooks.rst
+++ b/docs/notebooks.rst
@@ -6,10 +6,10 @@ Tutorials and notebooks about how to use AlphaPeptDeep
 .. toctree::
    :maxdepth: 1
 
+   tutorials/tutorial_immunopeptidomics
    nbs/tutorial_models_from_scratch
    nbs/tutorial_speclib_from_fasta
    nbs/alphapeptdeep_hdf_to_tsv
-   nbs/tutorial_HLA_prediction
    nbs/tutorial_model_manager
    nbs/tutorial_building_rt_model
    nbs/tutorial_building_ccs_model
diff --git a/docs/tutorials/example.fasta b/docs/tutorials/example.fasta
new file mode 100644
index 00000000..5619e28a
--- /dev/null
+++ b/docs/tutorials/example.fasta
@@ -0,0 +1,9 @@
+>tr|A0A024R161|A0A024R161_HUMAN Guanine nucleotide-binding protein subunit gamma OS=Homo sapiens GN=DNAJC25-GNG10 PE=3 SV=1
+MGAPLLSPGWGAGAAGRRWWMLLAPLLPALLLVRPAGALVEGLYCGTRDCYEVLGVSRSA
+GKAEIARAYRQLARRYHPDRYRPQPGDEGPGRTPQSAEEAFLLVATAYETLKVSQAAAEL
+QQYCMQNACKDALLVGVPAGSNPFREPRSCALL
+>tr|A0A024RAP8|A0A024RAP8_HUMAN HCG2009644, isoform CRA_b OS=Homo sapiens GN=KLRC4-KLRK1 PE=4 SV=1
+MGWIRGRRSRHSWEMSEFHNYNLDLKKSDFSTRWQKQRCPVVKSKCRENASPFFFCCFIA
+VAMGIRFIIMVTIWSAVFLNSLFNQEVQIPLTESYCGPCPKNWICYKNNCYQFFDESKNW
+YESQASCMSQNASLLKVYSKEDQDLLKLVKSYHWMGLVHIPTNGSWQWEDGSILSPNLLT
+IIEMQKGDCALYASSFKGYIENCSTPNTYICMQRTV
diff --git a/docs/tutorials/tutorial_immunopeptidomics.ipynb b/docs/tutorials/tutorial_immunopeptidomics.ipynb
new file mode 100644
index 00000000..eb536a8a
--- /dev/null
+++ b/docs/tutorials/tutorial_immunopeptidomics.ipynb
@@ -0,0 +1,3758 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Using peptdeep for MHC class I immunopeptidomics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook introduces how to generate spectral libraries for immunopeptidomics analysis from a list of protein sequences. This entails several steps:\n",
+    "\n",
+    "1. unspecific digestion of protein sequences\n",
+    "2. selection of peptide sequences used for library prediction by peptdeep-hla prediction\n",
+    "   2.1 using the pretrained model\n",
+    "   2.2 using an improved model by including a transfer learning step\n",
+    "3. spectral library prediction\n",
+    "4. matching the peptides back to the proteins (this can be done before or after library prediction or search)  \n",
+    "\n",
+    "\n",
+    "\n",
+    "Note that the `pydivsufsort` package is not installed by peptdeep by default. Install it with:\n",
+    "```\n",
+    "pip install \"peptdeep[development,hla]\"\n",
+    "```\n",
+    "\n",
+    "Or install within jupyter notebook:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -q pydivsufsort"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch # noqa: 401, to prevent crash in Mac Arm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Unspecific digestion in alphabase"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The unspecific digestion workflow uses the longest common prefix (LCP) algorithm, which is based on the suffix array data structure and has been proven to be very efficient for unspecific digestion [https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-577]. Here we use `pydivsufsort`, a Python wrapper of the high-performance C library libdivsufsort [https://github.com/y-256/libdivsufsort], to facilitate LCP-based digestion.\n",
+    "\n",
+    "This means that the digestion is performed on a single sequence string and retrieves both the peptide sequence and the start and stop indices of the peptide within the complete sequence. Therefore, unspecific digestion in alphabase involves two steps:\n",
+    "\n",
+    "1. concatenation of protein sequences into a single sequence\n",
+    "2. unspecific digestion\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1.1 Concatenate protein sequences into a single sequence\n",
+    "\n",
+    "The protein sequences are concatenated into a single sequence. The sequences are separated by a sentinel character, in this case '$', so that no peptides across proteins are formed. Note that the first and last sentinel characters are crucial as well.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>protein_id</th>\n",
+       "      <th>full_name</th>\n",
+       "      <th>gene_name</th>\n",
+       "      <th>gene_org</th>\n",
+       "      <th>description</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>nAA</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>tr|A0A024R161|A0A024R161_HUMAN</th>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>tr|A0A024R161|A0A024R161_HUMAN</td>\n",
+       "      <td>DNAJC25-GNG10</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>tr|A0A024R161|A0A024R161_HUMAN Guanine nucleot...</td>\n",
+       "      <td>MGAPLLSPGWGAGAAGRRWWMLLAPLLPALLLVRPAGALVEGLYCG...</td>\n",
+       "      <td>153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>tr|A0A024RAP8|A0A024RAP8_HUMAN</th>\n",
+       "      <td>A0A024RAP8</td>\n",
+       "      <td>tr|A0A024RAP8|A0A024RAP8_HUMAN</td>\n",
+       "      <td>KLRC4-KLRK1</td>\n",
+       "      <td>A0A024RAP8_HUMAN</td>\n",
+       "      <td>tr|A0A024RAP8|A0A024RAP8_HUMAN HCG2009644, iso...</td>\n",
+       "      <td>MGWIRGRRSRHSWEMSEFHNYNLDLKKSDFSTRWQKQRCPVVKSKC...</td>\n",
+       "      <td>216</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                protein_id                       full_name  \\\n",
+       "tr|A0A024R161|A0A024R161_HUMAN  A0A024R161  tr|A0A024R161|A0A024R161_HUMAN   \n",
+       "tr|A0A024RAP8|A0A024RAP8_HUMAN  A0A024RAP8  tr|A0A024RAP8|A0A024RAP8_HUMAN   \n",
+       "\n",
+       "                                    gene_name          gene_org  \\\n",
+       "tr|A0A024R161|A0A024R161_HUMAN  DNAJC25-GNG10  A0A024R161_HUMAN   \n",
+       "tr|A0A024RAP8|A0A024RAP8_HUMAN    KLRC4-KLRK1  A0A024RAP8_HUMAN   \n",
+       "\n",
+       "                                                                      description  \\\n",
+       "tr|A0A024R161|A0A024R161_HUMAN  tr|A0A024R161|A0A024R161_HUMAN Guanine nucleot...   \n",
+       "tr|A0A024RAP8|A0A024RAP8_HUMAN  tr|A0A024RAP8|A0A024RAP8_HUMAN HCG2009644, iso...   \n",
+       "\n",
+       "                                                                         sequence  \\\n",
+       "tr|A0A024R161|A0A024R161_HUMAN  MGAPLLSPGWGAGAAGRRWWMLLAPLLPALLLVRPAGALVEGLYCG...   \n",
+       "tr|A0A024RAP8|A0A024RAP8_HUMAN  MGWIRGRRSRHSWEMSEFHNYNLDLKKSDFSTRWQKQRCPVVKSKC...   \n",
+       "\n",
+       "                                nAA  \n",
+       "tr|A0A024R161|A0A024R161_HUMAN  153  \n",
+       "tr|A0A024RAP8|A0A024RAP8_HUMAN  216  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.hla.hla_utils import load_prot_df\n",
+    "fasta_path = \"example.fasta\"\n",
+    "protein_df = load_prot_df(fasta_path)\n",
+    "protein_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'$MGAPLLSPGWGAGAAGRRWWMLLAPLLPALLLVRPAGALVEGLYCGTRDCYEVLGVSRSAGKAEIARAYRQLARRYHPDRYRPQPGDEGPGRTPQSAEEAFLLVATAYETLKVSQAAAELQQYCMQNACKDALLVGVPAGSNPFREPRSCALL$MGWIRGRRSRHSWEMSEFHNYNLDLKKSDFSTRWQKQRCPVVKSKCRENASPFFFCCFIAVAMGIRFIIMVTIWSAVFLNSLFNQEVQIPLTESYCGPCPKNWICYKNNCYQFFDESKNWYESQASCMSQNASLLKVYSKEDQDLLKLVKSYHWMGLVHIPTNGSWQWEDGSILSPNLLTIIEMQKGDCALYASSFKGYIENCSTPNTYICMQRTV$'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.hla.hla_utils import cat_proteins\n",
+    "cat_sequence = cat_proteins(protein_df[\"sequence\"])\n",
+    "cat_sequence"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1.2 Unspecific digestion\n",
+    "\n",
+    "Use `alphabase.protein.lcp_digest.get_substring_indices` to get all non-redundant non-specific peptide sequences from the concatenated protein sequence. The digested peptide sequences are stored in a dataframe based on their start and stop indices in the concatenated protein sequence string. To save RAM, the `peptdeep.hla` module works on start and stop indices instead of on peptide sequences directly. This reduces RAM usage about 8-fold for HLA-I peptides (length from 8 to 14, demonstrated below). For a large protein sequence database, there will be millions of unspecific peptides, so working with strings is not feasible for a complete human fasta because of the extremely large RAM requirements. (~ 70M unspecific sequences from the reviewed swissprot fasta require ~ 4-5 GB RAM already).\n",
+    "\n",
+    "Using the `get_substring_indices` function, we extract the start and stop indices of all peptide sequences between 8 and 14 aa (`min_len`, `max_len`) from the concatenated protein sequences. All peptide sequences are unique, as guaranteed by the LCP algorithm."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2438</th>\n",
+       "      <td>361</td>\n",
+       "      <td>370</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2439</th>\n",
+       "      <td>361</td>\n",
+       "      <td>371</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2440</th>\n",
+       "      <td>362</td>\n",
+       "      <td>370</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2441</th>\n",
+       "      <td>362</td>\n",
+       "      <td>371</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2442</th>\n",
+       "      <td>363</td>\n",
+       "      <td>371</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2443 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      start_pos  stop_pos\n",
+       "0             1         9\n",
+       "1             1        10\n",
+       "2             1        11\n",
+       "3             1        12\n",
+       "4             1        13\n",
+       "...         ...       ...\n",
+       "2438        361       370\n",
+       "2439        361       371\n",
+       "2440        362       370\n",
+       "2441        362       371\n",
+       "2442        363       371\n",
+       "\n",
+       "[2443 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from alphabase.protein.lcp_digest import get_substring_indices\n",
+    "import pandas as pd\n",
+    "import sys\n",
+    "\n",
+    "start_idxes, stop_idxes = get_substring_indices(\n",
+    "    cat_sequence, min_len=8, max_len=14, stop_char=\"$\"\n",
+    ")\n",
+    "digest_pos_df = pd.DataFrame({\n",
+    "    \"start_pos\": start_idxes,\n",
+    "    \"stop_pos\": stop_idxes,\n",
+    "})\n",
+    "digest_pos_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RAM_use_idxes = sys.getsizeof(digest_pos_df)*1e-6"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The unspecific peptide sequences can be located by the `start_pos` and `stop_pos`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>sequence</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>MGAPLLSP</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>MGAPLLSPG</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>MGAPLLSPGW</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12</td>\n",
+       "      <td>MGAPLLSPGWG</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>13</td>\n",
+       "      <td>MGAPLLSPGWGA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2438</th>\n",
+       "      <td>361</td>\n",
+       "      <td>370</td>\n",
+       "      <td>NTYICMQRT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2439</th>\n",
+       "      <td>361</td>\n",
+       "      <td>371</td>\n",
+       "      <td>NTYICMQRTV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2440</th>\n",
+       "      <td>362</td>\n",
+       "      <td>370</td>\n",
+       "      <td>TYICMQRT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2441</th>\n",
+       "      <td>362</td>\n",
+       "      <td>371</td>\n",
+       "      <td>TYICMQRTV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2442</th>\n",
+       "      <td>363</td>\n",
+       "      <td>371</td>\n",
+       "      <td>YICMQRTV</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2443 rows × 3 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      start_pos  stop_pos      sequence\n",
+       "0             1         9      MGAPLLSP\n",
+       "1             1        10     MGAPLLSPG\n",
+       "2             1        11    MGAPLLSPGW\n",
+       "3             1        12   MGAPLLSPGWG\n",
+       "4             1        13  MGAPLLSPGWGA\n",
+       "...         ...       ...           ...\n",
+       "2438        361       370     NTYICMQRT\n",
+       "2439        361       371    NTYICMQRTV\n",
+       "2440        362       370      TYICMQRT\n",
+       "2441        362       371     TYICMQRTV\n",
+       "2442        363       371      YICMQRTV\n",
+       "\n",
+       "[2443 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "digest_pos_df[\"sequence\"] = digest_pos_df[\n",
+    "    [\"start_pos\",\"stop_pos\"]\n",
+    "].apply(lambda x: cat_sequence[slice(*x)], axis=1)\n",
+    "digest_pos_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RAM_use_seqs = sys.getsizeof(digest_pos_df[\"sequence\"])*1e-6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'seq RAM = 0.16623 Mb, idxes RAM = 0.01971, ratio = 8.43475'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "f\"seq RAM = {RAM_use_seqs:.5f} Mb, idxes RAM = {RAM_use_idxes:.5f}, ratio = {RAM_use_seqs/RAM_use_idxes:.5f}\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Selection of peptide sequences used for library prediction\n",
+    "The `digest_pos_df` contains all unspecifically digested peptide sequences between 8 and 14 aa that can be generated from the concatenated protein sequences. This list is reduced using the `HLA1_Binding_Classifier` from `peptdeep.hla.hla_class1`. Two different model architectures are available, an LSTM model (HLA_Class_I_LSTM) and a BERT model (HLA_Class_I_BERT). A pretrained model is only available for the LSTM model architecture.\n",
+    "The `HLA1_Binding_Classifier` can be used with a pretrained model, tuned with existing peptide data or trained from scratch. Training of a new model should be considered carefully and will not be covered in this tutorial.\n",
+    "   "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.1 Selection of peptide sequence candidates without transfer learning\n",
+    "\n",
+    "Selection of peptide sequences for library prediction using the pretrained model can be done in a few steps. First, the classifier model needs to be initialized and the pretrained model is loaded. Next, we can use any kind of dataframe containing peptide sequences to predict how likely they are to be HLA peptides, the only requirement being that the column containing the peptides is called 'sequence'.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>MGAPLLSP</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.239477</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>145</td>\n",
+       "      <td>153</td>\n",
+       "      <td>REPRSCAL</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.061692</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>146</td>\n",
+       "      <td>154</td>\n",
+       "      <td>EPRSCALL</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.137313</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>155</td>\n",
+       "      <td>163</td>\n",
+       "      <td>MGWIRGRR</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.056462</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>156</td>\n",
+       "      <td>164</td>\n",
+       "      <td>GWIRGRRS</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.001298</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2438</th>\n",
+       "      <td>112</td>\n",
+       "      <td>126</td>\n",
+       "      <td>KVSQAAAELQQYCM</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.243115</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2439</th>\n",
+       "      <td>317</td>\n",
+       "      <td>331</td>\n",
+       "      <td>NGSWQWEDGSILSP</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.021114</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2440</th>\n",
+       "      <td>79</td>\n",
+       "      <td>93</td>\n",
+       "      <td>DRYRPQPGDEGPGR</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.060634</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2441</th>\n",
+       "      <td>113</td>\n",
+       "      <td>127</td>\n",
+       "      <td>VSQAAAELQQYCMQ</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.355900</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2442</th>\n",
+       "      <td>190</td>\n",
+       "      <td>204</td>\n",
+       "      <td>KQRCPVVKSKCREN</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.000362</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2443 rows × 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      start_pos  stop_pos        sequence  nAA  HLA_prob_pred\n",
+       "0             1         9        MGAPLLSP    8       0.239477\n",
+       "1           145       153        REPRSCAL    8       0.061692\n",
+       "2           146       154        EPRSCALL    8       0.137313\n",
+       "3           155       163        MGWIRGRR    8       0.056462\n",
+       "4           156       164        GWIRGRRS    8       0.001298\n",
+       "...         ...       ...             ...  ...            ...\n",
+       "2438        112       126  KVSQAAAELQQYCM   14       0.243115\n",
+       "2439        317       331  NGSWQWEDGSILSP   14       0.021114\n",
+       "2440         79        93  DRYRPQPGDEGPGR   14       0.060634\n",
+       "2441        113       127  VSQAAAELQQYCMQ   14       0.355900\n",
+       "2442        190       204  KQRCPVVKSKCREN   14       0.000362\n",
+       "\n",
+       "[2443 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.hla.hla_class1 import HLA1_Binding_Classifier\n",
+    "\n",
+    "model = HLA1_Binding_Classifier()\n",
+    "model.load_pretrained_hla_model()\n",
+    "manual_prediction = model.predict(digest_pos_df)\n",
+    "manual_prediction\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we can filter the list based on the HLA_prob_pred. The higher the probability, the more likely it is for the peptide sequence to be present in an immunopeptidomics sample. It is not recommended to use a cut-off below 0.7 as this inflates the spectral library. It is rather recommended to use more conservative cut-offs. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>67</th>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2318</th>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2378</th>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2382</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2408</th>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2419</th>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      start_pos  stop_pos        sequence  nAA  HLA_prob_pred\n",
+       "17          168       176        EMSEFHNY    8       0.793702\n",
+       "24          130       138        KDALLVGV    8       0.817415\n",
+       "31          137       145        VPAGSNPF    8       0.751329\n",
+       "37          170       178        SEFHNYNL    8       0.940019\n",
+       "67          181       189        KSDFSTRW    8       0.895964\n",
+       "...         ...       ...             ...  ...            ...\n",
+       "2318         95       109  QSAEEAFLLVATAY   14       0.969541\n",
+       "2378        329       343  SPNLLTIIEMQKGD   14       0.756001\n",
+       "2382          5        19  LLSPGWGAGAAGRR   14       0.733784\n",
+       "2408        110       124  TLKVSQAAAELQQY   14       0.891976\n",
+       "2419          6        20  LSPGWGAGAAGRRW   14       0.842583\n",
+       "\n",
+       "[148 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "manual_prediction[manual_prediction['HLA_prob_pred'] > 0.7]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As described above, directly using the sequences for classification can be memory-intensive for large lists of sequences. Therefore, the manual concatenation, unspecific digestion, prediction and filtering is only suggested for small sets of proteins or for the integration of selected sequences (e.g. mutations, nuORFs etc.). This can be circumvented by directly predicting and filtering from a fasta using `model.predict_from_proteins()`. This executes the concatenation, unspecific digestion, prediction and filtering automatically in batches, so the whole process can be done more efficiently and without a specialized computation infrastructure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:01<00:00,  1.27s/it]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>sequence</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>KDALLVGV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     start_pos  stop_pos  nAA  HLA_prob_pred        sequence\n",
+       "0          168       176    8       0.793702        EMSEFHNY\n",
+       "1          130       138    8       0.817415        KDALLVGV\n",
+       "2          137       145    8       0.751329        VPAGSNPF\n",
+       "3          170       178    8       0.940019        SEFHNYNL\n",
+       "4          181       189    8       0.895964        KSDFSTRW\n",
+       "..         ...       ...  ...            ...             ...\n",
+       "143         95       109   14       0.969541  QSAEEAFLLVATAY\n",
+       "144        329       343   14       0.756001  SPNLLTIIEMQKGD\n",
+       "145          5        19   14       0.733784  LLSPGWGAGAAGRR\n",
+       "146        110       124   14       0.891976  TLKVSQAAAELQQY\n",
+       "147          6        20   14       0.842583  LSPGWGAGAAGRRW\n",
+       "\n",
+       "[148 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sequence_df = model.predict_from_proteins(protein_df, prob_threshold=0.7)\n",
+    "sequence_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.2 Selection of peptide sequence candidates with transfer learning\n",
+    "\n",
+    "To perform transfer learning we need a list of peptide sequences that we expect to be present in our sample. These peptides can be retrieved from several different sources, such as DDA or directDIA search results. It is recommended to use at the very least 1000 sequences for transfer learning. The more sequences are available, the better the transfer learning step works. The model performance can be assessed after transfer learning and should be assessed before prediction.\n",
+    "\n",
+    "First, the Classifier model needs to be initialized and the pretrained model loaded. Next, a protein dataframe is added, in this example the previously loaded fasta file. The protein dataframe is used internally by the Classifier to draw negative training data during model training and testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = HLA1_Binding_Classifier()\n",
+    "model.load_pretrained_hla_model()\n",
+    "model.load_proteins(fasta_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we load the peptide sequences we use for transfer learning and split them into a training and a testing dataset. This step is very important for assessing the model performance after transfer learning. Here, we use the `digest_pos_df` generated above. As these are not immunopeptides but a list of unspecifically digested proteins, the model performance will not improve, but the principles remain the same.  \n",
+    "**TODO (@Feng):** Should we include an example file so that the model is actually improved, or just use this?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1954, 489)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_seq_df = digest_pos_df.sample(frac=0.2)\n",
+    "train_seq_df = digest_pos_df.drop(index=test_seq_df.index)\n",
+    "len(train_seq_df), len(test_seq_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, we train the model using the training sequence dataframe. In this example we use 10 training epochs; in a real experiment more should be used. Good starting points are 40 epochs for a training dataset of around 10000 sequences or 100 epochs for a training dataset of around 1000 sequences. For a real experiment, `warmup_epoch` can be increased to 10."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-07-23 14:22:06> Training with fixed sequence length: 0\n",
+      "[Training] Epoch=1, lr=4e-05, loss=1.39779794216156\n",
+      "[Training] Epoch=2, lr=6e-05, loss=1.0070140702383858\n",
+      "[Training] Epoch=3, lr=8e-05, loss=0.7982760497501918\n",
+      "[Training] Epoch=4, lr=0.0001, loss=0.7397338407380241\n",
+      "[Training] Epoch=5, lr=0.0001, loss=0.7099559647696358\n",
+      "[Training] Epoch=6, lr=9.045084971874738e-05, loss=0.7016251683235168\n",
+      "[Training] Epoch=7, lr=6.545084971874738e-05, loss=0.6965694086892265\n",
+      "[Training] Epoch=8, lr=3.4549150281252636e-05, loss=0.697939566203526\n",
+      "[Training] Epoch=9, lr=9.549150281252633e-06, loss=0.6959438664572579\n",
+      "[Training] Epoch=10, lr=1.0000000000000002e-14, loss=0.6928229417119708\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.train(train_seq_df,\n",
+    "            epoch=10, warmup_epoch=5, \n",
+    "            verbose=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can assess the model performance after transfer learning using the `model.test()` function on the training and testing data. This can also be done before transfer learning to assess how well the model already fits the available data. The test reports the precision, recall and false positive rate of the model at different probability cutoffs. As a rule of thumb, a false positive rate above 7% (TODO @Feng: adjust if a lower/higher value is appropriate) is not recommended because the peptide list becomes disproportionately large, leading to fewer identifications during the search. In case of a high false positive rate, the probability cutoff at which the peptides are predicted should be increased."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>precision</th>\n",
+       "      <th>recall</th>\n",
+       "      <th>false_positive</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.511434</td>\n",
+       "      <td>0.595189</td>\n",
+       "      <td>0.568577</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.6</td>\n",
+       "      <td>0.416667</td>\n",
+       "      <td>0.017912</td>\n",
+       "      <td>0.025077</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.7</td>\n",
+       "      <td>0.333333</td>\n",
+       "      <td>0.000512</td>\n",
+       "      <td>0.001024</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.8</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.9</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   HLA_prob_pred  precision    recall  false_positive\n",
+       "0            0.5   0.511434  0.595189        0.568577\n",
+       "1            0.6   0.416667  0.017912        0.025077\n",
+       "2            0.7   0.333333  0.000512        0.001024\n",
+       "3            0.8        NaN  0.000000        0.000000\n",
+       "4            0.9        NaN  0.000000        0.000000"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.test(train_seq_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>precision</th>\n",
+       "      <th>recall</th>\n",
+       "      <th>false_positive</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.450192</td>\n",
+       "      <td>0.480573</td>\n",
+       "      <td>0.586912</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.6</td>\n",
+       "      <td>0.470588</td>\n",
+       "      <td>0.016360</td>\n",
+       "      <td>0.018405</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.7</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.8</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.9</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   HLA_prob_pred  precision    recall  false_positive\n",
+       "0            0.5   0.450192  0.480573        0.586912\n",
+       "1            0.6   0.470588  0.016360        0.018405\n",
+       "2            0.7        NaN  0.000000        0.000000\n",
+       "3            0.8        NaN  0.000000        0.000000\n",
+       "4            0.9        NaN  0.000000        0.000000"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.test(test_seq_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After transfer learning and testing the new model, peptides can be predicted in the same way as with the pretrained model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:01<00:00,  1.32s/it]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>sequence</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.711809</td>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>62</td>\n",
+       "      <td>70</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.627015</td>\n",
+       "      <td>KAEIARAY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>106</td>\n",
+       "      <td>114</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.628822</td>\n",
+       "      <td>TAYETLKV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>299</td>\n",
+       "      <td>307</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.605544</td>\n",
+       "      <td>LLKLVKSY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>346</td>\n",
+       "      <td>354</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.646759</td>\n",
+       "      <td>YASSFKGY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>258</td>\n",
+       "      <td>266</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.624555</td>\n",
+       "      <td>ICYKNNCY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>294</td>\n",
+       "      <td>303</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.610476</td>\n",
+       "      <td>KEDQDLLKL</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>298</td>\n",
+       "      <td>307</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.645020</td>\n",
+       "      <td>DLLKLVKSY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>235</td>\n",
+       "      <td>244</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.629079</td>\n",
+       "      <td>SLFNQEVQI</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>257</td>\n",
+       "      <td>266</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.623247</td>\n",
+       "      <td>WICYKNNCY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>267</td>\n",
+       "      <td>276</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.611738</td>\n",
+       "      <td>FFDESKNWY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>17</td>\n",
+       "      <td>26</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.605875</td>\n",
+       "      <td>RRWWMLLAP</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>327</td>\n",
+       "      <td>336</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.616737</td>\n",
+       "      <td>ILSPNLLTI</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>74</td>\n",
+       "      <td>83</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.611590</td>\n",
+       "      <td>RRYHPDRYR</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>344</td>\n",
+       "      <td>354</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0.662783</td>\n",
+       "      <td>ALYASSFKGY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>232</td>\n",
+       "      <td>242</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0.651600</td>\n",
+       "      <td>FLNSLFNQEV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>221</td>\n",
+       "      <td>231</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0.617175</td>\n",
+       "      <td>FIIMVTIWSA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>222</td>\n",
+       "      <td>232</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0.600623</td>\n",
+       "      <td>IIMVTIWSAV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>74</td>\n",
+       "      <td>84</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0.614895</td>\n",
+       "      <td>RRYHPDRYRP</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>221</td>\n",
+       "      <td>232</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0.608950</td>\n",
+       "      <td>FIIMVTIWSAV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>353</td>\n",
+       "      <td>364</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0.613787</td>\n",
+       "      <td>YIENCSTPNTY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>74</td>\n",
+       "      <td>85</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0.605368</td>\n",
+       "      <td>RRYHPDRYRPQ</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>112</td>\n",
+       "      <td>124</td>\n",
+       "      <td>12</td>\n",
+       "      <td>0.612270</td>\n",
+       "      <td>KVSQAAAELQQY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>42</td>\n",
+       "      <td>54</td>\n",
+       "      <td>12</td>\n",
+       "      <td>0.607715</td>\n",
+       "      <td>GLYCGTRDCYEV</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>351</td>\n",
+       "      <td>363</td>\n",
+       "      <td>12</td>\n",
+       "      <td>0.616891</td>\n",
+       "      <td>KGYIENCSTPNT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>74</td>\n",
+       "      <td>86</td>\n",
+       "      <td>12</td>\n",
+       "      <td>0.602210</td>\n",
+       "      <td>RRYHPDRYRPQP</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>86</td>\n",
+       "      <td>99</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.644656</td>\n",
+       "      <td>GDEGPGRTPQSAE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>351</td>\n",
+       "      <td>364</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.603497</td>\n",
+       "      <td>KGYIENCSTPNTY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>73</td>\n",
+       "      <td>86</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.622453</td>\n",
+       "      <td>ARRYHPDRYRPQP</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>74</td>\n",
+       "      <td>87</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.611441</td>\n",
+       "      <td>RRYHPDRYRPQPG</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>334</td>\n",
+       "      <td>347</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.604354</td>\n",
+       "      <td>TIIEMQKGDCALY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>141</td>\n",
+       "      <td>154</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.601309</td>\n",
+       "      <td>SNPFREPRSCALL</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>32</td>\n",
+       "      <td>45</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.622797</td>\n",
+       "      <td>LVRPAGALVEGLY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>130</td>\n",
+       "      <td>143</td>\n",
+       "      <td>13</td>\n",
+       "      <td>0.604786</td>\n",
+       "      <td>KDALLVGVPAGSN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>333</td>\n",
+       "      <td>347</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.613545</td>\n",
+       "      <td>LTIIEMQKGDCALY</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>60</td>\n",
+       "      <td>74</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.607648</td>\n",
+       "      <td>AGKAEIARAYRQLA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>85</td>\n",
+       "      <td>99</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.606241</td>\n",
+       "      <td>PGDEGPGRTPQSAE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>229</td>\n",
+       "      <td>243</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.606759</td>\n",
+       "      <td>SAVFLNSLFNQEVQ</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>86</td>\n",
+       "      <td>100</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.622891</td>\n",
+       "      <td>GDEGPGRTPQSAEE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>167</td>\n",
+       "      <td>181</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.611953</td>\n",
+       "      <td>WEMSEFHNYNLDLK</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>117</td>\n",
+       "      <td>131</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.619257</td>\n",
+       "      <td>AAELQQYCMQNACK</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>73</td>\n",
+       "      <td>87</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.608767</td>\n",
+       "      <td>ARRYHPDRYRPQPG</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.600299</td>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    start_pos  stop_pos  nAA  HLA_prob_pred        sequence\n",
+       "0         170       178    8       0.711809        SEFHNYNL\n",
+       "1          62        70    8       0.627015        KAEIARAY\n",
+       "2         106       114    8       0.628822        TAYETLKV\n",
+       "3         299       307    8       0.605544        LLKLVKSY\n",
+       "4         346       354    8       0.646759        YASSFKGY\n",
+       "5         258       266    8       0.624555        ICYKNNCY\n",
+       "6         294       303    9       0.610476       KEDQDLLKL\n",
+       "7         298       307    9       0.645020       DLLKLVKSY\n",
+       "8         235       244    9       0.629079       SLFNQEVQI\n",
+       "9         257       266    9       0.623247       WICYKNNCY\n",
+       "10        267       276    9       0.611738       FFDESKNWY\n",
+       "11         17        26    9       0.605875       RRWWMLLAP\n",
+       "12        327       336    9       0.616737       ILSPNLLTI\n",
+       "13         74        83    9       0.611590       RRYHPDRYR\n",
+       "14        344       354   10       0.662783      ALYASSFKGY\n",
+       "15        232       242   10       0.651600      FLNSLFNQEV\n",
+       "16        221       231   10       0.617175      FIIMVTIWSA\n",
+       "17        222       232   10       0.600623      IIMVTIWSAV\n",
+       "18         74        84   10       0.614895      RRYHPDRYRP\n",
+       "19        221       232   11       0.608950     FIIMVTIWSAV\n",
+       "20        353       364   11       0.613787     YIENCSTPNTY\n",
+       "21         74        85   11       0.605368     RRYHPDRYRPQ\n",
+       "22        112       124   12       0.612270    KVSQAAAELQQY\n",
+       "23         42        54   12       0.607715    GLYCGTRDCYEV\n",
+       "24        351       363   12       0.616891    KGYIENCSTPNT\n",
+       "25         74        86   12       0.602210    RRYHPDRYRPQP\n",
+       "26         86        99   13       0.644656   GDEGPGRTPQSAE\n",
+       "27        351       364   13       0.603497   KGYIENCSTPNTY\n",
+       "28         73        86   13       0.622453   ARRYHPDRYRPQP\n",
+       "29         74        87   13       0.611441   RRYHPDRYRPQPG\n",
+       "30        334       347   13       0.604354   TIIEMQKGDCALY\n",
+       "31        141       154   13       0.601309   SNPFREPRSCALL\n",
+       "32         32        45   13       0.622797   LVRPAGALVEGLY\n",
+       "33        130       143   13       0.604786   KDALLVGVPAGSN\n",
+       "34        333       347   14       0.613545  LTIIEMQKGDCALY\n",
+       "35         60        74   14       0.607648  AGKAEIARAYRQLA\n",
+       "36         85        99   14       0.606241  PGDEGPGRTPQSAE\n",
+       "37        229       243   14       0.606759  SAVFLNSLFNQEVQ\n",
+       "38         86       100   14       0.622891  GDEGPGRTPQSAEE\n",
+       "39        167       181   14       0.611953  WEMSEFHNYNLDLK\n",
+       "40        117       131   14       0.619257  AAELQQYCMQNACK\n",
+       "41         73        87   14       0.608767  ARRYHPDRYRPQPG\n",
+       "42        329       343   14       0.600299  SPNLLTIIEMQKGD"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict_from_proteins(fasta_path, prob_threshold=0.6)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Spectral library prediction"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now the spectral library for the filtered peptide list can be predicted using PredictSpecLibFasta. First, one needs to select the models for rt/ccs/ms2 prediction using the ModelManager. One can select from a set of pretrained models or load externally trained models. Here we load the 'HLA' model (at the moment this still loads the generic model, but in the future this is supposed to be replaced by an HLA-specific internal model)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peptdeep.spec_lib.predict_lib import  ModelManager\n",
+    "from peptdeep.protein.fasta import PredictSpecLibFasta\n",
+    "\n",
+    "model_mgr = ModelManager()\n",
+    "model_mgr.load_installed_models(model_type='HLA')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the next step, PredictSpecLibFasta is initialized using the preloaded model. The default settings are chosen for the prediction of tryptic libraries, so some parameters need to be adjusted, in particular `precursor_charge_min` and `precursor_charge_max`. By default, Carbamidomethylation is set as a fixed modification (`fix_mods`) and Acetylation and Oxidation are set as variable modifications (`var_mods`). These can be removed by passing an empty list, as shown below for the fixed modifications.\n",
+    "\n",
+    "Of note, PredictSpecLibFasta can also be used to predict a library from a fasta file. For that purpose one can also set the protease (default trypsin) and the minimum and maximum peptide length (7 to 35). We don't need to change those parameters here, as we won't make use of the digestion functions but rather provide an already digested sequence table. \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speclib = PredictSpecLibFasta(model_manager=model_mgr,\n",
+    "                              precursor_charge_min=1,\n",
+    "                              precursor_charge_max=3,\n",
+    "                              fix_mods=[])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To reduce the size of the dataframe and predicted library we give each peptide sequence a unique protein identifier (number). This enables the use of search engines that rely on protein information (such as AlphaDIA) but one needs to keep in mind to remove filtering steps based on how many peptides per protein are identified during data analysis. Alternatively, the proteins that the peptide sequences may originate from can be inferred using `alphabase.protein.fasta.annotate_precursor_df()` (demonstrated below)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_id</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>full_name</th>\n",
+       "      <th>gene_org</th>\n",
+       "      <th>gene_name</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 12 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     start_pos  stop_pos  nAA  HLA_prob_pred        sequence protein_id  \\\n",
+       "0          168       176    8       0.793702        EMSEFHNY          0   \n",
+       "1          130       138    8       0.817415        KDALLVGV          1   \n",
+       "2          137       145    8       0.751329        VPAGSNPF          2   \n",
+       "3          170       178    8       0.940019        SEFHNYNL          3   \n",
+       "4          181       189    8       0.895964        KSDFSTRW          4   \n",
+       "..         ...       ...  ...            ...             ...        ...   \n",
+       "143         95       109   14       0.969541  QSAEEAFLLVATAY        143   \n",
+       "144        329       343   14       0.756001  SPNLLTIIEMQKGD        144   \n",
+       "145          5        19   14       0.733784  LLSPGWGAGAAGRR        145   \n",
+       "146        110       124   14       0.891976  TLKVSQAAAELQQY        146   \n",
+       "147          6        20   14       0.842583  LSPGWGAGAAGRRW        147   \n",
+       "\n",
+       "    protein_idxes full_name gene_org gene_name  is_prot_nterm  is_prot_cterm  \n",
+       "0               0         0        0         0          False          False  \n",
+       "1               1         1        1         1          False          False  \n",
+       "2               2         2        2         2          False          False  \n",
+       "3               3         3        3         3          False          False  \n",
+       "4               4         4        4         4          False          False  \n",
+       "..            ...       ...      ...       ...            ...            ...  \n",
+       "143           143       143      143       143          False          False  \n",
+       "144           144       144      144       144          False          False  \n",
+       "145           145       145      145       145          False          False  \n",
+       "146           146       146      146       146          False          False  \n",
+       "147           147       147      147       147          False          False  \n",
+       "\n",
+       "[148 rows x 12 columns]"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sequence_df['protein_id'] = [str(i) for i in range(len(sequence_df))]\n",
+    "sequence_df['protein_idxes'] = sequence_df.protein_id.astype(\"U\")\n",
+    "sequence_df['full_name'] = sequence_df['protein_id'] \n",
+    "sequence_df['gene_org'] = sequence_df['protein_id'] \n",
+    "sequence_df['gene_name'] = sequence_df['protein_id']\n",
+    "sequence_df[\"is_prot_nterm\"] = False\n",
+    "sequence_df[\"is_prot_cterm\"] = False\n",
+    "sequence_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The sequence dataframe contains all the relevant information to be passed to the protein_df and the precursor_df."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_id</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>full_name</th>\n",
+       "      <th>gene_org</th>\n",
+       "      <th>gene_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>3</td>\n",
+       "      <td>8</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>4</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>143</td>\n",
+       "      <td>14</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>144</td>\n",
+       "      <td>14</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>145</td>\n",
+       "      <td>14</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>14</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>14</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 6 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           sequence protein_id  nAA full_name gene_org gene_name\n",
+       "0          EMSEFHNY          0    8         0        0         0\n",
+       "1          KDALLVGV          1    8         1        1         1\n",
+       "2          VPAGSNPF          2    8         2        2         2\n",
+       "3          SEFHNYNL          3    8         3        3         3\n",
+       "4          KSDFSTRW          4    8         4        4         4\n",
+       "..              ...        ...  ...       ...      ...       ...\n",
+       "143  QSAEEAFLLVATAY        143   14       143      143       143\n",
+       "144  SPNLLTIIEMQKGD        144   14       144      144       144\n",
+       "145  LLSPGWGAGAAGRR        145   14       145      145       145\n",
+       "146  TLKVSQAAAELQQY        146   14       146      146       146\n",
+       "147  LSPGWGAGAAGRRW        147   14       147      147       147\n",
+       "\n",
+       "[148 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "speclib.protein_df = sequence_df[\n",
+    "    [\"sequence\",\"protein_id\",\"nAA\", 'full_name', 'gene_org', 'gene_name']\n",
+    "].copy()\n",
+    "speclib.protein_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>2</td>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>3</td>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>4</td>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>143</td>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>144</td>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>145</td>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           sequence protein_idxes  start_pos  stop_pos  nAA  HLA_prob_pred  \\\n",
+       "0          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "1          KDALLVGV             1        130       138    8       0.817415   \n",
+       "2          VPAGSNPF             2        137       145    8       0.751329   \n",
+       "3          SEFHNYNL             3        170       178    8       0.940019   \n",
+       "4          KSDFSTRW             4        181       189    8       0.895964   \n",
+       "..              ...           ...        ...       ...  ...            ...   \n",
+       "143  QSAEEAFLLVATAY           143         95       109   14       0.969541   \n",
+       "144  SPNLLTIIEMQKGD           144        329       343   14       0.756001   \n",
+       "145  LLSPGWGAGAAGRR           145          5        19   14       0.733784   \n",
+       "146  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "147  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "\n",
+       "     is_prot_nterm  is_prot_cterm  \n",
+       "0            False          False  \n",
+       "1            False          False  \n",
+       "2            False          False  \n",
+       "3            False          False  \n",
+       "4            False          False  \n",
+       "..             ...            ...  \n",
+       "143          False          False  \n",
+       "144          False          False  \n",
+       "145          False          False  \n",
+       "146          False          False  \n",
+       "147          False          False  \n",
+       "\n",
+       "[148 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "speclib.precursor_df = sequence_df[\n",
+    "    [\"sequence\",\"protein_idxes\",\"start_pos\",\"stop_pos\",\n",
+    "     \"nAA\",\"HLA_prob_pred\", 'is_prot_nterm', 'is_prot_cterm']\n",
+    "].copy()\n",
+    "speclib.precursor_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>2</td>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>3</td>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>4</td>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>143</td>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>144</td>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>145</td>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           sequence protein_idxes  start_pos  stop_pos  nAA  HLA_prob_pred  \\\n",
+       "0          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "1          KDALLVGV             1        130       138    8       0.817415   \n",
+       "2          VPAGSNPF             2        137       145    8       0.751329   \n",
+       "3          SEFHNYNL             3        170       178    8       0.940019   \n",
+       "4          KSDFSTRW             4        181       189    8       0.895964   \n",
+       "..              ...           ...        ...       ...  ...            ...   \n",
+       "143  QSAEEAFLLVATAY           143         95       109   14       0.969541   \n",
+       "144  SPNLLTIIEMQKGD           144        329       343   14       0.756001   \n",
+       "145  LLSPGWGAGAAGRR           145          5        19   14       0.733784   \n",
+       "146  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "147  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "\n",
+       "     is_prot_nterm  is_prot_cterm  \n",
+       "0            False          False  \n",
+       "1            False          False  \n",
+       "2            False          False  \n",
+       "3            False          False  \n",
+       "4            False          False  \n",
+       "..             ...            ...  \n",
+       "143          False          False  \n",
+       "144          False          False  \n",
+       "145          False          False  \n",
+       "146          False          False  \n",
+       "147          False          False  \n",
+       "\n",
+       "[148 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "speclib.precursor_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, the modifications and charges can be added to the peptide dataframe using add_modifications and add_charge. This creates a unique entry for every combination of charge and modification for all the sequences in the precursor dataframe. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>charge</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>493</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>494</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>495</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>496</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>497</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>498 rows × 11 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           sequence protein_idxes  start_pos  stop_pos  nAA  HLA_prob_pred  \\\n",
+       "0          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "1          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "2          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "3          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "4          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "..              ...           ...        ...       ...  ...            ...   \n",
+       "493  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "494  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "495  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "496  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "497  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "\n",
+       "     is_prot_nterm  is_prot_cterm         mods mod_sites  charge  \n",
+       "0            False          False  Oxidation@M         2       1  \n",
+       "1            False          False  Oxidation@M         2       2  \n",
+       "2            False          False  Oxidation@M         2       3  \n",
+       "3            False          False                              1  \n",
+       "4            False          False                              2  \n",
+       "..             ...            ...          ...       ...     ...  \n",
+       "493          False          False                              2  \n",
+       "494          False          False                              3  \n",
+       "495          False          False                              1  \n",
+       "496          False          False                              2  \n",
+       "497          False          False                              3  \n",
+       "\n",
+       "[498 rows x 11 columns]"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "speclib.add_modifications()\n",
+    "speclib.add_charge()\n",
+    "speclib.precursor_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now CCS, RT and MS2 can be predicted for each entry using `predict_all`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-07-23 14:22:43> Predicting RT/IM/MS2 for 400 precursors ...\n",
+      "2024-07-23 14:22:43> Predicting RT ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 7/7 [00:00<00:00, 27.54it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-07-23 14:22:43> Predicting mobility ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "100%|██████████| 7/7 [00:00<00:00, 50.06it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-07-23 14:22:44> Predicting MS2 ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "100%|██████████| 7/7 [00:00<00:00, 23.73it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-07-23 14:22:44> End predicting RT/IM/MS2\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "speclib.predict_all()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "iRTs can be added using `translate_rt_to_irt_pred`. This is not necessary for search engines like DIA-NN or AlphaDIA, but it is required for Spectronaut."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predict RT for 11 iRT precursors.\n",
+      "Linear regression of `rt_pred` to `irt`:\n",
+      "   R_square         R       slope  intercept  test_num\n",
+      "0   0.99007  0.995022  152.235639 -39.232164        11\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>...</th>\n",
+       "      <th>precursor_mz</th>\n",
+       "      <th>rt_pred</th>\n",
+       "      <th>rt_norm_pred</th>\n",
+       "      <th>ccs_pred</th>\n",
+       "      <th>mobility_pred</th>\n",
+       "      <th>nce</th>\n",
+       "      <th>instrument</th>\n",
+       "      <th>frag_start_idx</th>\n",
+       "      <th>frag_stop_idx</th>\n",
+       "      <th>irt_pred</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1072.404037</td>\n",
+       "      <td>0.189650</td>\n",
+       "      <td>0.189650</td>\n",
+       "      <td>254.195892</td>\n",
+       "      <td>1.253140</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7</td>\n",
+       "      <td>-10.360738</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>536.705657</td>\n",
+       "      <td>0.189650</td>\n",
+       "      <td>0.189650</td>\n",
+       "      <td>337.328583</td>\n",
+       "      <td>0.831494</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>7</td>\n",
+       "      <td>14</td>\n",
+       "      <td>-10.360738</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>1056.409123</td>\n",
+       "      <td>0.289261</td>\n",
+       "      <td>0.289261</td>\n",
+       "      <td>255.103699</td>\n",
+       "      <td>1.257373</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>14</td>\n",
+       "      <td>21</td>\n",
+       "      <td>4.803681</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>528.708200</td>\n",
+       "      <td>0.289261</td>\n",
+       "      <td>0.289261</td>\n",
+       "      <td>337.444641</td>\n",
+       "      <td>0.831621</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>21</td>\n",
+       "      <td>28</td>\n",
+       "      <td>4.803681</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>814.503280</td>\n",
+       "      <td>0.433791</td>\n",
+       "      <td>0.433791</td>\n",
+       "      <td>256.615204</td>\n",
+       "      <td>1.260001</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>28</td>\n",
+       "      <td>35</td>\n",
+       "      <td>26.806270</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>395</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>775.414662</td>\n",
+       "      <td>0.489545</td>\n",
+       "      <td>0.489545</td>\n",
+       "      <td>429.360901</td>\n",
+       "      <td>1.062514</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>3810</td>\n",
+       "      <td>3823</td>\n",
+       "      <td>35.294030</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>396</th>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>517.278867</td>\n",
+       "      <td>0.489545</td>\n",
+       "      <td>0.489545</td>\n",
+       "      <td>463.231049</td>\n",
+       "      <td>0.764225</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>3823</td>\n",
+       "      <td>3836</td>\n",
+       "      <td>35.294030</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>397</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>1441.744742</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>289.200989</td>\n",
+       "      <td>1.430378</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>3836</td>\n",
+       "      <td>3849</td>\n",
+       "      <td>18.273780</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>398</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>721.376009</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>404.633698</td>\n",
+       "      <td>1.000659</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>3849</td>\n",
+       "      <td>3862</td>\n",
+       "      <td>18.273780</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>399</th>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>481.253098</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>0.377743</td>\n",
+       "      <td>476.655701</td>\n",
+       "      <td>0.785851</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>Lumos</td>\n",
+       "      <td>3862</td>\n",
+       "      <td>3875</td>\n",
+       "      <td>18.273780</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>400 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           sequence protein_idxes  start_pos  stop_pos  nAA  HLA_prob_pred  \\\n",
+       "0          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "1          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "2          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "3          EMSEFHNY             0        168       176    8       0.793702   \n",
+       "4          KDALLVGV             1        130       138    8       0.817415   \n",
+       "..              ...           ...        ...       ...  ...            ...   \n",
+       "395  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "396  TLKVSQAAAELQQY           146        110       124   14       0.891976   \n",
+       "397  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "398  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "399  LSPGWGAGAAGRRW           147          6        20   14       0.842583   \n",
+       "\n",
+       "     is_prot_nterm  is_prot_cterm         mods mod_sites  ...  precursor_mz  \\\n",
+       "0            False          False  Oxidation@M         2  ...   1072.404037   \n",
+       "1            False          False  Oxidation@M         2  ...    536.705657   \n",
+       "2            False          False                         ...   1056.409123   \n",
+       "3            False          False                         ...    528.708200   \n",
+       "4            False          False                         ...    814.503280   \n",
+       "..             ...            ...          ...       ...  ...           ...   \n",
+       "395          False          False                         ...    775.414662   \n",
+       "396          False          False                         ...    517.278867   \n",
+       "397          False          False                         ...   1441.744742   \n",
+       "398          False          False                         ...    721.376009   \n",
+       "399          False          False                         ...    481.253098   \n",
+       "\n",
+       "      rt_pred  rt_norm_pred    ccs_pred  mobility_pred   nce  instrument  \\\n",
+       "0    0.189650      0.189650  254.195892       1.253140  30.0       Lumos   \n",
+       "1    0.189650      0.189650  337.328583       0.831494  30.0       Lumos   \n",
+       "2    0.289261      0.289261  255.103699       1.257373  30.0       Lumos   \n",
+       "3    0.289261      0.289261  337.444641       0.831621  30.0       Lumos   \n",
+       "4    0.433791      0.433791  256.615204       1.260001  30.0       Lumos   \n",
+       "..        ...           ...         ...            ...   ...         ...   \n",
+       "395  0.489545      0.489545  429.360901       1.062514  30.0       Lumos   \n",
+       "396  0.489545      0.489545  463.231049       0.764225  30.0       Lumos   \n",
+       "397  0.377743      0.377743  289.200989       1.430378  30.0       Lumos   \n",
+       "398  0.377743      0.377743  404.633698       1.000659  30.0       Lumos   \n",
+       "399  0.377743      0.377743  476.655701       0.785851  30.0       Lumos   \n",
+       "\n",
+       "    frag_start_idx  frag_stop_idx   irt_pred  \n",
+       "0                0              7 -10.360738  \n",
+       "1                7             14 -10.360738  \n",
+       "2               14             21   4.803681  \n",
+       "3               21             28   4.803681  \n",
+       "4               28             35  26.806270  \n",
+       "..             ...            ...        ...  \n",
+       "395           3810           3823  35.294030  \n",
+       "396           3823           3836  35.294030  \n",
+       "397           3836           3849  18.273780  \n",
+       "398           3849           3862  18.273780  \n",
+       "399           3862           3875  18.273780  \n",
+       "\n",
+       "[400 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "speclib.translate_rt_to_irt_pred()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, the predicted library can be exported as an HDF file (for AlphaDIA) or translated to a TSV file. The TSV translation can be very time-consuming. Before the spectral library can be translated, the gene and protein columns need to be mapped from the `protein_df` into the `precursor_df`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# hdf_path = \"speclib_example.hdf\"  # choose your own output location\n",
+    "# tsv_path = \"speclib_example.tsv\"  # choose your own output location\n",
+    "# speclib.save_hdf(hdf_path) # save as hdf speclib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peptdeep.spec_lib.translate import translate_to_tsv\n",
+    "speclib.append_protein_name()\n",
+    "# translate_to_tsv(speclib=speclib, tsv = tsv_path) # save as tsv speclib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Matching peptides back to proteins"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The peptide sequences can be matched back to proteins using `annotate_precursor_df`, which requires a dataframe with a 'sequence' column and a `protein_df` such as the one loaded from the FASTA file earlier. This can be done with the sequence output of any search engine or before the library is generated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2/2 [00:00<00:00, 7639.90it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>start_pos</th>\n",
+       "      <th>stop_pos</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>HLA_prob_pred</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>protein_id</th>\n",
+       "      <th>protein_idxes</th>\n",
+       "      <th>full_name</th>\n",
+       "      <th>gene_org</th>\n",
+       "      <th>gene_name</th>\n",
+       "      <th>is_prot_nterm</th>\n",
+       "      <th>is_prot_cterm</th>\n",
+       "      <th>genes</th>\n",
+       "      <th>proteins</th>\n",
+       "      <th>cardinality</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>168</td>\n",
+       "      <td>176</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.793702</td>\n",
+       "      <td>EMSEFHNY</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024RAP8_HUMAN</td>\n",
+       "      <td>A0A024RAP8</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>130</td>\n",
+       "      <td>138</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.817415</td>\n",
+       "      <td>KDALLVGV</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>137</td>\n",
+       "      <td>145</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.751329</td>\n",
+       "      <td>VPAGSNPF</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>170</td>\n",
+       "      <td>178</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.940019</td>\n",
+       "      <td>SEFHNYNL</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024RAP8_HUMAN</td>\n",
+       "      <td>A0A024RAP8</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>181</td>\n",
+       "      <td>189</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.895964</td>\n",
+       "      <td>KSDFSTRW</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024RAP8_HUMAN</td>\n",
+       "      <td>A0A024RAP8</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>95</td>\n",
+       "      <td>109</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.969541</td>\n",
+       "      <td>QSAEEAFLLVATAY</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>143</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>329</td>\n",
+       "      <td>343</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.756001</td>\n",
+       "      <td>SPNLLTIIEMQKGD</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>144</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024RAP8_HUMAN</td>\n",
+       "      <td>A0A024RAP8</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.733784</td>\n",
+       "      <td>LLSPGWGAGAAGRR</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>145</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>146</th>\n",
+       "      <td>110</td>\n",
+       "      <td>124</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.891976</td>\n",
+       "      <td>TLKVSQAAAELQQY</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>146</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>147</th>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.842583</td>\n",
+       "      <td>LSPGWGAGAAGRRW</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>147</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>A0A024R161_HUMAN</td>\n",
+       "      <td>A0A024R161</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>148 rows × 15 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     start_pos  stop_pos  nAA  HLA_prob_pred        sequence protein_id  \\\n",
+       "0          168       176    8       0.793702        EMSEFHNY          0   \n",
+       "1          130       138    8       0.817415        KDALLVGV          1   \n",
+       "2          137       145    8       0.751329        VPAGSNPF          2   \n",
+       "3          170       178    8       0.940019        SEFHNYNL          3   \n",
+       "4          181       189    8       0.895964        KSDFSTRW          4   \n",
+       "..         ...       ...  ...            ...             ...        ...   \n",
+       "143         95       109   14       0.969541  QSAEEAFLLVATAY        143   \n",
+       "144        329       343   14       0.756001  SPNLLTIIEMQKGD        144   \n",
+       "145          5        19   14       0.733784  LLSPGWGAGAAGRR        145   \n",
+       "146        110       124   14       0.891976  TLKVSQAAAELQQY        146   \n",
+       "147          6        20   14       0.842583  LSPGWGAGAAGRRW        147   \n",
+       "\n",
+       "    protein_idxes full_name gene_org gene_name  is_prot_nterm  is_prot_cterm  \\\n",
+       "0               0         0        0         0          False          False   \n",
+       "1               1         1        1         1          False          False   \n",
+       "2               2         2        2         2          False          False   \n",
+       "3               3         3        3         3          False          False   \n",
+       "4               4         4        4         4          False          False   \n",
+       "..            ...       ...      ...       ...            ...            ...   \n",
+       "143           143       143      143       143          False          False   \n",
+       "144           144       144      144       144          False          False   \n",
+       "145           145       145      145       145          False          False   \n",
+       "146           146       146      146       146          False          False   \n",
+       "147           147       147      147       147          False          False   \n",
+       "\n",
+       "                genes    proteins  cardinality  \n",
+       "0    A0A024RAP8_HUMAN  A0A024RAP8            1  \n",
+       "1    A0A024R161_HUMAN  A0A024R161            1  \n",
+       "2    A0A024R161_HUMAN  A0A024R161            1  \n",
+       "3    A0A024RAP8_HUMAN  A0A024RAP8            1  \n",
+       "4    A0A024RAP8_HUMAN  A0A024RAP8            1  \n",
+       "..                ...         ...          ...  \n",
+       "143  A0A024R161_HUMAN  A0A024R161            1  \n",
+       "144  A0A024RAP8_HUMAN  A0A024RAP8            1  \n",
+       "145  A0A024R161_HUMAN  A0A024R161            1  \n",
+       "146  A0A024R161_HUMAN  A0A024R161            1  \n",
+       "147  A0A024R161_HUMAN  A0A024R161            1  \n",
+       "\n",
+       "[148 rows x 15 columns]"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from alphabase.protein.fasta import annotate_precursor_df\n",
+    "inferred_sequence_df = annotate_precursor_df(sequence_df, protein_df)\n",
+    "inferred_sequence_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nbs_tests/hla/hla_class1.ipynb b/nbs_tests/hla/hla_class1.ipynb
index d0fa0eb3..f4bcd7ae 100644
--- a/nbs_tests/hla/hla_class1.ipynb
+++ b/nbs_tests/hla/hla_class1.ipynb
@@ -33,10 +33,11 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+      "2024-07-02 17:16:12> Downloading https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/hla_model.zip ...\n",
+      "2024-07-02 17:16:14> The pretrained models had been downloaded in C:\\Users\\wahle/peptdeep\\pretrained_models\\hla_model.zip\n"
      ]
     }
    ],
@@ -78,7 +79,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 1/1 [00:00<00:00, 14.32it/s]\n"
+      "100%|██████████| 1/1 [00:00<00:00,  7.46it/s]\n"
      ]
     },
     {
@@ -321,7 +322,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.4"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/mass_spec/mass_calibration.ipynb b/nbs_tests/mass_spec/mass_calibration.ipynb
index 6ee0cbe6..16140e9f 100644
--- a/nbs_tests/mass_spec/mass_calibration.ipynb
+++ b/nbs_tests/mass_spec/mass_calibration.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -18,16 +18,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import torch # noqa: F401, to prevent crash in Mac Arm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    }
+   ],
    "source": [
     "from peptdeep.mass_spec.mass_calibration import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -112,7 +129,7 @@
        "7  0.0  1.0"
       ]
      },
-     "execution_count": null,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -146,6 +163,18 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/mass_spec/match.ipynb b/nbs_tests/mass_spec/match.ipynb
index 3a029140..cce9bee6 100644
--- a/nbs_tests/mass_spec/match.ipynb
+++ b/nbs_tests/mass_spec/match.ipynb
@@ -16,6 +16,15 @@
     "# Match"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch # noqa: F401, to prevent crash in Mac Arm"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -377,6 +386,10 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/mass_spec/ms_reader.ipynb b/nbs_tests/mass_spec/ms_reader.ipynb
index 4ea3bd81..7064c883 100644
--- a/nbs_tests/mass_spec/ms_reader.ipynb
+++ b/nbs_tests/mass_spec/ms_reader.ipynb
@@ -16,6 +16,15 @@
     "# MS Reader"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch # noqa: F401, to prevent crash in Mac Arm"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/nbs_tests/model/ccs.ipynb b/nbs_tests/model/ccs.ipynb
index a9f808fa..4e84ebb5 100644
--- a/nbs_tests/model/ccs.ipynb
+++ b/nbs_tests/model/ccs.ipynb
@@ -327,7 +327,7 @@
     "repeat = 10\n",
     "precursor_df = pd.DataFrame({\n",
     "    'sequence': ['AGHCEWQMKYR']*repeat,\n",
-    "    'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n",
+    "    'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n",
     "    'mod_sites': ['0;4;8']*repeat,\n",
     "    'nAA': [11]*repeat,\n",
     "    'charge': [2]*repeat,\n",
diff --git a/nbs_tests/model/featurize.ipynb b/nbs_tests/model/featurize.ipynb
index 7eafdd2b..8adb958b 100644
--- a/nbs_tests/model/featurize.ipynb
+++ b/nbs_tests/model/featurize.ipynb
@@ -66,10 +66,10 @@
    "outputs": [],
    "source": [
     "#| hide\n",
-    "x = parse_mod_feature(5, ['Acetyl@Protein N-term','Phospho@S','Oxidation@M'], [0,-1,1])\n",
+    "x = parse_mod_feature(5, ['Acetyl@Protein_N-term','Phospho@S','Oxidation@M'], [0,-1,1])\n",
     "assert x.shape == (7, mod_feature_size)\n",
     "assert np.all(x[1,:]==MOD_TO_FEATURE['Oxidation@M'])\n",
-    "assert np.all(x[0,:]==MOD_TO_FEATURE['Acetyl@Protein N-term'])\n",
+    "assert np.all(x[0,:]==MOD_TO_FEATURE['Acetyl@Protein_N-term'])\n",
     "assert np.all(x[-1,:]==MOD_TO_FEATURE['Phospho@S'])\n",
     "assert np.all(x[(2,3,4,5),:]==0)"
    ]
diff --git a/nbs_tests/model/ms2.ipynb b/nbs_tests/model/ms2.ipynb
index 9fe774e6..c820dfba 100644
--- a/nbs_tests/model/ms2.ipynb
+++ b/nbs_tests/model/ms2.ipynb
@@ -396,7 +396,7 @@
     "repeat = 10\n",
     "precursor_df = pd.DataFrame({\n",
     "    'sequence': ['AGHCEWQMKYR']*repeat,\n",
-    "    'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n",
+    "    'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat,\n",
     "    'mod_sites': ['0;4;8']*repeat,\n",
     "    'nAA': [11]*repeat,\n",
     "    'nce': [20]*repeat,\n",
diff --git a/nbs_tests/model/rt.ipynb b/nbs_tests/model/rt.ipynb
index 9bf8803e..ed952b40 100644
--- a/nbs_tests/model/rt.ipynb
+++ b/nbs_tests/model/rt.ipynb
@@ -135,7 +135,7 @@
     "def create_test_dataframe_with_identical_rows(nrows = 10):\n",
     "    precursor_df = pd.DataFrame({\n",
     "        'sequence': ['AGHCEWQMKYR']*nrows,\n",
-    "        'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*nrows,\n",
+    "        'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*nrows,\n",
     "        'mod_sites': ['0;4;8']*nrows,\n",
     "        'nAA': [11]*nrows,\n",
     "        'rt_norm': [0.6]*nrows\n",
diff --git a/nbs_tests/pipeline_api.ipynb b/nbs_tests/pipeline_api.ipynb
index 4d81db6f..a5678902 100644
--- a/nbs_tests/pipeline_api.ipynb
+++ b/nbs_tests/pipeline_api.ipynb
@@ -37,6 +37,15 @@
     "The refined models will be saved in the path pointed by \"PEPTDEEP_HOME\" in `peptdeep.settings.global_settings`."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch # noqa: F401, to prevent crash in Mac Arm"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/nbs_tests/protein/fasta.ipynb b/nbs_tests/protein/fasta.ipynb
index 1256ad16..3c8af3a0 100644
--- a/nbs_tests/protein/fasta.ipynb
+++ b/nbs_tests/protein/fasta.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -18,7 +18,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch # noqa: F401, to prevent crash in Mac Arm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -35,7 +44,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -197,7 +206,7 @@
        "8          False                  20  "
       ]
      },
-     "execution_count": null,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -224,7 +233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -276,7 +285,7 @@
        "1         yy      gene           FGHIJKLMNOPQR"
       ]
      },
-     "execution_count": null,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -287,7 +296,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -469,7 +478,7 @@
        "8          False                  20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -482,7 +491,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -514,7 +523,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -584,7 +593,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -597,7 +606,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -675,7 +684,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;3</td>\n",
        "      <td>12</td>\n",
        "      <td>xx</td>\n",
@@ -714,7 +723,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
@@ -727,7 +736,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
@@ -766,7 +775,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;8</td>\n",
        "      <td>13</td>\n",
        "      <td>xx;yy</td>\n",
@@ -779,7 +788,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>13</td>\n",
        "      <td>xx;yy</td>\n",
@@ -844,7 +853,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
@@ -857,7 +866,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
@@ -922,7 +931,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -935,7 +944,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -948,7 +957,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...</td>\n",
        "      <td>0;1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -961,7 +970,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1009,36 +1018,36 @@
        "    is_prot_cterm                                               mods  \\\n",
        "0           False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1           False                                  Carbamidomethyl@C   \n",
-       "2           False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "3           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "2           False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "3           False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "4            True                                        Oxidation@M   \n",
        "5            True                                                      \n",
        "6            True                                        Oxidation@M   \n",
        "7            True                                                      \n",
        "8           False                                  Carbamidomethyl@C   \n",
-       "9           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "9           False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "10          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "11          False                                  Carbamidomethyl@C   \n",
-       "12          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "13          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "12          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "13          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "14           True                                        Oxidation@M   \n",
        "15           True                                                      \n",
-       "16           True                  Acetyl@Protein N-term;Oxidation@M   \n",
-       "17           True                              Acetyl@Protein N-term   \n",
+       "16           True                  Acetyl@Protein_N-term;Oxidation@M   \n",
+       "17           True                              Acetyl@Protein_N-term   \n",
        "18           True                                        Oxidation@M   \n",
        "19           True                                                      \n",
        "20          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "21          False                                  Carbamidomethyl@C   \n",
-       "22          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "23          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "22          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "23          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "24          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "25          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "26          False          Oxidation@M;Oxidation@M;Carbamidomethyl@C   \n",
        "27          False                                  Carbamidomethyl@C   \n",
-       "28          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "29          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "30          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
-       "31          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "28          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "29          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "30          False  Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...   \n",
+       "31          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "\n",
        "   mod_sites  nAA proteins genes  \n",
        "0        1;4    7       xx        \n",
@@ -1075,7 +1084,7 @@
        "31       0;4   20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1087,7 +1096,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1103,14 +1112,14 @@
     "    else:\n",
     "        assert 'Carbamidomethyl@C' not in mods\n",
-    "    # test Acetyl@Protein N-term\n",
+    "    # test Acetyl@Protein_N-term\n",
-    "    if 'Acetyl@Protein N-term' in mods:\n",
+    "    if 'Acetyl@Protein_N-term' in mods:\n",
     "        assert _lib.precursor_df.is_prot_nterm[i]\n",
     "        assert '0' in sites\n",
     "    if '0' in mods:\n",
     "        assert _lib.precursor_df.is_prot_nterm[i]\n",
-    "        assert 'Acetyl@Protein N-term' in mods\n",
+    "        assert 'Acetyl@Protein_N-term' in mods\n",
     "    if not _lib.precursor_df.is_prot_nterm[i]:\n",
-    "        assert 'Acetyl@Protein N-term' not in mods\n",
+    "        assert 'Acetyl@Protein_N-term' not in mods\n",
     "    # test Oxidation@M\n",
     "    if 'Oxidation@M' in mods:\n",
     "        assert 'M' in seq\n",
@@ -1133,7 +1142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -1203,7 +1212,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -1216,7 +1225,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -1346,7 +1355,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;3</td>\n",
        "      <td>12</td>\n",
        "      <td>xx</td>\n",
@@ -1385,7 +1394,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
@@ -1398,7 +1407,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
@@ -1437,7 +1446,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;8</td>\n",
        "      <td>13</td>\n",
        "      <td>xx;yy</td>\n",
@@ -1450,7 +1459,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>13</td>\n",
        "      <td>xx;yy</td>\n",
@@ -1567,7 +1576,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
@@ -1580,7 +1589,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
@@ -1645,7 +1654,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1658,7 +1667,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1671,7 +1680,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...</td>\n",
        "      <td>0;1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1684,7 +1693,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1740,8 +1749,8 @@
        "    is_prot_cterm                                               mods  \\\n",
        "0           False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1           False                                  Carbamidomethyl@C   \n",
-       "2           False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "3           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "2           False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "3           False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "4            True                                        Oxidation@M   \n",
        "5            True                                                      \n",
        "6            True                              Oxidation@M;Phospho@S   \n",
@@ -1751,15 +1760,15 @@
        "10           True                                          Phospho@T   \n",
        "11           True                                                      \n",
        "12          False                                  Carbamidomethyl@C   \n",
-       "13          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "13          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "14          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "15          False                                  Carbamidomethyl@C   \n",
-       "16          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "17          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "16          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "17          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "18           True                                        Oxidation@M   \n",
        "19           True                                                      \n",
-       "20           True                  Acetyl@Protein N-term;Oxidation@M   \n",
-       "21           True                              Acetyl@Protein N-term   \n",
+       "20           True                  Acetyl@Protein_N-term;Oxidation@M   \n",
+       "21           True                              Acetyl@Protein_N-term   \n",
        "22           True                              Oxidation@M;Phospho@S   \n",
        "23           True                              Oxidation@M;Phospho@T   \n",
        "24           True                                        Oxidation@M   \n",
@@ -1768,16 +1777,16 @@
        "27           True                                                      \n",
        "28          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "29          False                                  Carbamidomethyl@C   \n",
-       "30          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "31          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "30          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "31          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "32          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "33          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "34          False          Oxidation@M;Oxidation@M;Carbamidomethyl@C   \n",
        "35          False                                  Carbamidomethyl@C   \n",
-       "36          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "37          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "38          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
-       "39          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "36          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "37          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "38          False  Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...   \n",
+       "39          False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "\n",
        "   mod_sites  nAA proteins genes  \n",
        "0        1;4    7       xx        \n",
@@ -1822,7 +1831,7 @@
        "39       0;4   20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1836,7 +1845,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -1909,7 +1918,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -1923,7 +1932,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
@@ -1965,7 +1974,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t...</td>\n",
+       "      <td>Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any_N-t...</td>\n",
        "      <td>4;0;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1979,7 +1988,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -1993,7 +2002,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;15;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -2007,7 +2016,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...</td>\n",
        "      <td>0;1;15;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -2021,7 +2030,7 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth...</td>\n",
        "      <td>0;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
@@ -2050,15 +2059,15 @@
        "     is_prot_cterm                                               mods  \\\n",
        "0            False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1            False                                  Carbamidomethyl@C   \n",
-       "2            False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "3            False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "2            False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "3            False            Acetyl@Protein_N-term;Carbamidomethyl@C   \n",
        "4             True                                        Oxidation@M   \n",
        "..             ...                                                ...   \n",
-       "115          False  Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t...   \n",
-       "116          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "117          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
-       "118          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
-       "119          False  Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth...   \n",
+       "115          False  Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any_N-t...   \n",
+       "116          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "117          False  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...   \n",
+       "118          False  Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...   \n",
+       "119          False  Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth...   \n",
        "\n",
        "         mod_sites  nAA proteins genes labeling_channel  \n",
        "0              1;4    7       xx                   none  \n",
@@ -2076,7 +2085,7 @@
        "[120 rows x 11 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2085,15 +2094,15 @@
     "#| hide\n",
     "_lib.add_peptide_labeling({\n",
     "    'none': [], # not labelled for reference\n",
-    "    'light': ['Dimethyl@Any N-term','Dimethyl@K'],\n",
-    "    'heavy': ['Dimethyl:2H(6)13C(2)@Any N-term','Dimethyl:2H(6)13C(2)@K'],\n",
+    "    'light': ['Dimethyl@Any_N-term','Dimethyl@K'],\n",
+    "    'heavy': ['Dimethyl:2H(6)13C(2)@Any_N-term','Dimethyl:2H(6)13C(2)@K'],\n",
     "})\n",
     "_lib.precursor_df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2102,7 +2111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -2229,7 +2238,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;2</td>\n",
        "      <td>8</td>\n",
        "      <td>0</td>\n",
@@ -2325,7 +2334,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;6</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
@@ -2397,7 +2406,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;1;3</td>\n",
        "      <td>9</td>\n",
        "      <td>0</td>\n",
@@ -2421,7 +2430,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;3</td>\n",
        "      <td>9</td>\n",
        "      <td>0</td>\n",
@@ -2493,7 +2502,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...</td>\n",
        "      <td>0;8;6</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
@@ -2517,7 +2526,7 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>Acetyl@Protein_N-term;Carbamidomethyl@C</td>\n",
        "      <td>0;6</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
@@ -2637,7 +2646,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;7</td>\n",
        "      <td>11</td>\n",
        "      <td>0</td>\n",
@@ -2661,7 +2670,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;7</td>\n",
        "      <td>11</td>\n",
        "      <td>0</td>\n",
@@ -2685,7 +2694,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>11</td>\n",
        "      <td>0</td>\n",
@@ -2709,7 +2718,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>11</td>\n",
        "      <td>0</td>\n",
@@ -2791,8 +2800,8 @@
        "      <td>0</td>\n",
        "      <td>0.352144</td>\n",
        "      <td>0.352144</td>\n",
-       "      <td>402.555023</td>\n",
-       "      <td>0.994806</td>\n",
+       "      <td>402.554993</td>\n",
+       "      <td>0.994805</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>220</td>\n",
@@ -2815,7 +2824,7 @@
        "      <td>0</td>\n",
        "      <td>0.352144</td>\n",
        "      <td>0.352144</td>\n",
-       "      <td>482.206787</td>\n",
+       "      <td>482.206757</td>\n",
        "      <td>0.794435</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -2829,7 +2838,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;4</td>\n",
        "      <td>11</td>\n",
        "      <td>1</td>\n",
@@ -2839,7 +2848,7 @@
        "      <td>0</td>\n",
        "      <td>0.406691</td>\n",
        "      <td>0.406691</td>\n",
-       "      <td>414.260437</td>\n",
+       "      <td>414.260406</td>\n",
        "      <td>1.024166</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -2853,7 +2862,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term;Oxidation@M</td>\n",
+       "      <td>Acetyl@Protein_N-term;Oxidation@M</td>\n",
        "      <td>0;4</td>\n",
        "      <td>11</td>\n",
        "      <td>1</td>\n",
@@ -2863,7 +2872,7 @@
        "      <td>0</td>\n",
        "      <td>0.406691</td>\n",
        "      <td>0.406691</td>\n",
-       "      <td>470.269653</td>\n",
+       "      <td>470.269684</td>\n",
        "      <td>0.775096</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -2877,7 +2886,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>11</td>\n",
        "      <td>1</td>\n",
@@ -2901,7 +2910,7 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>True</td>\n",
-       "      <td>Acetyl@Protein N-term</td>\n",
+       "      <td>Acetyl@Protein_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>11</td>\n",
        "      <td>1</td>\n",
@@ -2911,7 +2920,7 @@
        "      <td>0</td>\n",
        "      <td>0.462864</td>\n",
        "      <td>0.462864</td>\n",
-       "      <td>469.226685</td>\n",
+       "      <td>469.226715</td>\n",
        "      <td>0.773290</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -3162,35 +3171,35 @@
        "0                                         Oxidation@M         2    8      0   \n",
        "1                                                                  8      0   \n",
        "2                                   Carbamidomethyl@C         2    8      0   \n",
-       "3             Acetyl@Protein N-term;Carbamidomethyl@C       0;2    8      0   \n",
+       "3             Acetyl@Protein_N-term;Carbamidomethyl@C       0;2    8      0   \n",
        "4                                         Oxidation@M         6    8      1   \n",
        "5                                                                  8      1   \n",
        "6                                   Carbamidomethyl@C         6    8      1   \n",
-       "7             Acetyl@Protein N-term;Carbamidomethyl@C       0;6    8      1   \n",
+       "7             Acetyl@Protein_N-term;Carbamidomethyl@C       0;6    8      1   \n",
        "8                       Oxidation@M;Carbamidomethyl@C       1;3    9      0   \n",
        "9                                   Carbamidomethyl@C         3    9      0   \n",
-       "10  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...     0;1;3    9      0   \n",
-       "11            Acetyl@Protein N-term;Carbamidomethyl@C       0;3    9      0   \n",
+       "10  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...     0;1;3    9      0   \n",
+       "11            Acetyl@Protein_N-term;Carbamidomethyl@C       0;3    9      0   \n",
        "12                      Oxidation@M;Carbamidomethyl@C       8;6    9      1   \n",
        "13                                  Carbamidomethyl@C         6    9      1   \n",
-       "14  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...     0;8;6    9      1   \n",
-       "15            Acetyl@Protein N-term;Carbamidomethyl@C       0;6    9      1   \n",
+       "14  Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...     0;8;6    9      1   \n",
+       "15            Acetyl@Protein_N-term;Carbamidomethyl@C       0;6    9      1   \n",
        "16                                        Oxidation@M         7   11      0   \n",
        "17                                        Oxidation@M         7   11      0   \n",
        "18                                                                11      0   \n",
        "19                                                                11      0   \n",
-       "20                  Acetyl@Protein N-term;Oxidation@M       0;7   11      0   \n",
-       "21                  Acetyl@Protein N-term;Oxidation@M       0;7   11      0   \n",
-       "22                              Acetyl@Protein N-term         0   11      0   \n",
-       "23                              Acetyl@Protein N-term         0   11      0   \n",
+       "20                  Acetyl@Protein_N-term;Oxidation@M       0;7   11      0   \n",
+       "21                  Acetyl@Protein_N-term;Oxidation@M       0;7   11      0   \n",
+       "22                              Acetyl@Protein_N-term         0   11      0   \n",
+       "23                              Acetyl@Protein_N-term         0   11      0   \n",
        "24                                        Oxidation@M         4   11      1   \n",
        "25                                        Oxidation@M         4   11      1   \n",
        "26                                                                11      1   \n",
        "27                                                                11      1   \n",
-       "28                  Acetyl@Protein N-term;Oxidation@M       0;4   11      1   \n",
-       "29                  Acetyl@Protein N-term;Oxidation@M       0;4   11      1   \n",
-       "30                              Acetyl@Protein N-term         0   11      1   \n",
-       "31                              Acetyl@Protein N-term         0   11      1   \n",
+       "28                  Acetyl@Protein_N-term;Oxidation@M       0;4   11      1   \n",
+       "29                  Acetyl@Protein_N-term;Oxidation@M       0;4   11      1   \n",
+       "30                              Acetyl@Protein_N-term         0   11      1   \n",
+       "31                              Acetyl@Protein_N-term         0   11      1   \n",
        "32                                        Oxidation@M         6   13      1   \n",
        "33                                        Oxidation@M         6   13      1   \n",
        "34                                                                13      1   \n",
@@ -3269,12 +3278,12 @@
        "23  468.311920       0.771782  30.0       Lumos             190            200  \n",
        "24  400.909912       0.990859  30.0       Lumos             200            210  \n",
        "25  478.989624       0.789230  30.0       Lumos             210            220  \n",
-       "26  402.555023       0.994806  30.0       Lumos             220            230  \n",
-       "27  482.206787       0.794435  30.0       Lumos             230            240  \n",
-       "28  414.260437       1.024166  30.0       Lumos             240            250  \n",
-       "29  470.269653       0.775096  30.0       Lumos             250            260  \n",
+       "26  402.554993       0.994805  30.0       Lumos             220            230  \n",
+       "27  482.206757       0.794435  30.0       Lumos             230            240  \n",
+       "28  414.260406       1.024166  30.0       Lumos             240            250  \n",
+       "29  470.269684       0.775096  30.0       Lumos             250            260  \n",
        "30  417.726074       1.032617  30.0       Lumos             260            270  \n",
-       "31  469.226685       0.773290  30.0       Lumos             270            280  \n",
+       "31  469.226715       0.773290  30.0       Lumos             270            280  \n",
        "32  421.076538       1.041983  30.0       Lumos             280            292  \n",
        "33  490.627533       0.809400  30.0       Lumos             292            304  \n",
        "34  423.214233       1.047176  30.0       Lumos             304            316  \n",
@@ -3287,7 +3296,7 @@
        "[40 rows x 26 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3326,7 +3335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -3381,7 +3390,7 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl@Any N-term</td>\n",
+       "      <td>Oxidation@M;Dimethyl@Any_N-term</td>\n",
        "      <td>2;0</td>\n",
        "      <td>8</td>\n",
        "      <td>0</td>\n",
@@ -3391,7 +3400,7 @@
        "      <td>0</td>\n",
        "      <td>0.242660</td>\n",
        "      <td>0.242660</td>\n",
-       "      <td>345.390839</td>\n",
+       "      <td>345.390869</td>\n",
        "      <td>0.850135</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -3405,7 +3414,7 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>8</td>\n",
        "      <td>0</td>\n",
@@ -3429,7 +3438,7 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term</td>\n",
        "      <td>2;0</td>\n",
        "      <td>8</td>\n",
        "      <td>0</td>\n",
@@ -3453,7 +3462,7 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term</td>\n",
        "      <td>6;0</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
@@ -3463,7 +3472,7 @@
        "      <td>2</td>\n",
        "      <td>0.040846</td>\n",
        "      <td>0.040846</td>\n",
-       "      <td>319.400330</td>\n",
+       "      <td>319.400391</td>\n",
        "      <td>0.786163</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -3477,7 +3486,7 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any_N-term</td>\n",
        "      <td>0</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
@@ -3525,7 +3534,7 @@
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl@Any N-term;Dimethyl@K</td>\n",
+       "      <td>Dimethyl@Any_N-term;Dimethyl@K</td>\n",
        "      <td>0;8</td>\n",
        "      <td>13</td>\n",
        "      <td>1</td>\n",
@@ -3535,8 +3544,8 @@
        "      <td>0</td>\n",
        "      <td>0.620949</td>\n",
        "      <td>0.620949</td>\n",
-       "      <td>430.461273</td>\n",
-       "      <td>1.065108</td>\n",
+       "      <td>430.461243</td>\n",
+       "      <td>1.065107</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>692</td>\n",
@@ -3549,7 +3558,7 @@
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl@Any N-term;Dimethyl@K</td>\n",
+       "      <td>Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K</td>\n",
        "      <td>6;0;8</td>\n",
        "      <td>13</td>\n",
        "      <td>1</td>\n",
@@ -3559,7 +3568,7 @@
        "      <td>0</td>\n",
        "      <td>0.468698</td>\n",
        "      <td>0.468698</td>\n",
-       "      <td>482.796692</td>\n",
+       "      <td>482.796661</td>\n",
        "      <td>0.796481</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -3573,7 +3582,7 @@
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl@Any N-term;Dimethyl@K</td>\n",
+       "      <td>Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K</td>\n",
        "      <td>6;0;8</td>\n",
        "      <td>13</td>\n",
        "      <td>1</td>\n",
@@ -3583,7 +3592,7 @@
        "      <td>0</td>\n",
        "      <td>0.468698</td>\n",
        "      <td>0.468698</td>\n",
-       "      <td>428.150757</td>\n",
+       "      <td>428.150787</td>\n",
        "      <td>1.059489</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
@@ -3597,7 +3606,7 @@
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...</td>\n",
        "      <td>0;5</td>\n",
        "      <td>13</td>\n",
        "      <td>0</td>\n",
@@ -3621,7 +3630,7 @@
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...</td>\n",
        "      <td>0;5</td>\n",
        "      <td>13</td>\n",
        "      <td>0</td>\n",
@@ -3658,17 +3667,17 @@
        "79  FGHIKLMNPQRST             0              2          False           True   \n",
        "\n",
        "                                                 mods mod_sites  nAA  decoy  \\\n",
-       "0                     Oxidation@M;Dimethyl@Any N-term       2;0    8      0   \n",
-       "1                     Dimethyl:2H(6)13C(2)@Any N-term         0    8      0   \n",
-       "2         Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term       2;0    8      0   \n",
-       "3         Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term       6;0    8      1   \n",
-       "4                     Dimethyl:2H(6)13C(2)@Any N-term         0    8      1   \n",
+       "0                     Oxidation@M;Dimethyl@Any_N-term       2;0    8      0   \n",
+       "1                     Dimethyl:2H(6)13C(2)@Any_N-term         0    8      0   \n",
+       "2         Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term       2;0    8      0   \n",
+       "3         Oxidation@M;Dimethyl:2H(6)13C(2)@Any_N-term       6;0    8      1   \n",
+       "4                     Dimethyl:2H(6)13C(2)@Any_N-term         0    8      1   \n",
        "..                                                ...       ...  ...    ...   \n",
-       "75                     Dimethyl@Any N-term;Dimethyl@K       0;8   13      1   \n",
-       "76         Oxidation@M;Dimethyl@Any N-term;Dimethyl@K     6;0;8   13      1   \n",
-       "77         Oxidation@M;Dimethyl@Any N-term;Dimethyl@K     6;0;8   13      1   \n",
-       "78  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
-       "79  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
+       "75                     Dimethyl@Any_N-term;Dimethyl@K       0;8   13      1   \n",
+       "76         Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K     6;0;8   13      1   \n",
+       "77         Oxidation@M;Dimethyl@Any_N-term;Dimethyl@K     6;0;8   13      1   \n",
+       "78  Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
+       "79  Dimethyl:2H(6)13C(2)@Any_N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
        "\n",
        "    charge  ...       i_5 mono_isotope_idx   rt_pred  rt_norm_pred  \\\n",
        "0        2  ...  0.001352                0  0.242660      0.242660   \n",
@@ -3684,22 +3693,22 @@
        "79       3  ...  0.058123                2  0.206957      0.206957   \n",
        "\n",
        "      ccs_pred  mobility_pred   nce  instrument  frag_start_idx  frag_stop_idx  \n",
-       "0   345.390839       0.850135  30.0       Lumos               0              7  \n",
+       "0   345.390869       0.850135  30.0       Lumos               0              7  \n",
        "1   313.133270       0.770554  30.0       Lumos               7             14  \n",
        "2   314.302277       0.773615  30.0       Lumos              14             21  \n",
-       "3   319.400330       0.786163  30.0       Lumos              21             28  \n",
+       "3   319.400391       0.786163  30.0       Lumos              21             28  \n",
        "4   320.333069       0.788271  30.0       Lumos              28             35  \n",
        "..         ...            ...   ...         ...             ...            ...  \n",
-       "75  430.461273       1.065108  30.0       Lumos             692            704  \n",
-       "76  482.796692       0.796481  30.0       Lumos             704            716  \n",
-       "77  428.150757       1.059489  30.0       Lumos             716            728  \n",
+       "75  430.461243       1.065107  30.0       Lumos             692            704  \n",
+       "76  482.796661       0.796481  30.0       Lumos             704            716  \n",
+       "77  428.150787       1.059489  30.0       Lumos             716            728  \n",
        "78  412.858307       1.021552  30.0       Lumos             728            740  \n",
        "79  478.660187       0.789583  30.0       Lumos             740            752  \n",
        "\n",
        "[80 rows x 27 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3707,8 +3716,8 @@
    "source": [
     "_lib.import_and_process_protein_dict(protein_dict)\n",
     "_lib.add_peptide_labeling({\n",
-    "    'light': ['Dimethyl@Any N-term','Dimethyl@K'],\n",
-    "    'heavy': ['Dimethyl:2H(6)13C(2)@Any N-term','Dimethyl:2H(6)13C(2)@K'],\n",
+    "    'light': ['Dimethyl@Any_N-term','Dimethyl@K'],\n",
+    "    'heavy': ['Dimethyl:2H(6)13C(2)@Any_N-term','Dimethyl:2H(6)13C(2)@K'],\n",
     "})\n",
     "_lib.predict_all()\n",
     "assert (_lib.precursor_df.decoy==1).any()\n",
@@ -3732,6 +3741,18 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb
index 80dda52d..a43bcad3 100644
--- a/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb
+++ b/nbs_tests/psm_frag_reader/maxquant_frag_reader.ipynb
@@ -69,7 +69,7 @@
     "assert 'frag_stop_idx' in mq_reader.psm_df.columns\n",
     "assert mq_reader.psm_df.mods.values[0] == ''\n",
     "assert mq_reader.psm_df.mod_sites.values[0] == ''\n",
-    "assert mq_reader.psm_df.mods.values[1] == 'Acetyl@Protein N-term'\n",
+    "assert mq_reader.psm_df.mods.values[1] in ('Acetyl@Protein_N-term', 'Acetyl@Protein N-term')\n",
     "assert mq_reader.psm_df.mod_sites.values[1] == '0'\n",
     "seq = 'AAAGPSNSSSGTSTPR'\n",
     "frag_types = raw_df[raw_df['Sequence']==seq]['Matches'].values[0].split(';')\n",
@@ -496,6 +496,18 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/spec_lib/library_factory.ipynb b/nbs_tests/spec_lib/library_factory.ipynb
index 9e96cffe..bcf70221 100644
--- a/nbs_tests/spec_lib/library_factory.ipynb
+++ b/nbs_tests/spec_lib/library_factory.ipynb
@@ -23,6 +23,15 @@
     "Factory classes to predict libraries from different sources (input file format)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch  # noqa: F401  # imported here to prevent a crash on Mac Arm"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/nbs_tests/spec_lib/predict_lib.ipynb b/nbs_tests/spec_lib/predict_lib.ipynb
index 7fa38264..55faffad 100644
--- a/nbs_tests/spec_lib/predict_lib.ipynb
+++ b/nbs_tests/spec_lib/predict_lib.ipynb
@@ -33,6 +33,15 @@
     "\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch  # noqa: F401  # imported here to prevent a crash on Mac Arm"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/nbs_tests/spec_lib/test_translate_tsv.ipynb b/nbs_tests/spec_lib/test_translate_tsv.ipynb
index 514e2c9b..b9658a39 100644
--- a/nbs_tests/spec_lib/test_translate_tsv.ipynb
+++ b/nbs_tests/spec_lib/test_translate_tsv.ipynb
@@ -138,7 +138,7 @@
     "charged_frag_types = ['b_z1','y_z1','y_modloss_z1']\n",
     "precursor_df = pd.DataFrame({\n",
     "    'sequence': ['ASGHCEWMKYR']*repeat+['ASGHCEWMAAR'],\n",
-    "    'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n",
+    "    'mods': ['Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n",
     "    'mod_sites': ['0;4;8']*repeat+[''],\n",
     "    'nAA': 11,\n",
     "    'NCE': 20,\n",
diff --git a/peptdeep/hla/hla_class1.py b/peptdeep/hla/hla_class1.py
index 3a9f5789..f093d53b 100644
--- a/peptdeep/hla/hla_class1.py
+++ b/peptdeep/hla/hla_class1.py
@@ -6,7 +6,7 @@
 from typing import Union
 
 import peptdeep.model.building_block as building_block
-from peptdeep.model.model_interface import ModelInterface
+from peptdeep.model.model_interface import ModelInterface, append_nAA_column_if_missing
 from peptdeep.model.featurize import get_ascii_indices
 from peptdeep.pretrained_models import pretrain_dir, download_models, global_settings
 
@@ -380,6 +380,42 @@ def predict_from_proteins(
         peptide_df["sequence"] = get_seq_series(peptide_df, self._cat_protein_sequence)
         return peptide_df
 
+    def _concat_neg_df(self, precursor_df, column_to_train="HLA"):
+        """Label input rows 1 and add random 0-labelled sequences for each `nAA`."""
+        precursor_df = append_nAA_column_if_missing(precursor_df)
+        precursor_df[column_to_train] = 1
+
+        def _negatives(pep_len, count):
+            seqs = get_random_sequences(self.protein_df, n=count, pep_len=pep_len)
+            return pd.DataFrame({"sequence": seqs, "nAA": pep_len, column_to_train: 0})
+
+        groups = precursor_df.groupby("nAA")
+        neg_dfs = [_negatives(nAA, len(group_df)) for nAA, group_df in groups]
+        return pd.concat([precursor_df, *neg_dfs]).reset_index(drop=True)
+
+    def test(self, precursor_df, thresholds=(0.5, 0.6, 0.7, 0.8, 0.9)):
+        """Score `precursor_df` plus generated negatives at each probability cutoff.
+
+        Returns a DataFrame with columns `HLA_prob_pred` (the cutoff), `precision`,
+        `recall` and `false_positive`, one row per value in `thresholds`.
+        """
+        df = self._concat_neg_df(precursor_df)
+        self.predict(df)
+        is_pos = df.HLA == 1
+        # Normalise by the real class sizes rather than assuming a 50/50 split.
+        n_pos, n_neg = is_pos.sum(), (~is_pos).sum()
+        rows = []
+        for prob in thresholds:
+            called = df.HLA_prob_pred > prob
+            # Negatives scoring strictly below the cutoff count as true negatives.
+            rejected_neg = ((df.HLA_prob_pred < prob) & ~is_pos).sum()
+            precision = df[called].HLA.mean()
+            recall = (called & is_pos).sum() / n_pos
+            rows.append((prob, precision, recall, 1 - rejected_neg / n_neg))
+        return pd.DataFrame(
+            rows, columns=["HLA_prob_pred", "precision", "recall", "false_positive"]
+        )
+
     def _download_pretrained_hla_model(self):
         download_models(url=self._model_url, target_path=self._model_zip)
 
diff --git a/peptdeep/hla/hla_utils.py b/peptdeep/hla/hla_utils.py
index d74d36b0..ae29f31f 100644
--- a/peptdeep/hla/hla_utils.py
+++ b/peptdeep/hla/hla_utils.py
@@ -95,12 +95,12 @@ def nonspecific_digest_cat_proteins(
     pd.DataFrame
         A dataframe sorted by `nAA` with three columns:
         `start_pos`: the start index of the peptide in cat_protein
-        `end_pos`: the stop/end index of the peptide in cat_protein
+        `stop_pos`: the stop/end index of the peptide in cat_protein
         `nAA`: the number of amino acids (peptide length).
     """
     pos_starts, pos_ends = get_substring_indices(cat_sequence, min_len, max_len)
-    digest_df = pd.DataFrame(dict(start_pos=pos_starts, end_pos=pos_ends))
-    digest_df["nAA"] = digest_df.end_pos - digest_df.start_pos
+    digest_df = pd.DataFrame(dict(start_pos=pos_starts, stop_pos=pos_ends))
+    digest_df["nAA"] = digest_df.stop_pos - digest_df.start_pos
     digest_df.sort_values("nAA", inplace=True)
     digest_df.reset_index(inplace=True, drop=True)
     return digest_df
@@ -170,7 +170,7 @@ def get_seq_series(idxes_df: pd.DataFrame, cat_prot: str) -> pd.Series:
     pd.Series
         pd.Series with sub-sequences (peptide sequences).
     """
-    return idxes_df[["start_pos", "end_pos"]].apply(
+    return idxes_df[["start_pos", "stop_pos"]].apply(
         lambda x: cat_prot[slice(*x)], axis=1
     )
 
diff --git a/peptdeep/model/model_interface.py b/peptdeep/model/model_interface.py
index 993373ec..864e3370 100644
--- a/peptdeep/model/model_interface.py
+++ b/peptdeep/model/model_interface.py
@@ -79,19 +79,17 @@ def __init__(
             optimizer, num_warmup_steps, num_training_steps, num_cycles, last_epoch
         )
 
-    def step(self, epoch: int, loss: float):
+    def step(self, epoch: int = None, loss=None):
         """
         Get the learning rate for the next epoch.
 
         Parameters
         ----------
-        epoch : int
+        epoch : int, optional (Deprecated, ignored)
             The current epoch number.
-        loss : float
-            The loss value of the current epoch.
 
         """
-        return self.lambda_lr.step(epoch)
+        return self.lambda_lr.step()
 
     def get_last_lr(self) -> float:
         """
diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py
index 160a2c72..ecf92fb8 100644
--- a/peptdeep/protein/fasta.py
+++ b/peptdeep/protein/fasta.py
@@ -21,7 +21,7 @@ def __init__(
         precursor_charge_max: int = 4,
         precursor_mz_min: float = 400.0,
         precursor_mz_max: float = 1800.0,
-        var_mods: list = ["Acetyl@Protein N-term", "Oxidation@M"],
+        var_mods: list = ["Acetyl@Protein_N-term", "Oxidation@M"],
         min_var_mod_num: int = 0,
         max_var_mod_num: int = 2,
         fix_mods: list = ["Carbamidomethyl@C"],
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 6edc016c..c383dfc9 100644
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -1,2 +1,6 @@
-INCLUDED_NBS=$(find ../nbs_tests -name "*.ipynb")
-python -m pytest --nbmake $(echo $INCLUDED_NBS)
+TEST_NBS=$(find ../nbs_tests -name "*.ipynb")
+TUTORIAL_NBS=$(find ../docs/tutorials -name "*.ipynb")
+# Join with a plain space (portable; $'\n' is bash-only); word splitting separates the paths.
+ALL_NBS="$TEST_NBS $TUTORIAL_NBS"
+# Execute every collected notebook through pytest's nbmake plugin.
+python -m pytest --nbmake $ALL_NBS