diff --git a/README.md b/README.md
index e61c58e..97a2827 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ E.g. http://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/absolute
 ### Peptide Normalization - peptide_normalization.py
 
 ```asciidoc
-python peptide_normalization.py --peptides PXD003947-peptides.csv --impute --normalize --contaminants data/contaminants_ids.tsv --output PXD003947-peptides-norm.tsv
+python peptide_normalization.py --msstats PXD003947.sdrf_openms_design_msstats_in.csv --sdrf PXD003947.sdrf.tsv --remove_decoy_contaminants data/contaminants_ids.tsv --remove_low_frequency_peptides --output PXD003947-peptides-norm.tsv
 ```
 
 The command provides an additional `flag` for skip_normalization, pnormalization, compress, log2, violin, verbose.
@@ -70,31 +70,40 @@ Usage: peptide_normalization.py [OPTIONS]
 
 Options:
-  -m, --msstats TEXT      MsStats file import generated by quantms [required]
-  -s, --sdrf TEXT         SDRF file import generated by quantms [required]
-  --min_aa INTEGER        Minimum number of amino acids to filter peptides
-  --min_unique INTEGER    Minimum number of unique peptides to filter proteins
-  --contaminants TEXT     Contaminants and high abundant proteins to be removed
-  --output TEXT           Peptide intensity file including other all properties
-                          for normalization
-  --skip_normalization    Skip normalization step
-  --nmethod TEXT          Normalization method used to normalize intensities for
-                          all samples (options: qnorm)
-  --pnormalization        Normalize the peptide intensities using different
-                          methods (options: qnorm)
-  --compress              Read the input peptides file in compress gzip file
-  --log2                  Transform to log2 the peptide intensity values before
-                          normalization
-  --violin                Use violin plot instead of boxplot for distribution
-                          representations
-  --verbose               Print addition information about the distributions of
-                          the intensities, number of peptides remove after
-                          normalization, etc.
-  --qc_report TEXT        PDF file to store multiple QC images
-  --help                  Show this message and exit.
+  -m, --msstats TEXT               MsStats file import generated by quantms
+  -p, --parquet TEXT               Parquet file import generated by quantms
+  -s, --sdrf TEXT                  SDRF file import generated by quantms
+  --min_aa INTEGER                 Minimum number of amino acids to filter
+                                   peptides
+  --min_unique INTEGER             Minimum number of unique peptides to filter
+                                   proteins
+  --remove_ids TEXT                Remove specific protein ids from the
+                                   analysis using a file with one id per line
+  --remove_decoy_contaminants      Remove decoy and contaminants proteins from
+                                   the analysis
+  --remove_low_frequency_peptides  Remove peptides that are present in less
+                                   than 20% of the samples
+  --output TEXT                    Peptide intensity file including all other
+                                   properties for normalization
+  --skip_normalization             Skip normalization step
+  --nmethod TEXT                   Normalization method used to normalize
+                                   intensities for all samples (options: qnorm)
+  --pnormalization                 Normalize the peptide intensities using
+                                   different methods (options: qnorm)
+  --compress                       Read the input peptides file in compressed
+                                   gzip format
+  --log2                           Transform to log2 the peptide intensity
+                                   values before normalization
+  --violin                         Use violin plot instead of boxplot for
+                                   distribution representations
+  --verbose                        Print additional information about the
+                                   distributions of the intensities, number of
+                                   peptides removed after normalization, etc.
+  --qc_report TEXT                 PDF file to store multiple QC images
+  --help                           Show this message and exit.
 ```
 
-Peptide normalization starts from the output file from script `peptide_file_generation.py`. The structure of the input contains the following columns:
+Peptide normalization starts from the peptides dataframe. The input contains the following columns:
 
 - ProteinName: Protein name
 - PeptideSequence: Peptide sequence including post-translation modifications `(e.g. .(Acetyl)ASPDWGYDDKN(Deamidated)GPEQWSK)`
@@ -245,6 +254,60 @@ For cellular protein copy number calculation, the uniprot accession of histones
 
 In the calculation of protein concentration, the volume is calculated according to the cell protein concentration first, and then the protein mass is divided by the volume to calculate the intracellular protein concentration.
 
+### Datasets Merge - datasets_merge.py
+Batch effects can occur in protein identification and quantification results across different studies, caused by differences in experimental techniques, experimental conditions, data analysis, etc. Here we provide a method to correct such batch effects: first impute the ibaq data, then remove outliers using `hdbscan`, and finally apply batch effect correction using `pycombat`.
+
+```asciidoc
+python datasets_merge.py datasets_merge --data_folder ../ibaqpy_test/ --output datasets-merge.csv --verbose
+```
+
+```asciidoc
+python datasets_merge.py --help
+Usage: datasets_merge.py [OPTIONS]
+
+  Merge ibaq results from compute_ibaq.py.
+
+  :param data_folder: Data folder containing SDRFs and ibaq CSVs.
+  :param output: Output file after batch effect removal.
+  :param covariate: Indicators included in covariate consideration when datasets are merged.
+  :param organism: Organism to keep in input data.
+  :param covariate_to_keep: Keep tissue parts from metadata, e.g. 'LV,RV,LA,RA'.
+  :param non_missing_percent_to_keep: Minimum fraction of non-missing values required in each group.
+  :param n_components: Number of principal components to be computed.
+  :param min_cluster: The minimum size of clusters.
+  :param min_sample_num: The minimum number of samples in a neighborhood for a point to be considered as a core point.
+  :param n_iter: Number of iterations to be performed.
+  :param verbose/quiet: Output debug information.
+
+Options:
+  -d, --data_folder TEXT        Data folder containing SDRFs and ibaq CSVs. [required]
+  -o, --output TEXT             Output file after batch effect removal. [required]
+  -c, --covariate TEXT          Indicators included in covariate consideration when datasets are merged.
+  --organism TEXT               Organism to keep in input data.
+  --covariate_to_keep TEXT      Keep tissue parts from metadata, e.g. 'LV,RV,LA,RA'.
+  --non_missing_percent_to_keep FLOAT
+                                Minimum fraction of non-missing values required in each group.
+  --n_components TEXT           Number of principal components to be computed.
+  --min_cluster TEXT            The minimum size of clusters.
+  --min_sample_num TEXT         The minimum number of samples in a neighborhood for a point to be considered as a core point.
+  --n_iter TEXT                 Number of iterations to be performed.
+  -v, --verbose / -q, --quiet   Output debug information.
+  --help                        Show this message and exit.
+```
+
+#### Impute Missing Values
+Proteins missing in more than 30% of the samples are removed. Users can keep only the tissue parts of interest; the data are then converted to an expression matrix (samples as columns, proteins as rows) and the remaining missing values are imputed with `KNNImputer`.
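+
+The snippet below is only an illustration of this step (it is not the exact ibaqpy implementation) and runs on a small, made-up IBAQ matrix:
+
+```python
+import numpy as np
+import pandas as pd
+from sklearn.impute import KNNImputer
+
+# Hypothetical IBAQ matrix: proteins as rows, samples as columns.
+ibaq_matrix = pd.DataFrame(
+    {
+        "sample_1": [7.1, 6.3, np.nan, 5.8],
+        "sample_2": [7.0, np.nan, 4.9, 5.9],
+        "sample_3": [6.8, 6.1, 5.0, np.nan],
+    },
+    index=["P12345", "P67890", "Q11111", "Q22222"],
+)
+
+# Keep proteins quantified in at least 70% of the samples.
+min_observed = int(0.7 * ibaq_matrix.shape[1])
+filtered = ibaq_matrix.dropna(thresh=min_observed)
+
+# Impute the remaining missing values from the k nearest proteins.
+imputed = pd.DataFrame(
+    KNNImputer(n_neighbors=2).fit_transform(filtered),
+    index=filtered.index,
+    columns=filtered.columns,
+)
+print(imputed)
+```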
+
+#### Remove Outliers
+For outlier removal, hierarchical density-based clustering is performed with `hdbscan.HDBSCAN`, and samples labeled -1 (outliers) are highlighted in the PCA plot. By default, the minimum cluster size and the minimum number of neighbors required for a core point are both set to the minimum number of samples across all datasets.
+
+*Users can skip this step (see the `--skip_outliers_removal` flag) and perform outlier removal manually.*
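+
+Purely as an illustration (the pipeline itself does this inside `Combiner.outlier_removal`; the matrix and parameter values below are made up):
+
+```python
+import numpy as np
+import hdbscan
+from sklearn.decomposition import PCA
+
+# Hypothetical expression matrix: rows are samples, columns are proteins.
+rng = np.random.default_rng(42)
+expression = rng.normal(size=(40, 100))
+
+# Project the samples into a low-dimensional PCA space.
+components = PCA(n_components=5).fit_transform(expression)
+
+# Density-based clustering; samples assigned to no cluster get the label -1.
+labels = hdbscan.HDBSCAN(min_cluster_size=5, min_samples=5).fit_predict(components)
+
+outliers = np.where(labels == -1)[0]
+print(f"Potential outlier samples: {outliers}")
+```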
+
+#### Batch Effect Correction
+`pycombat` is used for batch effect correction. The batch is defined by the dataset (i.e. the PXD identifier of each study), and the covariate should be `tissue_part`.
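+
+As an illustration, the snippet below applies ComBat via the standalone pyComBat package (`combat.pycombat`) to a made-up matrix; ibaqpy itself performs this step through its `apply_batch_correction` helper:
+
+```python
+import pandas as pd
+from combat.pycombat import pycombat
+
+# Hypothetical IBAQ matrix: proteins as rows, samples as columns.
+expression = pd.DataFrame(
+    {
+        "S1": [7.2, 5.1, 6.4, 8.0, 4.9, 6.0],
+        "S2": [7.0, 5.3, 6.2, 8.1, 5.1, 6.2],
+        "S3": [7.3, 5.0, 6.5, 7.9, 5.0, 5.9],
+        "S4": [8.1, 6.0, 7.3, 9.0, 5.8, 7.1],
+        "S5": [7.9, 6.2, 7.1, 9.2, 6.0, 7.0],
+        "S6": [8.0, 6.1, 7.2, 9.1, 5.9, 7.2],
+    },
+    index=["P1", "P2", "P3", "P4", "P5", "P6"],
+)
+
+# One batch label per sample column; each batch corresponds to one dataset (PXD accession).
+batch = [1, 1, 1, 2, 2, 2]
+
+corrected = pycombat(expression, batch)
+print(corrected)
+```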
+
 ### Credits
 
 - Julianus Pfeuffer
diff --git a/bin/datasets_merger.py b/bin/datasets_merger.py
index 1c82b9e..687d99f 100644
--- a/bin/datasets_merger.py
+++ b/bin/datasets_merger.py
@@ -2,6 +2,7 @@ import re
 
 import click
 from ibaq.combiner import Combiner
+from ibaq import __version__
 
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
 
@@ -13,7 +14,7 @@ def cli():
     """
     pass
 
-
+@click.version_option(version=__version__, package_name="ibaqpy", message="%(package)s %(version)s")
 @click.command("datasets_merge", short_help="Merge ibaq results from compute_ibaq")
 @click.option("--data_folder", "-d", help="Data dolfer contains SDRFs and ibaq CSVs.", required=True)
 @click.option("--output", "-o", help="Output file after batch effect removal.", required=True)
@@ -21,11 +22,12 @@ def cli():
     "--covariate",
     "-c",
     default=None,
-    help="Indicators included in covariate consideration when datasets are merged.",
+    help="Indicator included in covariate consideration when datasets are merged.",
 )
 @click.option("--organism", help="Organism to keep in input data.", default="HUMAN")
 @click.option("--covariate_to_keep", "-k", help="Keep tissue parts from metadata, e.g. 'LV,RV,LA,RA'.", default=None)
 @click.option("--non_missing_percent_to_keep", "-m", help="non-missing values in each group.", default=0.3)
+@click.option("--skip_outliers_removal", help="Skip removing outliers in all datasets.", default=False, is_flag=True)
 @click.option("--n_components", help="Number of principal components to be computed.", default=None)
 @click.option("--min_cluster", help="The minimum size of clusters.", default=None)
 @click.option(
@@ -44,6 +46,7 @@ def datasets_merge(
     organism: str,
     covariate_to_keep: list,
     non_missing_percent_to_keep: float,
+    skip_outliers_removal: bool,
     n_components: int,
     min_cluster: int,
     min_sample_num: int,
@@ -54,7 +57,8 @@ def datasets_merge(
         covariate_to_keep = re.split(",\s*", covariate_to_keep)
     combiner = Combiner(data_folder = data_folder, covariate = covariate)
     combiner.imputer(covariate_to_keep)
-    combiner.outlier_removal(n_components, min_cluster, min_sample_num, n_iter)
+    if not skip_outliers_removal:
+        combiner.outlier_removal(n_components, min_cluster, min_sample_num, n_iter)
     combiner.batch_correction(n_components, covariate_to_keep)
     result = combiner.df_corrected
     result.to_csv(output, sep=",", index=True)
diff --git a/bin/peptide_normalization.py b/bin/peptide_normalization.py
index 6440724..f78dee4 100644
--- a/bin/peptide_normalization.py
+++ b/bin/peptide_normalization.py
@@ -248,15 +248,13 @@ def peptide_normalization(msstats: str, parquet: str, sdrf: str, min_aa: int, mi
                           pnormalization: bool, compress: bool, log2: bool, violin: bool, verbose: bool,
                           qc_report: str) -> None:
 
-    def quit():
+    if output is None:
         print_help_msg(peptide_normalization)
         exit(1)
 
-    if output is None:
-        quit()
-
     if parquet is None and (msstats is None or sdrf is None):
-        quit()
+        print_help_msg(peptide_normalization)
+        exit(1)
 
     compression_method = 'gzip' if compress else None
 
diff --git a/ibaq/__init__.py b/ibaq/__init__.py
index e69de29..e344246 100644
--- a/ibaq/__init__.py
+++ b/ibaq/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.3"
\ No newline at end of file
diff --git a/ibaq/combiner.py b/ibaq/combiner.py
index 0cc6195..0f71a5f 100644
--- a/ibaq/combiner.py
+++ b/ibaq/combiner.py
@@ -14,7 +14,7 @@ class Combiner:
 
-    def __init__(self, data_folder: os.PathLike, covariate: str, organism: str="HUMAN"):
+    def __init__(self, data_folder: os.PathLike, covariate: str=None, organism: str="HUMAN"):
         """Generate concated IbaqNorm and metadata.
         """
         logger.info("Combining SDRFs and ibaq results ...")
@@ -174,14 +174,13 @@ def batch_correction(self, n_components: int=None, tissue_parts_to_keep: int=Non
         self.metadata = self.metadata.reset_index(drop=True)
         self.metadata = self.metadata.set_index("sample_id").reindex(columns, axis=0).reset_index()
         if self.covariate:
-            # get the covariates from metadata as list
+            # get the covariates from metadata as a list
             covariates_index = self.metadata[self.covariate].tolist()
-            print(covariates_index)
         else:
-            covariates_index = None
+            covariates_index = []
 
         # apply batch correction
-        self.df_corrected = apply_batch_correction(self.df, self.batch_index, covs=["tissue_part"] * len(columns))
+        self.df_corrected = apply_batch_correction(self.df, self.batch_index, covs=covariates_index)
         print(self.df_corrected)
 
         # plot PCA of corrected data
diff --git a/ibaq/utils.py b/ibaq/utils.py
index cf92992..d8c04bd 100644
--- a/ibaq/utils.py
+++ b/ibaq/utils.py
@@ -352,7 +352,7 @@ def apply_batch_correction(
         raise ValueError("Every batch factor should have at least 2 samples.")
 
     # If not None, check if the number of covariates match the number of samples
-    if covs is not None:
+    if covs:
         if len(df.columns) != len(covs):
             raise ValueError(
                 "The number of samples should match the number of covariates."
diff --git a/testdata/PXD003947-featrue.parquet b/testdata/PXD003947-featrue.parquet
new file mode 100644
index 0000000..e73c5e5
Binary files /dev/null and b/testdata/PXD003947-featrue.parquet differ