diff --git a/espfit/app/analysis.py b/espfit/app/analysis.py
index 5235935..296cf4d 100644
--- a/espfit/app/analysis.py
+++ b/espfit/app/analysis.py
@@ -58,7 +58,7 @@ def output_directory_path(self, value):
         os.makedirs(value, exist_ok=True)
 
 
-    def load_traj(self, reference_pdb='solvated.pdb', trajectory_netcdf='traj.nc', stride=1, input_directory_path=None):
+    def load_traj(self, reference_pdb='solvated.pdb', trajectory_netcdf='traj.nc', stride=1, exclude_n_frames=0, input_directory_path=None):
         """Load MD trajectory.
         
         Parameters
@@ -71,6 +71,11 @@ def load_traj(self, reference_pdb='solvated.pdb', trajectory_netcdf='traj.nc', s
 
         stride : int, optional
             Stride to load the trajectory. Default is 1.
+        
+        exclude_n_frames : float or int, optional
+            Exclude the first n frames from the trajectory. Default is 0.
+            If float, the first n percentange of frames will be excluded.
+            If int, the first n frames will be excluded.
 
         input_directory_path : str, optional
             Input directory path. Default is None.
@@ -79,6 +84,10 @@ def load_traj(self, reference_pdb='solvated.pdb', trajectory_netcdf='traj.nc', s
         Returns
         -------
         None
+
+        Notes
+        -----
+        * Should `stride` and `exclude_n_frames` be set as class attributes?
         """
         import mdtraj
 
@@ -114,6 +123,17 @@ def load_traj(self, reference_pdb='solvated.pdb', trajectory_netcdf='traj.nc', s
         else:
             self.traj = traj
 
+        # Exclude the first n frames from the trajectory
+        if isinstance(exclude_n_frames, int):
+            if 0 < exclude_n_frames <= traj.n_frames:
+                self.traj = self.traj[exclude_n_frames:]
+        elif isinstance(exclude_n_frames, float):
+            if 0 < exclude_n_frames < 1:
+                exclude_n_frames = int(exclude_n_frames * traj.n_frames)
+                self.traj = self.traj[exclude_n_frames:]
+        else:
+            raise ValueError(f"Invalid exclude_n_frames: {exclude_n_frames}. Expected int or float.")
+
         
 class RNASystem(BaseDataLoader):
     """RNA system class to compute experimental observables from MD simulations.
@@ -175,7 +195,7 @@ def radian_to_degree(self, a):
         return a
 
 
-    def compute_jcouplings(self, weights=None, couplings=None, residues=None):
+    def compute_jcouplings(self, weights=None, couplings=['H1H2', 'H2H3', 'H3H4'], residues=None):
         """Compute J-couplings from MD trajectory.
         
         Parameters
@@ -184,7 +204,7 @@ def compute_jcouplings(self, weights=None, couplings=None, residues=None):
             Weights to compute the J-couplings. Default is None.
 
         couplings : str, optional
-            Name of the couplings to compute. Default is None. 
+            Name of the couplings to compute. Default is ['H1H2', 'H2H3', 'H3H4']. 
             If a list of couplings to be chosen from [H1H2,H2H3,H3H4,1H5P,2H5P,C4Pb,1H5H4,2H5H4,H3P,C4Pe,H1C2/4,H1C6/8] 
             is provided, only the selected couplings will be computed. Otherwise, all couplings will be computed.
 
@@ -208,7 +228,7 @@ def compute_jcouplings(self, weights=None, couplings=None, residues=None):
         """
         import barnaba as bb
 
-        _logger.info("Compute J-couplings from MD trajectory")
+        _logger.debug("Compute J-couplings from MD trajectory")
 
         if couplings is not None:
             # Check if the provided coupling names are valid
@@ -228,23 +248,25 @@ def compute_jcouplings(self, weights=None, couplings=None, residues=None):
         for i, resname in enumerate(resname_list):
             _values = values[:,i,:]  # Coupling values of i-th residue
             values_by_names = dict()
-            for j, coupling_name in enumerate(couplings):
+            for j, coupling_name in enumerate(couplings):                
                 avg_raw = np.round(_values[:,j].mean(), 5)  # e.g. mean value of H1H2 coupling of i-th residue
                 std_raw = np.round(_values[:,j].std(), 5)   # e.g. standard deviation of H1H2 coupling of i-th residue
                 avg_raw = replace_nan_with_none(avg_raw)
                 std_raw = replace_nan_with_none(std_raw)
                 if weights is not None:
                     arr = _values[:,j] * weights
-                    #_logger.info(f'non-weighted: {_values[:,j]}')
-                    #_logger.info(f'weights:      {weights}')
-                    #_logger.info(f'weighted:     {arr}')
+                    _logger.info(f'weights:\n{weights}')
+                    _logger.info(f'non-weighted observalble:\n{_values[:,j]}')
+                    _logger.info(f'weighted observable:\n{arr}')
                     avg = np.round(arr.mean(), 5)
                     std = np.round(arr.std(), 5)
                     avg = replace_nan_with_none(avg)
                     std = replace_nan_with_none(std)
                 else:
+                    _logger.info(f'No weights are provided. Using predicted values without reweighting.')
                     avg = avg_raw
                     std = std_raw
+                _logger.info(f'J-scalar ({coupling_name}-{resname}): avg={avg}, std={std}')
                 values_by_names[coupling_name] = {'avg': avg, 'std': std, 'avg_raw': avg_raw, 'std_raw': std_raw}
             coupling_dict[resname] =  values_by_names
 
diff --git a/espfit/app/sampler.py b/espfit/app/sampler.py
index eb211e5..387b684 100644
--- a/espfit/app/sampler.py
+++ b/espfit/app/sampler.py
@@ -38,8 +38,8 @@ class BaseSimulation(object):
     export_xml(exportSystem=True, exportState=True, exportIntegrator=True, output_directory_path=None):
         Export serialized system XML file and solvated pdb file.
     """
-    def __init__(self, maxIterations=100, nsteps=250000, atomSubset='solute', 
-                 checkpoint_frequency=25000, logging_frequency=250000, netcdf_frequency=250000, 
+    def __init__(self, maxIterations=100, nsteps=2500000, atomSubset='solute', 
+                 checkpoint_frequency=250000, logging_frequency=250000, netcdf_frequency=250000, 
                  output_directory_path=None, input_directory_path=None):
         """Initialize base simulation object.
         
@@ -48,19 +48,19 @@ def __init__(self, maxIterations=100, nsteps=250000, atomSubset='solute',
         maxIterations : int, default=100
             Maximum number of iterations to perform minimization.
 
-        nsteps : int, default=250000 (10 ns using 4 fs timestep)
+        nsteps : int, default=2500000 (10 ns using 4 fs timestep)
             Number of steps to run the simulation.
 
         atomSubset : str, default='solute'
             Subset of atoms to save. Default is 'solute'. Other options 'all' and 'not water'.
             
-        checkpoint_frequency : int, default=25000 (1 ns)
+        checkpoint_frequency : int, default=250000 (1 ns)
             Frequency (in steps) at which to write checkpoint files.
 
-        logging_frequency : int, default=250000 (10 ns)
+        logging_frequency : int, default=250000 (1 ns)
             Frequency (in steps) at which to write logging files.
 
-        netcdf_frequency : int, default=250000 (10 ns)
+        netcdf_frequency : int, default=250000 (1 ns)
             Frequency (in steps) at which to write netcdf files.
 
         output_directory_path : str, optional
@@ -336,7 +336,7 @@ class SetupSampler(BaseSimulation):
     ['amber/protein.ff14SB.xml', 'amber/RNA.OL3.xml']       : pl-multi (TPO): NG, pl-single: OK, RNA: OK
     """
     def __init__(self, 
-                 small_molecule_forcefield='espfit/data/forcefield/espaloma-0.3.2.pt',
+                 small_molecule_forcefield='espaloma-0.3.2',
                  forcefield_files = ['amber/ff14SB.xml', 'amber/phosaa14SB.xml'],
                  water_model='tip3p', 
                  solvent_padding=9.0 * unit.angstroms, 
@@ -355,7 +355,8 @@ def __init__(self,
         Parameters
         ----------
         small_molecule_forcefield : str, optional
-            The force field to be used for small molecules. Default is 'openff-2.1.0'.
+            The force field to be used for small molecules. Default is 'espaloma-0.3.2'.
+            Alternative recommended choice is 'openff-2.1.0'.
         forcefield_files : list, optional
             List of force field files. Default is ['amber14-all.xml'].
         water_model : str, optional
@@ -378,11 +379,15 @@ def __init__(self,
             The integration timestep. Default is 4 * unit.femtoseconds.
         override_with_espaloma : bool, optional
             Whether to override the original parameters with espaloma. Default is True.
+            This will override all solute molecules with espaloma parameters.
         """
         super(SetupSampler, self).__init__(**kwargs)
+        if small_molecule_forcefield == 'espaloma-0.3.2':
+            from importlib.resources import files
+            small_molecule_forcefield = str(files('espfit').joinpath("data/forcefield/espaloma-0.3.2.pt"))
         self.small_molecule_forcefield = small_molecule_forcefield
         self.water_model = water_model
-        self.forcefield_files = self._update_forcefield_files(forcefield_files)
+        self.forcefield_files = forcefield_files
         self.solvent_padding = solvent_padding
         self.ionic_strength = ionic_strength
         self.hmass = hmass
@@ -394,12 +399,20 @@ def __init__(self,
         self.override_with_espaloma = override_with_espaloma
         self.target_class = None
         self.target_name = None
+        # Update forcefield file list to add water model files
+        self._update_forcefield_files()
+        # Get water class (3-site: tip3p, 4-site model: tip4pew)
+        self._get_water_class()
 
 
     @classmethod
-    def from_toml(cls, filename, *args, **override_sampler_kwargs):
+    def _from_toml(cls, filename, *args, **override_sampler_kwargs):
         """Create SetupSampler from a TOML configuration file.
         
+        Note that this is designed for creating new systems with temporary espaloma models generated during 
+        espaloma training. It supports multiple systems in the configuration file and returns a list of 
+        SetupSampler instances.
+
         Parameters
         ----------
         filename : str
@@ -429,30 +442,50 @@ def from_toml(cls, filename, *args, **override_sampler_kwargs):
             print(e)
             raise
         
-        config = config['sampler']['setup']  # list
+        config = config['sampler']['setup']  # list of multiple setups
         if config is None:
             raise ValueError("target is not specified in the configuration file")
         
+        # If the configuration file contains a single system, convert it to a list
+        if not isinstance(config, list):
+            config = [config]
+
         samplers = []
         _logger.debug(f'Found {len(config)} systems in the configuration file')
         for _config in config:
+            # Create SetupSampler instance with default settings
+            # Note that this automatically adds the default water model (tip3p) to `self.forcefield_files`.
             sampler = cls()
 
             # Target information
-            target_class = _config['target_class']
-            target_name = _config['target_name']
-
+            target_class = _config.get('target_class', None)
+            target_name = _config.get('target_name', None)
             sampler.target_class = target_class
             sampler.target_name = target_name
 
-            biopolymer_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/target.pdb')
-            ligand_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/ligand.sdf')
-            if not ligand_file.exists():
-                ligand_file = None
+            # Get biopolymer and ligand file if given
+            # Priority 1: Use input files if given
+            biopolymer_file, ligand_file = None, None
+            if _config.get('biopolymer_file'):
+                biopolymer_file = _config['biopolymer_file']
+                if not os.path.exists(biopolymer_file):
+                    raise FileNotFoundError(f"File not found: {biopolymer_file}")
+            if _config.get('ligand_file'):
+                ligand_file = _config['ligand_file']
+                if not os.path.exists(ligand_file):
+                    raise FileNotFoundError(f"File not found: {ligand_file}")
+            # Priority 2: Search espfit/data/target directory if input files are not given
+            if biopolymer_file is None and ligand_file is None:
+                biopolymer_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/target.pdb')
+                ligand_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/ligand.sdf')
+                if not biopolymer_file.exists():
+                    raise FileNotFoundError(f"File not found: {biopolymer_file}")
+                if not ligand_file.exists():
+                    ligand_file = None
 
             # System settings
             for key, value in _config.items():
-                if key not in ['target_class', 'target_name']:
+                if key not in ['target_class', 'target_name', 'biopolymer_file', 'ligand_file']:
                     if hasattr(sampler, key):
                         if isinstance(value, str) and "*" in value:
                             _value = float(value.split('*')[0].strip())
@@ -462,7 +495,7 @@ def from_toml(cls, filename, *args, **override_sampler_kwargs):
                         setattr(sampler, key, value)
                     else:
                         raise ValueError(f"Invalid keyword argument: {key}")
-            
+
             # Pass temporary espaloma model to the sampler if kwargs are given
             for key, value in override_sampler_kwargs.items():
                 if hasattr(sampler, key):
@@ -476,7 +509,18 @@ def from_toml(cls, filename, *args, **override_sampler_kwargs):
                     sampler.output_directory_path = os.path.join(sampler.output_directory_path, sampler.target_name, f'{args[0]}')
                 else:
                     raise ValueError(f"Invalid argument: {args}. Expected a single integer value for the epoch number.")
-            
+
+            # Empty and recreate `forcefield_files` to avoid appending multiple water model forcefields.
+            # Use list of forcefield files if given in the `override_sampler_kwargs`. If not given, use the default forcefield_files.
+            if 'forcefield_files' in override_sampler_kwargs.keys():
+                pass
+            else:
+                forcefield_files = ['amber/ff14SB.xml', 'amber/phosaa14SB.xml']
+            sampler.forcefield_files = forcefield_files
+            sampler._update_forcefield_files()
+            # Update water class (3-site: tip3p, 4-site model: tip4pew)
+            sampler._get_water_class()
+
             # Create system
             sampler.create_system(biopolymer_file=biopolymer_file, ligand_file=ligand_file)            
             samplers.append(sampler)
@@ -485,7 +529,7 @@ def from_toml(cls, filename, *args, **override_sampler_kwargs):
         return samplers
 
 
-    def _update_forcefield_files(self, forcefield_files):
+    def _update_forcefield_files(self):
         """Get forcefield files.
 
         Update `forcefield_files` depending on the type of water model.
@@ -499,28 +543,23 @@ def _update_forcefield_files(self, forcefield_files):
         # For some reason, the original forcefield_files keeps appending when SetupSampler is called.
         # TODO: Is this the right way to handle this issue?
         import copy
-        _forcefield_files = copy.deepcopy(forcefield_files)
+        _forcefield_files = copy.deepcopy(self.forcefield_files)
 
         # 3-site water models
         if self.water_model == 'tip3p':
             _forcefield_files.append(['amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'])
         elif self.water_model == 'tip3pfb':
-            self.water_model = 'tip3p'
             _forcefield_files.append(['amber/tip3pfb_standard.xml', 'amber/tip3pfb_HFE_multivalent.xml'])
         elif self.water_model == 'spce':
-            self.water_model = 'tip3p'
             _forcefield_files.append(['amber/spce_standard.xml', 'amber/spce_HFE_multivalent.xml'])
         elif self.water_model == 'opc3':
             raise NotImplementedError('see https://github.com/choderalab/rna-espaloma/blob/main/experiment/nucleoside/script/create_system_espaloma.py#L366')
         # 4-site water models
         elif self.water_model == 'tip4pew':
-            self.water_model = 'tip4pew'
             _forcefield_files.append(['amber/tip4pew_standard.xml', 'amber/tip4pew_HFE_multivalent.xml'])
         elif self.water_model == 'tip4pfb':
-            self.water_model = 'tip4pew'
             _forcefield_files.append(['amber/tip4pfb_standard.xml', 'amber/tip4pfb_HFE_multivalent.xml'])
         elif self.water_model == 'opc':
-            self.water_model = 'tip4pew'
             _forcefield_files.append(['amber/opc_standard.xml'])
         else:
             raise NotImplementedError(f'Water model {self.water_model} is not supported.')
@@ -532,7 +571,24 @@ def _update_forcefield_files(self, forcefield_files):
             else:
                 new_forcefield_files.append(f)
 
-        return new_forcefield_files
+        #return new_forcefield_files
+        self.forcefield_files = new_forcefield_files
+
+
+    def _get_water_class(self):
+        """Get water class (3-site or 4-site) based on the water model.
+
+        Set self.water_class to tip3p or tip4pew based on the water model.
+
+        3-site water models: tip3p, tip3pfb, spce
+        4-site water models: tip4pew, tip4pfb, opc
+        """
+        if self.water_model in ['tip3p', 'tip3pfb', 'spce']:
+            self.water_class = 'tip3p'
+        elif self.water_model in ['tip4pew', 'tip4pfb', 'opc']:
+            self.water_class = 'tip4pew'
+        else:
+            raise NotImplementedError(f'Water model {self.water_model} is not supported.')
 
 
     def _load_biopolymer_file(self):
@@ -615,10 +671,10 @@ def create_system(self, biopolymer_file=None, ligand_file=None):
         from openmm import MonteCarloBarostat
         from openmm import LangevinMiddleIntegrator
 
+        # Load biopolymer and ligand files
         self._biopolymer_file = biopolymer_file
         self._ligand_file = ligand_file
 
-        # Load biopolymer and ligand files
         if self._biopolymer_file is None and self._ligand_file is None:
             raise ValueError("At least one biopolymer (.pdb) or ligand (.sdf) file must be provided")
         if self._biopolymer_file is not None:
@@ -660,7 +716,7 @@ def create_system(self, biopolymer_file=None, ligand_file=None):
         # Solvate system
         _logger.debug("Solvating system...")
         modeller = app.Modeller(self._complex_topology, self._complex_positions)
-        modeller.addSolvent(self._system_generator.forcefield, model=self.water_model, padding=self.solvent_padding, ionicStrength=self.ionic_strength)
+        modeller.addSolvent(self._system_generator.forcefield, model=self.water_class, padding=self.solvent_padding, ionicStrength=self.ionic_strength)
 
         # Create system
         self.modeller_solvated_topology = modeller.getTopology()
@@ -684,11 +740,13 @@ def create_system(self, biopolymer_file=None, ligand_file=None):
             # As a workaround, we will delete the original `self._system_generator` and create a new one to regenerate the system with espaloma.
             # Only water and ion forcefield files will be used to regenerate the system. Solute molecules will be parametrized with espaloma.
             # 
-            _logger.debug("Regenerate system with espaloma.")
+            _logger.info("Regenerate system with espaloma")
 
             # Re-create system generator
             del self._system_generator
-            self.forcefield_files = self._update_forcefield_files(forcefield_files=[])  # Get water and ion forcefield files
+            #self.forcefield_files = self._update_forcefield_files(forcefield_files=[])  # Get water and ion forcefield files
+            self.forcefield_files = []    # Initialize forcefield_files to empty list and get water and ion forcefield files
+            self._update_forcefield_files()
             self._system_generator = SystemGenerator(
                 forcefields=self.forcefield_files, forcefield_kwargs=forcefield_kwargs, periodic_forcefield_kwargs = periodic_forcefield_kwargs, barostat=barostat, 
                 small_molecule_forcefield=self.small_molecule_forcefield, cache=None, template_generator_kwargs=template_generator_kwargs)
@@ -699,6 +757,13 @@ def create_system(self, biopolymer_file=None, ligand_file=None):
             self.new_solvated_system = self.modeller_solvated_system
             self.new_solvated_topology = self.modeller_solvated_topology
 
+        # Set force groups
+        # https://github.com/openmm/openmm-cookbook/blob/main/notebooks/cookbook/Analyzing%20Energy%20Contributions.ipynb
+        # OpenMM does not have a way to directly query the energy of a Force object. 
+        # Instead, it lets you query the energy of a force group. We therefore need each Force object to be in a different group.
+        for i, f in enumerate(self.new_solvated_system.getForces()):
+            f.setForceGroup(i)
+
         # Save solvated pdb file
         outfile = os.path.join(self.output_directory_path, f"solvated.pdb")
         with open(f"{outfile}", "w") as wf:
@@ -729,8 +794,6 @@ def _regenerate_espaloma_system(self):
         import mdtraj
         from openff.toolkit import Molecule
 
-        _logger.debug("Regenerate system with espaloma")
-
         # Check biopolymer chains
         mdtop = mdtraj.Topology.from_openmm(self.modeller_solvated_topology)
         chain_indices = [ chain.index for chain in self.modeller_solvated_topology.chains() ]
diff --git a/espfit/app/train.py b/espfit/app/train.py
index 0dd37a6..1d90360 100644
--- a/espfit/app/train.py
+++ b/espfit/app/train.py
@@ -384,6 +384,8 @@ def train(self):
         with torch.autograd.set_detect_anomaly(True):
             for i in range(self.restart_epoch, self.epochs):
                 epoch = i + 1    # Start from epoch 1 (not zero-indexing)
+                _logger.info(f'Epoch {epoch}')
+                
                 for g in ds_tr_loader:
                     optimizer.zero_grad()
                     # TODO: Better way to handle this?
@@ -401,10 +403,10 @@ def train(self):
                 if epoch % self.checkpoint_frequency == 0:
                     # Note: returned loss is a joint loss of different units.
                     loss = HARTREE_TO_KCALPERMOL * loss.pow(0.5).item()
-                    _logger.info(f'Epoch {epoch}: loss={loss:.3f}')
+                    _logger.info(f'Total loss: {loss:.3f}')
                     self._save_checkpoint(epoch)
     
-    
+
     def train_sampler(self, sampler_patience=800, neff_threshold=0.2, sampler_weight=1.0, debug=False):
         """
         Train the Espaloma network model with sampler.
@@ -426,6 +428,17 @@ def train_sampler(self, sampler_patience=800, neff_threshold=0.2, sampler_weight
         Returns
         -------
         None
+
+        Notes
+        -----
+        Back-propagation is performed against accumulated gradients to avoid CUDA out of memory error.
+        Following error is raised if back-propagation is performed without gradient accumulation:
+
+        > torch.cuda.OutOfMemoryError: CUDA out of memory. 
+        > Tried to allocate 80.00 MiB (GPU 0; 10.75 GiB total capacity; 
+        > 9.76 GiB already allocated; 7.62 MiB free; 10.40 GiB reserved in total by PyTorch) 
+        > If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  
+        > See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
         """
         from espfit.utils.sampler.reweight import SetupSamplerReweight
 
@@ -451,104 +464,72 @@ def train_sampler(self, sampler_patience=800, neff_threshold=0.2, sampler_weight
         
         # Train
         ds_tr_loader = self.dataset_train.view(collate_fn='graph', batch_size=self.batch_size, shuffle=True)
+        accumulation_steps = len(ds_tr_loader)
         optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)
         with torch.autograd.set_detect_anomaly(True):
             for i in range(self.restart_epoch, self.epochs):
                 epoch = i + 1    # Start from 1 (not zero-indexing)
-            
-                """
-                # torch.cuda.OutOfMemoryError: CUDA out of memory. 
-                # Tried to allocate 80.00 MiB (GPU 0; 10.75 GiB total capacity; 
-                # 9.76 GiB already allocated; 7.62 MiB free; 10.40 GiB reserved in total by PyTorch) 
-                # If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  
-                # See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
-
-                loss = torch.tensor(0.0)
-                if torch.cuda.is_available():
-                    loss = loss.cuda("cuda:0")
-
-                for g in ds_tr_loader:
-                    optimizer.zero_grad()
-                    if torch.cuda.is_available():
-                        g = g.to("cuda:0")
-                    g.nodes["n1"].data["xyz"].requires_grad = True
-
-                    _loss, loss_dict = self.net(g)
-                    loss += _loss
+                _logger.info(f'# Epoch {epoch}')
 
+                # Compute sampler loss if epoch > sampler_patience
                 if epoch > self.sampler_patience:
-                    # Save checkpoint as local model (net.pt)
-                    # `neff_min` is -1 if SamplerReweight.samplers is None
-                    samplers = self._setup_local_samplers(epoch, net_copy, debug)
-                    neff_min = SamplerReweight.get_effective_sample_size(temporary_samplers=samplers)
-
-                    # If effective sample size is below threshold, update SamplerReweight.samplers and re-run simulaton
+                    # Save checkpoint as local model (net.pt) and create list of sampler instances
+                    temporary_samplers = self._setup_temporary_samplers(epoch, net_copy, debug)
+                
+                    # Check the effective sample size (neff), computed for each sampler stored in `SamplerReweight.samplers`.
+                    # Coordinates from `SamplerReweight.samplers` are used to compute the effective sample size and Boltzmann weights 
+                    # using force field parameters (Espaloma models) stored in `SamplerReweight.samplers` and `temporary_samplers`, 
+                    # which are the updated force field parameters from the current epoch.
+                    # Re-run the simulations based on the minimum neff value. `neff_min` is set to -1 if `SamplerReweight.samplers` is None, 
+                    # which is the default state when `SetupSamplerReweight()` is initialized.
+                    # Note that `SamplerReweight.get_effective_sample_size` updates the `SamplerReweight.weights_neff_dict` attribute.
+                    neff_min = SamplerReweight.get_effective_sample_size(temporary_samplers=temporary_samplers)
+                    
+                    # Re-run simulation if `neff_min` is below threshold
                     if neff_min < self.neff_threshold:
-                        _logger.info(f'Minimum effective sample size ({neff_min:.3f}) below threshold ({self.neff_threshold})')
-                        SamplerReweight.samplers = samplers
+                        _logger.info(f'Minimum effective sample size ({neff_min:.3f}) below threshold ({self.neff_threshold:.3f})')
+                        # Update SamplerReweight.sampler with the temporary_samplers
+                        SamplerReweight.samplers = temporary_samplers
                         SamplerReweight.run()
-                    del samplers
-
+                        # Initiliaze `SamplerReweight.weights_neff_dict` to empty dictionary
+                        _logger.info(f'Reset sampler weights')
+                        SamplerReweight.weights_neff_dict = dict()
+                    del temporary_samplers
+                    
                     # Compute sampler loss
-                    loss_list = SamplerReweight.compute_loss()   # list of torch.tensor
-                    for sampler_index, sampler_loss in enumerate(loss_list):
-                        sampler = SamplerReweight.samplers[sampler_index]
-                        loss += sampler_loss * sampler_weight
-                        loss_dict[f'{sampler.target_name}'] = sampler_loss.item()
-                    loss_dict['neff'] = neff_min
-
-                loss_dict['loss'] = loss.item()
-                self.report_loss(epoch, loss_dict)
-
-                # Back propagate
-                loss.backward()
-                optimizer.step()
-                """
-
-                # Gradient accumulation
-                accumulation_steps = len(ds_tr_loader)
+                    _loss = torch.tensor(0.0)
+                    _loss_dict = dict()
+                    if SamplerReweight.samplers:
+                        sampler_loss_list = SamplerReweight.compute_loss()   # list of torch.tensor
+                        for sampler_index, sampler_loss in enumerate(sampler_loss_list):
+                            sampler = SamplerReweight.samplers[sampler_index]
+                            _loss += (sampler_loss/accumulation_steps) * sampler_weight
+                            _loss_dict[f'{sampler.target_name}'] = sampler_loss.item()
+                        _loss_dict['neff'] = neff_min
+
+                # Accumulate gradiants
                 for g in ds_tr_loader:
                     optimizer.zero_grad()
                     if torch.cuda.is_available():
                         g = g.to("cuda:0")
                     g.nodes["n1"].data["xyz"].requires_grad = True
-
+                    # Compute loss
                     loss, loss_dict = self.net(g)
                     loss = loss/accumulation_steps
-                    loss.backward()
-
-                if epoch > self.sampler_patience:
-                    # Save checkpoint as local model (net.pt)
-                    # `neff_min` is -1 if SamplerReweight.samplers is None
-                    samplers = self._setup_local_samplers(epoch, net_copy, debug)
-                    neff_min = SamplerReweight.get_effective_sample_size(temporary_samplers=samplers)
-
-                    # If effective sample size is below threshold, update SamplerReweight.samplers and re-run simulaton
-                    if neff_min < self.neff_threshold:
-                        _logger.info(f'Minimum effective sample size ({neff_min:.3f}) below threshold ({self.neff_threshold})')
-                        SamplerReweight.samplers = samplers
-                        SamplerReweight.run()
-                    del samplers
-
-                    # Compute sampler loss
-                    loss_list = SamplerReweight.compute_loss()   # list of torch.tensor
-                    for sampler_index, sampler_loss in enumerate(loss_list):
-                        sampler = SamplerReweight.samplers[sampler_index]
-                        loss += sampler_loss * sampler_weight
-                        loss_dict[f'{sampler.target_name}'] = sampler_loss.item()
-                    loss.backward()
-                    loss_dict['neff'] = neff_min
-
+                    # Add sampler loss
+                    if epoch > self.sampler_patience:
+                        loss += _loss
+                        loss.backward(retain_graph=True)
                 loss_dict['loss'] = loss.item()
+                if _loss_dict:
+                    loss_dict.update(_loss_dict)
                 self.report_loss(epoch, loss_dict)
-
-                # Update
                 optimizer.step()
-
+                
                 if epoch % self.checkpoint_frequency == 0:
                     # Note: returned loss is a joint loss of different units.
                     #_loss = HARTREE_TO_KCALPERMOL * loss.pow(0.5).item()
-                    _logger.info(f'Epoch {epoch}: loss={loss.item():.3f}')
+                    _logger.info(f'Total loss: {loss.item():.3f}')
                     self._save_checkpoint(epoch)
 
 
@@ -628,8 +609,8 @@ def _save_local_model(self, epoch, net_copy):
         self.save_model(net=net_copy, checkpoint_file=local_model, output_model=f"net.pt", output_directory_path=self.output_directory_path)
 
 
-    def _setup_local_samplers(self, epoch, net_copy, debug):
-        """Setup local samplers.
+    def _setup_temporary_samplers(self, epoch, net_copy, debug):
+        """Setup samplers using temporary espaloma models generated during training.
         
         Parameters
         ----------
@@ -666,7 +647,7 @@ def _setup_local_samplers(self, epoch, net_copy, debug):
             }
         
         # Create sampler system from configuration file. Returns list of systems.                        
-        samplers = SetupSampler.from_toml(self.configfile, *args, **override_sampler_kwargs)
+        samplers = SetupSampler._from_toml(self.configfile, *args, **override_sampler_kwargs)
 
         return samplers
     
\ No newline at end of file
diff --git a/espfit/tests/test_app_train_sampler.py b/espfit/tests/test_app_train_sampler.py
index 1f71247..2de9625 100644
--- a/espfit/tests/test_app_train_sampler.py
+++ b/espfit/tests/test_app_train_sampler.py
@@ -68,7 +68,9 @@ def test_train_sampler(test_load_dataset, test_create_espaloma_from_toml):
     # Train
     sampler_patience = 10
     # Force sampler to run after reaching sampler patience by setting neff_threshold to 1.0
-    model.train_sampler(sampler_patience=sampler_patience, neff_threshold=1.0, sampler_weight=1)
+    # Set debug=True to use espaloma-0.3.2.pt instead of on-the-fly intermediate espaloma model.
+    # Epochs are too short for stable espaloma model.
+    model.train_sampler(sampler_patience=sampler_patience, neff_threshold=1.0, sampler_weight=1, debug=True)
 
     # Check outputs
     n_ckpt = len(glob.glob(os.path.join(model.output_directory_path, 'ckpt*pt')))
diff --git a/espfit/utils/sampler/reweight.py b/espfit/utils/sampler/reweight.py
index cfb14bc..ba47a73 100644
--- a/espfit/utils/sampler/reweight.py
+++ b/espfit/utils/sampler/reweight.py
@@ -31,7 +31,9 @@ class SetupSamplerReweight(object):
     """
     def __init__(self):
         self.samplers = None
-        self.weights = dict()   # {'target_name': {'weights': w_i}, {'neff': neff}}
+        self.weights_neff_dict = dict()   # {'target_name': {'weights': w_i}, {'neff': neff}}
+        self.force_group_names = ['HarmonicBondForce', 'HarmonicAngleForce', 'PeriodicTorsionForce', 'NonbondedForce']
+        self.exclude_n_frames = 0.1
 
 
     def run(self):
@@ -42,7 +44,7 @@ def run(self):
         None
         """
         for sampler in self.samplers:
-            _logger.info(f'Running simulation for {sampler.target_name} for {sampler.nsteps} steps...')
+            _logger.info(f'Re-run simulation for {sampler.target_name}')
             sampler.minimize()
             sampler.run()
 
@@ -60,57 +62,77 @@ def get_effective_sample_size(self, temporary_samplers):
         float
             The minimum effective sample size among all samplers.
         """
-        import mdtraj
         import numpy as np
         from openmm.unit import kilocalories_per_mole as kcalpermol
         from espfit.utils.units import KB_T_KCALPERMOL
+        from espfit.app.analysis import BaseDataLoader
         
+        _logger.info(f'Compute effective sample size and sampling weights')
+
         if self.samplers is None:
+            _logger.info('No samplers found. Return effective sample size -1.')
             return -1
 
         for sampler, temporary_sampler in zip(self.samplers, temporary_samplers):
-            _logger.info(f'Compute effective sample size and sampling weights for {sampler.target_name}')
+            _logger.info(f'Analyzing {sampler.target_name}...')
 
             # Get temperature
             temp0 = sampler.temperature._value
             temp1 = temporary_sampler.temperature._value
             assert temp0 == temp1, f'Temperature should be equivalent but got sampler {temp0} K and temporary sampler {temp1} K'
             beta = 1 / (KB_T_KCALPERMOL * temp0)
-            _logger.debug(f'beta temperature in kcal/mol: {beta}')
+            #_logger.debug(f'beta temperature in kcal/mol: {beta}')
 
             # Get position from trajectory
-            traj = mdtraj.load(sampler.output_directory_path + '/traj.nc', top=sampler.output_directory_path + '/solvated.pdb')
-            _logger.info(f'Found {traj.n_frames} frames in trajectory')
+            baseloader = BaseDataLoader(atomSubset=sampler.atomSubset)
+            baseloader.load_traj(input_directory_path=sampler.output_directory_path, exclude_n_frames=self.exclude_n_frames)
+            _logger.info(f'Found {baseloader.traj.n_frames} frames from {sampler.output_directory_path}')
             
             # Compute weights and effective sample size
-            log_w = []
-            for i in range(traj.n_frames):
+            w_arr = []
+            for i in range(baseloader.traj.n_frames):
                 # U(x0, theta0)
-                sampler.simulation.context.setPositions(traj.openmm_positions(i))
-                potential_energy = sampler.simulation.context.getState(getEnergy=True).getPotentialEnergy()
+                sampler.simulation.context.setPositions(baseloader.traj.openmm_positions(i))
+                for gid, force in enumerate(sampler.simulation.system.getForces()):
+                    if force.getName() in self.force_group_names:
+                        try:
+                            potential_energy += sampler.simulation.context.getState(getEnergy=True, groups={gid}).getPotentialEnergy()
+                        except:
+                            potential_energy = sampler.simulation.context.getState(getEnergy=True, groups={gid}).getPotentialEnergy()
                 # U(x0, theta1)
-                temporary_sampler.simulation.context.setPositions(traj.openmm_positions(i))
-                reduced_potential_energy = temporary_sampler.simulation.context.getState(getEnergy=True).getPotentialEnergy()
+                temporary_sampler.simulation.context.setPositions(baseloader.traj.openmm_positions(i))
+                for gid, force in enumerate(temporary_sampler.simulation.system.getForces()):
+                    if force.getName() in self.force_group_names:
+                        try:
+                            reduced_potential_energy += temporary_sampler.simulation.context.getState(getEnergy=True, groups={gid}).getPotentialEnergy()
+                        except:
+                            reduced_potential_energy = temporary_sampler.simulation.context.getState(getEnergy=True, groups={gid}).getPotentialEnergy()
                 # deltaU = U(x0, theta1) - U(x0, theta0)
                 delta = (reduced_potential_energy - potential_energy).value_in_unit(kcalpermol)
-                # log_w = ln(exp(-beta * delta))
+                # w = ln(exp(-beta * delta))
                 w = -1 * beta * delta
-                log_w.append(w)
+                w_arr.append(w)
 
-                #_logger.debug(f'U(x0, theta0): {potential_energy.value_in_unit(kcalpermol):10.3f} kcal/mol')
-                #_logger.debug(f'U(x0, theta1): {reduced_potential_energy.value_in_unit(kcalpermol):10.3f} kcal/mol')
-                #_logger.debug(f'deltaU:        {delta:10.3f} kcal/mol')
-                #_logger.debug(f'log_w:         {w:10.3f}')
+                _logger.info(f'U(x0, theta0): {potential_energy.value_in_unit(kcalpermol):10.3f} kcal/mol')
+                _logger.info(f'U(x0, theta1): {reduced_potential_energy.value_in_unit(kcalpermol):10.3f} kcal/mol')
+                _logger.info(f'deltaU:        {delta:10.3f} kcal/mol')
+                _logger.info(f'w:             {w:10.3f}')
 
             # Compute weights and effective sample size (ratio: 0 to 1)
-            w_i = np.exp(log_w) / np.sum(np.exp(log_w))
+            # Prevent RuntimeWarning: overflow encountered in exp
+            w_arr = np.float128(w_arr)
+            w_i = np.exp(w_arr) / np.sum(np.exp(w_arr))
             neff = np.sum(w_i) ** 2 / np.sum(w_i ** 2) / len(w_i)
-            #_logger.debug(f'w_i_sum:       {np.sum(w_i):10.3f}')
-            #_logger.debug(f'neff:          {neff:10.3f}')
+            _logger.debug(f'w_i_sum:       {np.sum(w_i):10.3f}')
+            _logger.debug(f'neff:          {neff:10.3f}')
+
+            # Check if sum of weights is 1
+            sum_w = abs(np.sum(w_i))
+            assert abs(1 - sum_w) <= 0.001, f"Weight sum {sum_w} is greater than tolerance 0.001"
 
-            self.weights[f'{sampler.target_name}'] = {'neff': neff, 'weights': w_i}
-            #_logger.info(f'{self.weights}')
-            neffs = [self.weights[key]['neff'] for key in self.weights.keys()]
+            self.weights_neff_dict[f'{sampler.target_name}'] = {'neff': neff, 'weights': w_i}
+            _logger.debug(f'{self.weights_neff_dict}')
+            neffs = [self.weights_neff_dict[key]['neff'] for key in self.weights_neff_dict.keys()]
 
         return min(neffs)
     
@@ -125,7 +147,7 @@ def compute_loss(self):
         """
         loss_list = []
         for sampler in self.samplers:
-            _logger.info(f'Compute loss for {sampler.target_name}')
+            _logger.debug(f'Compute loss for {sampler.target_name}')
             loss = self._compute_loss_per_system(sampler)  # torch.tensor
             loss_list.append(loss)
 
@@ -228,11 +250,11 @@ def _compute_weighted_observable(self, atomSubset, target_name, output_directory
 
         # Load trajectory
         target = RNASystem(atomSubset=atomSubset)
-        target.load_traj(input_directory_path=output_directory_path)
+        target.load_traj(input_directory_path=output_directory_path, exclude_n_frames=self.exclude_n_frames)
         
         # Compute observable
-        if self.weights.keys():
-            pred = target.compute_jcouplings(weights=self.weights[target_name]['weights'])
+        if self.weights_neff_dict.keys():
+            pred = target.compute_jcouplings(weights=self.weights_neff_dict[target_name]['weights'])
         else:
             pred = target.compute_jcouplings(weights=None)
         _logger.debug(f'Computed observable: {pred}')
diff --git a/pyproject.toml b/pyproject.toml
index bffcfc8..4247bea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,10 @@ where = ["."]
 # Ref https://setuptools.pypa.io/en/latest/userguide/datafiles.html#package-data
 [tool.setuptools.package-data]
 espfit = [
-    "py.typed"
+    "py.typed",
+    "data/target/nucleoside/*/target.pdb",
+    "data/target/nucleoside/*/experiment.yml",
+    "data/forcefield/espaloma-0.3.2.pt"
 ]
 
 [tool.versioningit]