PERF/API: Drop in-place link_df() #333

Closed · wants to merge 4 commits
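In short: `link_df` no longer modifies the input `features` DataFrame in place, so the `copy_features` switch is gone and the output is always a fresh, sorted copy. A quick illustration of the resulting API (values are illustrative; the 1D stepper mirrors the fixture from the `test_copy` test removed below):

```python
import numpy as np
import pandas as pd
import trackpy as tp

# One 1D stepper: a single particle moving +1 px per frame.
N = 5
features = pd.DataFrame({'x': np.arange(N), 'y': np.ones(N),
                         'frame': np.arange(N)})

# With this change, link_df never writes to `features`; it returns a
# new DataFrame with a 'particle' column appended.
linked = tp.link_df(features, search_range=5)
assert 'particle' in linked.columns
assert 'particle' not in features.columns  # input left intact
```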
trackpy/linking.py (23 additions, 59 deletions)
@@ -14,7 +14,7 @@
 import pandas as pd

 from .try_numba import try_numba_autojit, NUMBA_AVAILABLE
-from .utils import is_pandas_since_016, pandas_sort
+from .utils import pandas_sort

 logger = logging.getLogger(__name__)

@@ -472,7 +472,7 @@ def link(levels, search_range, hash_generator, memory=0, track_cls=None,
 def link_df(features, search_range, memory=0,
             neighbor_strategy='KDTree', link_strategy='auto',
             predictor=None, adaptive_stop=None, adaptive_step=0.95,
-            copy_features=False, diagnostics=False, pos_columns=None,
+            diagnostics=False, pos_columns=None,
             t_column=None, hash_size=None, box_size=None,
             verify_integrity=True, retain_index=False):
     """Link features into trajectories, assigning a label to each trajectory.
@@ -507,8 +507,6 @@ def link_df(features, search_range, memory=0,
         becomes <= adaptive_stop, give up and raise a SubnetOversizeException.
     adaptive_step : float, optional
         Reduce search_range by multiplying it by this factor.
-    copy_features : boolean
-        Leave the original features DataFrame intact (slower, uses more memory)
     diagnostics : boolean
         Collect details about how each particle was linked, and return as
         columns in the output DataFrame. Implies copy=True.
@@ -546,63 +544,29 @@ def link_df(features, search_range, memory=0,
     MARGIN = 1  # avoid OutOfHashException
     hash_size = features[pos_columns].max() + MARGIN

-    # Check if DataFrame is writeable.
-    # I don't know how to do this for pandas < 0.16.
-    if (is_pandas_since_016 and features.is_copy is not None and
-            not copy_features):
-        warn('The features DataFrame is a view, so it is not writeable. '
-             'The results will be output to a copy. Use copy_features='
-             'True to prevent this warning message.')
-        copy_features = True
-
-    # Group the DataFrame by time steps and make a 'level' out of each
-    # one, using the index to keep track of Points.
-    if retain_index:
-        orig_index = features.index.copy()  # Save it; restore it at the end.
-    features.reset_index(inplace=True, drop=True)
-    levels = (_build_level(frame, pos_columns, t_column,
-                           diagnostics=diagnostics) for frame_no, frame
-              in features.groupby(t_column))
-    labeled_levels = link_iter(
-        levels, search_range, memory=memory, predictor=predictor,
-        adaptive_stop=adaptive_stop, adaptive_step=adaptive_step,
+    features_iter = (frame for fnum, frame in features.groupby(t_column))
+    linked_iter = link_df_iter(
+        features_iter, search_range, memory=memory,
         neighbor_strategy=neighbor_strategy, link_strategy=link_strategy,
-        hash_size=hash_size, box_size=box_size)
-
-    if diagnostics:
-        features = strip_diagnostics(features)  # Makes a copy
-    elif copy_features:
-        features = features.copy()
-
-    # Do the tracking, and update the DataFrame after each iteration.
-    features['particle'] = np.nan  # placeholder
-    for level in labeled_levels:
-        index = [x.id for x in level]
-        labels = pd.Series([x.track.id for x in level], index)
-        frame_no = next(iter(level)).t  # uses an arbitary element from the set
-        if verify_integrity:
-            # This checks that the labeling is sane and tries
-            # to raise informatively if some unknown bug in linking
-            # produces a malformed labeling.
-            _verify_integrity(frame_no, labels)
-            # an additional check particular to link_df
-            if len(labels) > len(features[features[t_column] == frame_no]):
-                raise UnknownLinkingError("There are more labels than "
-                                          "particles to be labeled in Frame "
-                                          "%d".format(frame_no))
-        features['particle'].update(labels)
-        if diagnostics:
-            _add_diagnostic_columns(features, level)
-
-        logger.info("Frame %d: %d trajectories present", frame_no, len(labels))
-
-    if retain_index:
-        features.index = orig_index
-        # And don't bother to sort -- user must be doing something special.
+        predictor=predictor, adaptive_stop=adaptive_stop,
+        adaptive_step=adaptive_step, diagnostics=diagnostics,
+        pos_columns=pos_columns, t_column=t_column, hash_size=hash_size,
+        box_size=box_size, verify_integrity=verify_integrity,
+        retain_index=retain_index)
+    linked = pd.concat(linked_iter, verify_integrity=verify_integrity,
+                       ignore_index=(not retain_index))
+
+    if not retain_index:
+        linked = pandas_sort(linked, ['particle', t_column]
+                             ).reset_index(drop=True)
     else:
-        pandas_sort(features, ['particle', t_column], inplace=True)
-        features.reset_index(drop=True, inplace=True)
-    return features
+        linked = linked.reindex(features.index)
+
+    # Order columns as in original DataFrame, then 'particle', then
+    # any other new columns (i.e. diagnostics)
+    return linked.reindex(columns=list(features.columns) + ['particle'] +
+                          list(set(linked.columns) - set(features.columns) -
+                               {'particle',}))


 def link_df_iter(features, search_range, memory=0,
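The closing `reindex(columns=...)` keeps the output schema predictable: the input's columns first, then `particle`, then anything new such as diagnostic columns. A standalone sketch of that ordering (the `diag_subnet` column name is hypothetical):

```python
import pandas as pd

# Hypothetical linked output with one extra diagnostic column.
linked = pd.DataFrame({'diag_subnet': [0], 'particle': [0.0],
                       'x': [1.0], 'y': [1.0], 'frame': [0]})
orig_columns = ['x', 'y', 'frame']  # column order of the input DataFrame

# Original columns, then 'particle', then whatever else linking added.
ordered = linked.reindex(columns=orig_columns + ['particle'] +
                         list(set(linked.columns) - set(orig_columns) -
                              {'particle'}))
print(list(ordered.columns))  # ['x', 'y', 'frame', 'particle', 'diag_subnet']
```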
trackpy/tests/test_link.py (1 addition, 39 deletions)
@@ -18,7 +18,7 @@
 import trackpy as tp
 from trackpy.try_numba import NUMBA_AVAILABLE
 from trackpy.linking import PointND, link, Hash_table
-from trackpy.utils import is_pandas_since_016, pandas_sort
+from trackpy.utils import pandas_sort

 # Catch attempts to set values on an inadvertent copy of a Pandas object.
 tp.utils.make_pandas_strict()
@@ -280,44 +280,6 @@ def test_easy_tracking(self):
         assert np.sum(dx) == level_count - 1
         assert np.sum(dy) == 0

-    def test_copy(self):
-        """Check inplace/copy behavior of link_df, link_df_iter"""
-        # One 1D stepper
-        N = 5
-        f = DataFrame({'x': np.arange(N), 'y': np.ones(N), 'frame': np.arange(N)})
-        f_inplace = f.copy()
-        expected = f.copy()
-        expected['particle'] = np.zeros(N)
-
-        # Should add particle column in-place
-        # UNLESS diagnostics are enabled (or input dataframe is not writeable)
-        actual = self.link_df(f_inplace, 5)
-        assert_frame_equal(actual, expected)
-        if self.do_diagnostics:
-            assert 'particle' not in f_inplace.columns
-        else:
-            assert_frame_equal(actual, f_inplace)
-
-        # When DataFrame is actually a view, link_df should produce a warning
-        # and then copy the DataFrame. This only happens for pandas >= 0.16.
-        if is_pandas_since_016:
-            with warnings.catch_warnings(record=True) as w:
-                warnings.simplefilter('ignore')
-                warnings.simplefilter('always', UserWarning)
-                actual = self.link_df(f[f['frame'] > 0], 5)
-            assert len(w) == 1
-            assert 'particle' not in f.columns
-
-        # Should copy
-        actual = self.link_df(f, 5, copy_features=True)
-        assert_frame_equal(actual, expected)
-        assert 'particle' not in f.columns
-
-        # Should copy
-        actual_iter = self.link_df_iter(f, 5, hash_size=(10, 2))
-        assert_frame_equal(actual_iter, expected)
-        assert 'particle' not in f.columns
-
     @nose.tools.raises(tp.SubnetOversizeException)
     def test_oversize_fail(self):
         self.link_df(contracting_grid(), 1)
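`test_copy` is removed outright: with in-place linking gone, there is no copy/in-place contract left to exercise. For reference, the streaming path that `link_df` now wraps can also be driven directly; a sketch using the same stepper fixture (`hash_size` as in the removed test):

```python
import numpy as np
import pandas as pd
import trackpy as tp

N = 5
f = pd.DataFrame({'x': np.arange(N), 'y': np.ones(N), 'frame': np.arange(N)})

# link_df_iter consumes an iterable of single-frame DataFrames and
# yields linked frames lazily; concatenating them reproduces link_df.
frames = (frame for _, frame in f.groupby('frame'))
linked = pd.concat(tp.link_df_iter(frames, 5, hash_size=(10, 2)),
                   ignore_index=True)
assert 'particle' not in f.columns  # the input is still left intact
```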
trackpy/utils.py (1 addition, 8 deletions)
@@ -17,14 +17,7 @@

 import trackpy

-# Set is_pandas_since_016 for use elsewhere.
-# Pandas >= 0.16.0 lets us check if a DataFrame is a view.
-try:
-    is_pandas_since_016 = (LooseVersion(pd.__version__) >=
-                           LooseVersion('0.16.0'))
-except ValueError:  # Probably a development version
-    is_pandas_since_016 = True
-
 # Pandas 0.17 makes some API changes.
 try:
     is_pandas_since_017 = (LooseVersion(pd.__version__) >=
                            LooseVersion('0.17.0'))
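`pandas_sort` is now the only compatibility helper `link_df` pulls from `utils`. Its body is not shown in this diff; a plausible sketch, assuming it simply bridges the pandas 0.17 rename of `DataFrame.sort()` to `sort_values()` (the real implementation may differ):

```python
from distutils.version import LooseVersion
import pandas as pd

is_pandas_since_017 = LooseVersion(pd.__version__) >= LooseVersion('0.17.0')

# Assumed shape of trackpy.utils.pandas_sort: dispatch to whichever
# sort API this pandas version provides, forwarding kwargs (e.g. inplace).
def pandas_sort(df, by, **kwargs):
    if is_pandas_since_017:
        return df.sort_values(by, **kwargs)
    return df.sort(by, **kwargs)  # pre-0.17 API
```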