BUG: return np.nan if feature not in spatial_weights (#131)

* check if uID is in spatial_weights
* fix distribution.py
* fix
* tests
* warning formatting
pysal · Dec 14, 2019 · ba46d4a · ba46d4a
1 parent 14cd876
commit ba46d4a
Show file tree

Hide file tree

Showing 9 changed files with 254 additions and 125 deletions.
diff --git a/momepy/dimension.py b/momepy/dimension.py
@@ -399,26 +399,29 @@ def __init__(self, gdf, values, spatial_weights, unique_id, rng=None, mode="mean
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
-            else:
-                neighbours = [index]
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
 
-            values_list = data.loc[neighbours][values]
+                values_list = data.loc[neighbours][values]
 
-            if rng:
-                from momepy import limit_range
+                if rng:
+                    from momepy import limit_range
 
-                values_list = limit_range(values_list, rng=rng)
-            if mode == "mean":
-                results_list.append(np.mean(values_list))
-            elif mode == "median":
-                results_list.append(np.median(values_list))
-            elif mode == "mode":
-                results_list.append(sp.stats.mode(values_list)[0][0])
+                    values_list = limit_range(values_list, rng=rng)
+                if mode == "mean":
+                    results_list.append(np.mean(values_list))
+                elif mode == "median":
+                    results_list.append(np.median(values_list))
+                elif mode == "mode":
+                    results_list.append(sp.stats.mode(values_list)[0][0])
+                else:
+                    raise ValueError("{} is not supported as mode.".format(mode))
             else:
-                raise ValueError("{} is not supported as mode.".format(mode))
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -764,16 +767,19 @@ def __init__(self, gdf, values, spatial_weights, unique_id, areas=None):
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
-            else:
-                neighbours = [index]
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
 
-            subset = data.loc[neighbours]
-            results_list.append(
-                (sum(subset[values] * subset[areas])) / (sum(subset[areas]))
-            )
+                subset = data.loc[neighbours]
+                results_list.append(
+                    (sum(subset[values] * subset[areas])) / (sum(subset[areas]))
+                )
+            else:
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -824,14 +830,17 @@ def __init__(self, gdf, spatial_weights, unique_id):
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
-            else:
-                neighbours = [index]
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
 
-            areas = data.loc[neighbours].geometry.area
-            results_list.append(sum(areas))
+                areas = data.loc[neighbours].geometry.area
+                results_list.append(sum(areas))
+            else:
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 

diff --git a/momepy/distribution.py b/momepy/distribution.py
@@ -485,15 +485,17 @@ def __init__(self, gdf, spatial_weights, unique_id, orientations):
 
         # iterating over rows one by one
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                orientation = data.loc[neighbours][orientations]
-                deviations = abs(orientation - row[orientations])
-
-                results_list.append(statistics.mean(deviations))
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    orientation = data.loc[neighbours][orientations]
+                    deviations = abs(orientation - row[orientations])
+
+                    results_list.append(statistics.mean(deviations))
+                else:
+                    results_list.append(np.nan)
             else:
-                results_list.append(0)
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -502,6 +504,8 @@ class NeighborDistance:
     """
     Calculate the mean distance to adjacent buildings (based on spatial_weights)
 
+    If no neighbours are found, return np.nan.
+
     .. math::
         \\frac{1}{n}\\sum_{i=1}^n dist_i=\\frac{dist_1+dist_2+\\cdots+dist_n}{n}
 
@@ -510,7 +514,7 @@ class NeighborDistance:
     gdf : GeoDataFrame
         GeoDataFrame containing objects to analyse
     spatial_weights : libpysal.weights
-        spatial weights matrix
+        spatial weights matrix based on unique_id
     unique_id : str
         name of the column with unique id used as spatial_weights index.
 
@@ -549,14 +553,17 @@ def __init__(self, gdf, spatial_weights, unique_id):
 
         # iterating over rows one by one
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index]
-            building_neighbours = data.loc[neighbours]
-            if len(building_neighbours) > 0:
-                results_list.append(
-                    np.mean(building_neighbours.geometry.distance(row["geometry"]))
-                )
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index]
+                building_neighbours = data.loc[neighbours]
+                if len(building_neighbours) > 0:
+                    results_list.append(
+                        np.mean(building_neighbours.geometry.distance(row["geometry"]))
+                    )
+                else:
+                    results_list.append(np.nan)
             else:
-                results_list.append(0)
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -667,13 +674,16 @@ def __init__(
             # id to match spatial weights
             uid = row[unique_id]
             # define neighbours based on weights matrix defining analysis area
-            neighbours = spatial_weights_higher.neighbors[uid].copy()
-            neighbours.append(uid)
-            if neighbours:
-                selection = adj_list[adj_list.focal.isin(neighbours)][
-                    adj_list.neighbor.isin(neighbours)
-                ]
-                results_list.append(np.nanmean(selection.distance))
+            if uid in spatial_weights_higher.neighbors.keys():
+                neighbours = spatial_weights_higher.neighbors[uid].copy()
+                neighbours.append(uid)
+                if neighbours:
+                    selection = adj_list[adj_list.focal.isin(neighbours)][
+                        adj_list.neighbor.isin(neighbours)
+                    ]
+                    results_list.append(np.nanmean(selection.distance))
+            else:
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -855,16 +865,19 @@ def __init__(self, gdf, spatial_weights_higher, unique_id, spatial_weights=None)
 
         print("Calculating adjacency...")
         for index, row in tqdm(gdf.iterrows(), total=gdf.shape[0]):
-            neighbours = spatial_weights_higher.neighbors[row[unique_id]].copy()
-            if neighbours:
-                neighbours.append(row[unique_id])
+            if row[unique_id] in spatial_weights_higher.neighbors.keys():
+                neighbours = spatial_weights_higher.neighbors[row[unique_id]].copy()
+                if neighbours:
+                    neighbours.append(row[unique_id])
 
-                patches_sub = [patches[x] for x in neighbours]
-                patches_nr = len(set(patches_sub))
+                    patches_sub = [patches[x] for x in neighbours]
+                    patches_nr = len(set(patches_sub))
 
-                results_list.append(patches_nr / len(neighbours))
+                    results_list.append(patches_nr / len(neighbours))
+                else:
+                    results_list.append(np.nan)
             else:
-                results_list.append(0)
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -928,11 +941,15 @@ def __init__(self, gdf, spatial_weights, unique_id, weighted=False):
 
         neighbours = []
         for index, row in tqdm(gdf.iterrows(), total=gdf.shape[0]):
-            if weighted is True:
-                neighbours.append(
-                    spatial_weights.cardinalities[row[unique_id]] / row.geometry.length
-                )
+            if row[unique_id] in spatial_weights.neighbors.keys():
+                if weighted is True:
+                    neighbours.append(
+                        spatial_weights.cardinalities[row[unique_id]]
+                        / row.geometry.length
+                    )
+                else:
+                    neighbours.append(spatial_weights.cardinalities[row[unique_id]])
             else:
-                neighbours.append(spatial_weights.cardinalities[row[unique_id]])
+                neighbours.append(np.nan)
 
         self.series = pd.Series(neighbours, index=gdf.index)
diff --git a/momepy/diversity.py b/momepy/diversity.py
@@ -86,14 +86,17 @@ def __init__(self, gdf, values, spatial_weights, unique_id, rng=(0, 100), **kwar
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
+
+                values_list = data.loc[neighbours][values]
+                results_list.append(sp.stats.iqr(values_list, rng=rng, **kwargs))
             else:
-                neighbours = [index]
-
-            values_list = data.loc[neighbours][values]
-            results_list.append(sp.stats.iqr(values_list, rng=rng, **kwargs))
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -168,19 +171,22 @@ def __init__(self, gdf, values, spatial_weights, unique_id, rng=None):
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
-            else:
-                neighbours = [index]
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
 
-            values_list = data.loc[neighbours][values]
+                values_list = data.loc[neighbours][values]
 
-            if rng:
-                from momepy import limit_range
+                if rng:
+                    from momepy import limit_range
 
-                values_list = limit_range(values_list, rng=rng)
-            results_list.append(Theil(values_list).T)
+                    values_list = limit_range(values_list, rng=rng)
+                results_list.append(Theil(values_list).T)
+            else:
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -292,16 +298,19 @@ def __init__(
         data = data.set_index(unique_id)
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
+                else:
+                    neighbours = [index]
+                values_list = data.loc[neighbours][values]
+
+                sample_bins = classifiers.UserDefined(values_list, self.bins)
+                counts = dict(zip(self.bins, sample_bins.counts))
+                results_list.append(self._simpson_di(counts))
             else:
-                neighbours = [index]
-            values_list = data.loc[neighbours][values]
-
-            sample_bins = classifiers.UserDefined(values_list, self.bins)
-            counts = dict(zip(self.bins, sample_bins.counts))
-            results_list.append(self._simpson_di(counts))
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)
 
@@ -399,18 +408,21 @@ def __init__(self, gdf, values, spatial_weights, unique_id, rng=None):
 
         results_list = []
         for index, row in tqdm(data.iterrows(), total=data.shape[0]):
-            neighbours = spatial_weights.neighbors[index].copy()
-            if neighbours:
-                neighbours.append(index)
+            if index in spatial_weights.neighbors.keys():
+                neighbours = spatial_weights.neighbors[index].copy()
+                if neighbours:
+                    neighbours.append(index)
 
-                values_list = data.loc[neighbours][values].values
+                    values_list = data.loc[neighbours][values].values
 
-                if rng:
-                    from momepy import limit_range
+                    if rng:
+                        from momepy import limit_range
 
-                    values_list = np.array(limit_range(values_list, rng=rng))
-                results_list.append(Gini(values_list).g)
+                        values_list = np.array(limit_range(values_list, rng=rng))
+                    results_list.append(Gini(values_list).g)
+                else:
+                    results_list.append(0)
             else:
-                results_list.append(0)
+                results_list.append(np.nan)
 
         self.series = pd.Series(results_list, index=gdf.index)