Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

migrate ArcGIS Online datasets to ingest #1330

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dcpy/connectors/esri/arcgis_feature_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TimeRemainingColumn,
)
import yaml
import json

from dcpy.models.connectors.esri import FeatureServer, FeatureServerLayer
import dcpy.models.product.dataset.metadata as models
Expand Down Expand Up @@ -184,6 +185,12 @@ def _downcase_properties_keys(feat):
return {"type": "FeatureCollection", "crs": crs, "features": features}


def download_layer(layer: FeatureServerLayer, crs: str, path: Path) -> None:
    """Fetch a feature server layer and write it to ``path`` as JSON.

    Args:
        layer: The feature server layer to download.
        crs: Coordinate reference system as an EPSG string, e.g. "EPSG:3857".
        path: Destination file. It is opened in write mode, so any existing
            content is replaced.

    Raises:
        ValueError: If what remains of ``crs`` after dropping the "EPSG:"
            prefix cannot be parsed as an integer.
    """
    # removeprefix drops only the literal "EPSG:" prefix. str.strip("EPSG:")
    # would instead treat its argument as a set of characters to trim from
    # both ends of the string, which only works by accident for digit codes.
    epsg_code = int(crs.removeprefix("EPSG:"))
    geojson = get_layer(layer, crs=epsg_code)
    with open(path, "w") as f:
        json.dump(geojson, f)


def make_dcp_metadata(layer_url: str) -> models.Metadata:
if layer_url.endswith("FeatureServer/0"):
layer_url = layer_url + "?f=pjson"
Expand Down
3 changes: 3 additions & 0 deletions dcpy/lifecycle/ingest/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
S3Source,
ScriptSource,
DEPublished,
ESRIFeatureServer,
Source,
ProcessingStep,
Template,
Expand Down Expand Up @@ -102,6 +103,8 @@ def get_filename(source: Source, ds_id: str) -> str:
return f"{ds_id}.{source.extension}"
case S3Source():
return Path(source.key).name
case ESRIFeatureServer():
return f"{ds_id}.geojson"
case _:
raise NotImplementedError(
f"Source type {source} not supported for get_filename"
Expand Down
17 changes: 16 additions & 1 deletion dcpy/lifecycle/ingest/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@
S3Source,
ScriptSource,
DEPublished,
ESRIFeatureServer,
Source,
)
from dcpy.models.connectors import socrata, web as web_models
from dcpy.models.connectors import socrata
from dcpy.models.connectors import esri as esri_models
from dcpy.models.connectors import web as web_models
from dcpy.models.connectors.edm.publishing import GisDataset
from dcpy.utils import s3
from dcpy.connectors.edm import publishing
from dcpy.connectors.socrata import extract as extract_socrata
from dcpy.connectors.esri import arcgis_feature_service
from dcpy.connectors import web


Expand Down Expand Up @@ -48,6 +52,17 @@ def download_file_from_source(
web.download_file(source.endpoint, path)
case socrata.Source():
extract_socrata.download(source, path)
case ESRIFeatureServer():
arcgis_feature_service.download_layer(
arcgis_feature_service.FeatureServerLayer(
server=esri_models.Server[source.server],
name=source.dataset,
layer_name=source.layer_name,
layer_id=int(source.layer_id),
),
source.crs,
path,
)
case _:
raise NotImplementedError(
f"Source type {source.type} not supported for download_file_from_source"
Expand Down
Empty file removed dcpy/models/connectors/doe.py
Empty file.
10 changes: 10 additions & 0 deletions dcpy/models/lifecycle/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ class DEPublished(BaseModel, extra="forbid"):
filename: str


# Ingest source describing one layer of an ESRI (ArcGIS) feature server.
# extra="forbid" makes validation reject any field that is not declared here.
class ESRIFeatureServer(BaseModel, extra="forbid"):
# Fixed tag for this source type; must be the literal string "esri".
type: Literal["esri"]
# Name of a member of the esri connector's Server enum (e.g. "nys_parks");
# the extract step resolves it with esri_models.Server[source.server].
server: str
# Dataset name; passed as `name` when building the FeatureServerLayer.
dataset: str
# Layer name; passed as `layer_name` when building the FeatureServerLayer.
layer_name: str
# Layer id kept as a string here; the extract step converts it with int().
layer_id: str
# Coordinate reference system string such as "EPSG:3857"; download_layer
# removes the "EPSG:" text and converts the remainder to an int.
crs: str


Source: TypeAlias = (
LocalFileSource
| web.FileDownloadSource
Expand All @@ -44,6 +53,7 @@ class DEPublished(BaseModel, extra="forbid"):
| DEPublished
| S3Source
| ScriptSource
| ESRIFeatureServer
)


Expand Down
20 changes: 20 additions & 0 deletions dcpy/test/connectors/test_esri.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,23 @@ def test_get_layer_chunked(self, post: MagicMock):

# one call to get ids, three calls to get data
assert post.call_count == 4


@patch("requests.get", side_effect=mock_request_get)
@patch("requests.post", side_effect=mock_query_layer)
def test_download_layer(post, get, create_temp_filesystem):
    """download_layer creates a file at the requested path.

    Stacked ``patch`` decorators inject their mocks bottom-up, so the
    ``requests.post`` mock is the first argument and the ``requests.get``
    mock is the second. The parameter names reflect that order.
    """
    output_path = create_temp_filesystem / "does_not_exist.geojson"
    layer = FeatureServerLayer(
        server=Server.nys_parks,
        name="National_Register_Building_Listings",
        layer_name=LAYER_NAME,
        layer_id=LAYER_ID,
    )
    arcfs.download_layer(
        layer=layer,
        crs="EPSG:3857",
        path=output_path,
    )
    assert output_path.exists()
10 changes: 10 additions & 0 deletions dcpy/test/lifecycle/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
ScriptSource,
S3Source,
DEPublished,
ESRIFeatureServer,
)
from dcpy.test.conftest import RECIPES_BUCKET

Expand Down Expand Up @@ -38,6 +39,14 @@ class Sources:
de_publish = DEPublished(
type="de-published", product=TEST_DATASET_NAME, filename="file.csv"
)
esri = ESRIFeatureServer(
type="esri",
server="nys_parks",
dataset="National_Register_Building_Listings",
layer_name="MADE_UP_LAYER_NAME",
layer_id="13",
crs="EPSG:3857",
)


SOURCE_FILENAMES = [
Expand All @@ -47,4 +56,5 @@ class Sources:
(Sources.api, f"{TEST_DATASET_NAME}.json"),
(Sources.socrata, f"{TEST_DATASET_NAME}.csv"),
(Sources.s3, "test.txt"),
(Sources.esri, f"{TEST_DATASET_NAME}.geojson"),
]
18 changes: 0 additions & 18 deletions dcpy/test/lifecycle/ingest/resources/sources.yml

This file was deleted.