
Commit

formatting list for terminal
g.trantham committed Dec 23, 2022
1 parent 377807d commit 1147e4a
Showing 4 changed files with 83 additions and 40 deletions.
49 changes: 28 additions & 21 deletions notebooks/ORM.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -19,7 +19,7 @@
"'2.0.0b1'"
]
},
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -67,7 +67,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -99,34 +99,41 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 :: https://www.waterqualitydata.us/data/Station/search?mimeType=geojson&minactivities=1&counts=no\n",
"2 :: https://www.sciencebase.gov/catalogMaps/mapping/ows/57336b02e4b0dae0d5dd619a?service=WFS&version=1.0.0&request=GetFeature&srsName=EPSG:4326&typeName=sb:fpp&outputFormat=json\n",
"5 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=usgs_nldi_gages.geojson\n",
"6 :: https://www.hydroshare.org/resource/5f665b7b82d74476930712f7e423a0d2/data/contents/wade.geojson\n",
"7 :: https://www.hydroshare.org/resource/3295a17b4cc24d34bd6a5c5aaf753c50/data/contents/nldi_gages.geojson\n",
"8 :: https://sb19.linked-data.internetofwater.dev/collections/ca_gages/items?f=json&limit=10000\n",
"9 :: https://www.sciencebase.gov/catalogMaps/mapping/ows/609c8a63d34ea221ce3acfd3?service=WFS&version=1.0.0&request=GetFeature&srsName=EPSG:4326&typeName=sb::gfv11&outputFormat=json\n",
"10 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=vigil.geojson\n",
"11 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=nwis_wells.geojson\n",
"12 :: https://locations.newmexicowaterdata.org/collections/Things/items?f=json&limit=100000\n",
"13 :: https://geoconnex-demo-pages.internetofwater.dev/collections/demo-gpkg/items?f=json&limit=10000\n"
" 1 :: Water Quality Portal :: https://www.waterqualitydata.us/data/Station/sea...\n",
" 2 :: HUC12 Pour Points :: https://www.sciencebase.gov/catalogMaps/mapping/...\n",
" 5 :: NWIS Surface Water Sites :: https://www.sciencebase.gov/catalog/file/get/60c...\n",
" 6 :: Water Data Exchange 2.0 Sites :: https://www.hydroshare.org/resource/5f665b7b82d7...\n",
" 7 :: geoconnex.us reference gages :: https://www.hydroshare.org/resource/3295a17b4cc2...\n",
" 8 :: Streamgage catalog for CA SB19 :: https://sb19.linked-data.internetofwater.dev/col...\n",
" 9 :: USGS Geospatial Fabric V1.1 Poin :: https://www.sciencebase.gov/catalogMaps/mapping/...\n",
"10 :: Vigil Network Data :: https://www.sciencebase.gov/catalog/file/get/60c...\n",
"11 :: NWIS Groundwater Sites :: https://www.sciencebase.gov/catalog/file/get/60c...\n",
"12 :: New Mexico Water Data Initative :: https://locations.newmexicowaterdata.org/collect...\n",
"13 :: geoconnex contribution demo site :: https://geoconnex-demo-pages.internetofwater.dev...\n"
]
}
],
"source": [
"stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id) #.where(CrawlerSource.crawler_source_id == 1)\n",
"with Session(eng) as session:\n",
" for source in session.scalars(stmt):\n",
" print(f\"{source.crawler_source_id} :: {source.source_uri}\")"
" print(f\"{source.crawler_source_id:2} :: {source.source_name[0:32]:32} :: {source.source_uri[0:48]:48}...\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
@@ -151,7 +158,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16 (main, Dec 14 2022, 13:52:45) \n[GCC 11.3.0]"
"version": "3.9.16"
},
"orig_nbformat": 4,
"vscode": {
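The reworked notebook cell relies on f-string width specifiers to line the crawler sources up in columns. A minimal sketch of the same technique, with hypothetical (id, name, uri) tuples standing in for the CrawlerSource ORM objects:

# Sketch only: sample rows standing in for CrawlerSource query results.
rows = [
    (1, "Water Quality Portal", "https://www.waterqualitydata.us/data/Station/search"),
    (8, "Streamgage catalog for CA SB19", "https://sb19.linked-data.internetofwater.dev/collections/ca_gages/items"),
]
for src_id, name, uri in rows:
    # :2 right-aligns the id in two characters; the slice plus :32 / :48
    # pads or truncates the name and URI so every row has the same width.
    print(f"{src_id:2} :: {name[0:32]:32} :: {uri[0:48]:48}...")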
31 changes: 23 additions & 8 deletions src/nldi_crawler/cli.py
@@ -48,18 +48,33 @@ def main(list_, conf_, verbose_):
cfg.update(cfg_from_toml(conf_))

if list_:
for source_item in sources.fetch_source_table(db_url(cfg)):
print(f"{source_item.crawler_source_id} :: {source_item.source_name} :: {source_item.source_uri[0:64]}...")
print("\nID : Source Name : URI ")
print("== ", "="*48, " ", "="*48)
for source in sources.fetch_source_table(db_url(cfg)):
print(
f"{source.crawler_source_id:2} :",
f"{source.source_name[0:48]:48} :",
f"{source.source_uri[0:48]:48}...",
)
sys.exit(0)


def db_url(c:dict) -> str:
if "NLDI_DB_PASS" in c:
db_url = f"postgresql://{c['NLDI_DB_USER']}:{c['NLDI_DB_PASS']}@{c['NLDI_DB_HOST']}:{c['NLDI_DB_PORT']}/{c['NLDI_DB_NAME']}"
def db_url(conf: dict) -> str:
"""
Formats the full database connection URL using the configuration dict.
:param conf: config information retrieved from env variables or from toml file.
:type conf: dict
:return: connection string
:rtype: str
"""
if "NLDI_DB_PASS" in conf:
_url = f"postgresql://{conf['NLDI_DB_USER']}:{conf['NLDI_DB_PASS']}@{conf['NLDI_DB_HOST']}:{conf['NLDI_DB_PORT']}/{conf['NLDI_DB_NAME']}"
else:
db_url = f"postgresql://{c['NLDI_DB_USER']}@{c['NLDI_DB_HOST']}:{c['NLDI_DB_PORT']}/{c['NLDI_DB_NAME']}"
logging.info("Using DB Connect String %s", db_url)
return db_url
_url = f"postgresql://{conf['NLDI_DB_USER']}@{conf['NLDI_DB_HOST']}:{conf['NLDI_DB_PORT']}/{conf['NLDI_DB_NAME']}"
logging.info("Using DB Connect String %s", _url)
return _url


def cfg_from_toml(filepath: str) -> dict:
"""
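The new db_url() helper assembles the PostgreSQL connection string from the configuration dict, including the password segment only when NLDI_DB_PASS is set. A hedged usage sketch, with placeholder connection values rather than real project settings:

from nldi_crawler import cli

cfg = {
    "NLDI_DB_USER": "nldi",      # placeholder values, not project defaults
    "NLDI_DB_HOST": "localhost",
    "NLDI_DB_PORT": "5432",
    "NLDI_DB_NAME": "nldi",
    # "NLDI_DB_PASS": "secret",  # optional; added to the URL only when present
}
print(cli.db_url(cfg))           # -> postgresql://nldi@localhost:5432/nldi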
33 changes: 22 additions & 11 deletions src/nldi_crawler/sources.py
@@ -6,15 +6,20 @@
"""
routines to manage the table of crawler_sources
"""
from sqlalchemy import create_engine, Table, select
import dataclasses

from sqlalchemy import create_engine, Table, select
from sqlalchemy.orm import DeclarativeBase, Session

class NLDI_Base(DeclarativeBase):
pass

@dataclasses.dataclass
class NldiBase(DeclarativeBase):
"""Base class used to create reflected ORM objects."""

pass


def fetch_source_table(connect_string:str) -> list:
def fetch_source_table(connect_string: str) -> list:
"""
Fetches a list of crawler sources from the master NLDI-DB database. The returned list
holds one or more CrawlerSource() objects, which are reflected from the database using
Expand All @@ -30,17 +35,23 @@ def fetch_source_table(connect_string:str) -> list:
eng = create_engine(connect_string, client_encoding="UTF-8", echo=False, future=True)
retval = []

class CrawlerSource(NLDI_Base):
@dataclasses.dataclass
class CrawlerSource(NldiBase):
"""
An ORM reflection of the crawler_source table
"""

__table__ = Table(
_tbl_name_, ## <--- name of the table
NLDI_Base.metadata,
_tbl_name_, ## <--- name of the table
NldiBase.metadata,
autoload_with=eng, ## <--- this is where the magic happens
schema=_schema_, ## <--- only need this if the table is not in
## the default schema.
schema=_schema_, ## <--- only need this if the table is not in
## the default schema.
)
stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id)

stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id) # pylint: disable=E1101
with Session(eng) as session:
for source in session.scalars(stmt):
retval.append(source)
eng=None
eng = None
return retval
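fetch_source_table() builds its ORM class by reflection at call time, so the column definitions always come from the live crawler_source table rather than from hand-written declarations. A minimal sketch of that pattern in SQLAlchemy 2.0, using an assumed table name, schema, and connection URL (the real names live in module constants not shown in this diff):

from sqlalchemy import create_engine, Table, select
from sqlalchemy.orm import DeclarativeBase, Session

class Base(DeclarativeBase):
    """Base class for reflected ORM objects."""

engine = create_engine("postgresql://nldi@localhost:5432/nldi")  # placeholder URL

class CrawlerSource(Base):
    # Columns and types are loaded from the database at class-definition time.
    __table__ = Table(
        "crawler_source",        # assumed table name
        Base.metadata,
        autoload_with=engine,    # reflection happens here
        schema="nldi_data",      # assumed schema
    )

stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id)
with Session(engine) as session:
    for source in session.scalars(stmt):
        print(source.crawler_source_id, source.source_name)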
10 changes: 10 additions & 0 deletions tests/test_core.py
@@ -11,6 +11,7 @@
import nldi_crawler

from nldi_crawler import cli
from nldi_crawler import sources


def test_successful_import():
@@ -59,3 +60,12 @@ def test_main_w_config():
runner = click.testing.CliRunner()
result = runner.invoke(cli.main, args=["--config", os.path.join(_test_dir, "cfg-test-1.toml")])
assert result.exit_code == 0


def test_list_sources():
"""get table of sources from db"""
_test_dir = os.path.dirname(os.path.realpath(__file__))
cfg = cli.cfg_from_toml(os.path.join(_test_dir, "..", r"nldi-crawler.toml"))
_url = cli.db_url(cfg)
srcs = sources.fetch_source_table(_url)
assert len(srcs) >= 1
