From 1147e4a79b35a65e1ced0554cca55c1831eada4f Mon Sep 17 00:00:00 2001 From: "g.trantham" Date: Fri, 23 Dec 2022 10:08:42 -0600 Subject: [PATCH] formatting list for terminal --- notebooks/ORM.ipynb | 49 +++++++++++++++++++++---------------- src/nldi_crawler/cli.py | 31 +++++++++++++++++------ src/nldi_crawler/sources.py | 33 ++++++++++++++++--------- tests/test_core.py | 10 ++++++++ 4 files changed, 83 insertions(+), 40 deletions(-) diff --git a/notebooks/ORM.ipynb b/notebooks/ORM.ipynb index ff94773..13c9794 100644 --- a/notebooks/ORM.ipynb +++ b/notebooks/ORM.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -19,7 +19,7 @@ "'2.0.0b1'" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -99,24 +99,24 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1 :: https://www.waterqualitydata.us/data/Station/search?mimeType=geojson&minactivities=1&counts=no\n", - "2 :: https://www.sciencebase.gov/catalogMaps/mapping/ows/57336b02e4b0dae0d5dd619a?service=WFS&version=1.0.0&request=GetFeature&srsName=EPSG:4326&typeName=sb:fpp&outputFormat=json\n", - "5 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=usgs_nldi_gages.geojson\n", - "6 :: https://www.hydroshare.org/resource/5f665b7b82d74476930712f7e423a0d2/data/contents/wade.geojson\n", - "7 :: https://www.hydroshare.org/resource/3295a17b4cc24d34bd6a5c5aaf753c50/data/contents/nldi_gages.geojson\n", - "8 :: https://sb19.linked-data.internetofwater.dev/collections/ca_gages/items?f=json&limit=10000\n", - "9 :: https://www.sciencebase.gov/catalogMaps/mapping/ows/609c8a63d34ea221ce3acfd3?service=WFS&version=1.0.0&request=GetFeature&srsName=EPSG:4326&typeName=sb::gfv11&outputFormat=json\n", - "10 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=vigil.geojson\n", - "11 :: https://www.sciencebase.gov/catalog/file/get/60c7b895d34e86b9389b2a6c?name=nwis_wells.geojson\n", - "12 :: https://locations.newmexicowaterdata.org/collections/Things/items?f=json&limit=100000\n", - "13 :: https://geoconnex-demo-pages.internetofwater.dev/collections/demo-gpkg/items?f=json&limit=10000\n" + " 1 :: Water Quality Portal :: https://www.waterqualitydata.us/data/Station/sea...\n", + " 2 :: HUC12 Pour Points :: https://www.sciencebase.gov/catalogMaps/mapping/...\n", + " 5 :: NWIS Surface Water Sites :: https://www.sciencebase.gov/catalog/file/get/60c...\n", + " 6 :: Water Data Exchange 2.0 Sites :: https://www.hydroshare.org/resource/5f665b7b82d7...\n", + " 7 :: geoconnex.us reference gages :: https://www.hydroshare.org/resource/3295a17b4cc2...\n", + " 8 :: Streamgage catalog for CA SB19 :: https://sb19.linked-data.internetofwater.dev/col...\n", + " 9 :: USGS Geospatial Fabric V1.1 Poin :: https://www.sciencebase.gov/catalogMaps/mapping/...\n", + "10 :: Vigil Network Data :: https://www.sciencebase.gov/catalog/file/get/60c...\n", + "11 :: NWIS Groundwater Sites :: https://www.sciencebase.gov/catalog/file/get/60c...\n", + "12 :: New Mexico Water Data Initative :: https://locations.newmexicowaterdata.org/collect...\n", + "13 :: geoconnex contribution demo site :: https://geoconnex-demo-pages.internetofwater.dev...\n" ] } ], @@ -124,9 +124,16 @@ "stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id) #.where(CrawlerSource.crawler_source_id == 1)\n", "with Session(eng) as session:\n", " for source in session.scalars(stmt):\n", - " print(f\"{source.crawler_source_id} :: {source.source_uri}\")" + " print(f\"{source.crawler_source_id:2} :: {source.source_name[0:32]:32} :: {source.source_uri[0:48]:48}...\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -151,7 +158,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16 (main, Dec 14 2022, 13:52:45) \n[GCC 11.3.0]" + "version": "3.9.16" }, "orig_nbformat": 4, "vscode": { diff --git a/src/nldi_crawler/cli.py b/src/nldi_crawler/cli.py index 47db7d2..e37c863 100644 --- a/src/nldi_crawler/cli.py +++ b/src/nldi_crawler/cli.py @@ -48,18 +48,33 @@ def main(list_, conf_, verbose_): cfg.update(cfg_from_toml(conf_)) if list_: - for source_item in sources.fetch_source_table(db_url(cfg)): - print(f"{source_item.crawler_source_id} :: {source_item.source_name} :: {source_item.source_uri[0:64]}...") + print("\nID : Source Name : URI ") + print("== ", "="*48, " ", "="*48) + for source in sources.fetch_source_table(db_url(cfg)): + print( + f"{source.crawler_source_id:2} :", + f"{source.source_name[0:48]:48} :", + f"{source.source_uri[0:48]:48}...", + ) sys.exit(0) -def db_url(c:dict) -> str: - if "NLDI_DB_PASS" in c: - db_url = f"postgresql://{c['NLDI_DB_USER']}:{c['NLDI_DB_PASS']}@{c['NLDI_DB_HOST']}:{c['NLDI_DB_PORT']}/{c['NLDI_DB_NAME']}" +def db_url(conf: dict) -> str: + """ + Formats the full database connection URL using the configuration dict. + + :param conf: config information retrieved from env variables or from toml file. + :type conf: dict + :return: connection string + :rtype: str + """ + if "NLDI_DB_PASS" in conf: + _url = f"postgresql://{conf['NLDI_DB_USER']}:{conf['NLDI_DB_PASS']}@{conf['NLDI_DB_HOST']}:{conf['NLDI_DB_PORT']}/{conf['NLDI_DB_NAME']}" else: - db_url = f"postgresql://{c['NLDI_DB_USER']}@{c['NLDI_DB_HOST']}:{c['NLDI_DB_PORT']}/{c['NLDI_DB_NAME']}" - logging.info("Using DB Connect String %s", db_url) - return db_url + _url = f"postgresql://{conf['NLDI_DB_USER']}@{conf['NLDI_DB_HOST']}:{conf['NLDI_DB_PORT']}/{conf['NLDI_DB_NAME']}" + logging.info("Using DB Connect String %s", _url) + return _url + def cfg_from_toml(filepath: str) -> dict: """ diff --git a/src/nldi_crawler/sources.py b/src/nldi_crawler/sources.py index 294f9a4..6690b26 100644 --- a/src/nldi_crawler/sources.py +++ b/src/nldi_crawler/sources.py @@ -6,15 +6,20 @@ """ routines to manage the table of crawler_sources """ -from sqlalchemy import create_engine, Table, select +import dataclasses + +from sqlalchemy import create_engine, Table, select from sqlalchemy.orm import DeclarativeBase, Session -class NLDI_Base(DeclarativeBase): - pass +@dataclasses.dataclass +class NldiBase(DeclarativeBase): + """Base class used to create reflected ORM objects.""" + + pass -def fetch_source_table(connect_string:str) -> list: +def fetch_source_table(connect_string: str) -> list: """ Fetches a list of crawler sources from the master NLDI-DB database. The returned list holds one or mor CrawlerSource() objects, which are reflected from the database using @@ -30,17 +35,23 @@ def fetch_source_table(connect_string:str) -> list: eng = create_engine(connect_string, client_encoding="UTF-8", echo=False, future=True) retval = [] - class CrawlerSource(NLDI_Base): + @dataclasses.dataclass + class CrawlerSource(NldiBase): + """ + An ORM reflection of the crawler_source table + """ + __table__ = Table( - _tbl_name_, ## <--- name of the table - NLDI_Base.metadata, + _tbl_name_, ## <--- name of the table + NldiBase.metadata, autoload_with=eng, ## <--- this is where the magic happens - schema=_schema_, ## <--- only need this if the table is not in - ## the default schema. + schema=_schema_, ## <--- only need this if the table is not in + ## the default schema. ) - stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id) + + stmt = select(CrawlerSource).order_by(CrawlerSource.crawler_source_id) # pylint: disable=E1101 with Session(eng) as session: for source in session.scalars(stmt): retval.append(source) - eng=None + eng = None return retval diff --git a/tests/test_core.py b/tests/test_core.py index dba2ee2..0313c74 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -11,6 +11,7 @@ import nldi_crawler from nldi_crawler import cli +from nldi_crawler import sources def test_successful_import(): @@ -59,3 +60,12 @@ def test_main_w_config(): runner = click.testing.CliRunner() result = runner.invoke(cli.main, args=["--config", os.path.join(_test_dir, "cfg-test-1.toml")]) assert result.exit_code == 0 + + +def test_list_sources(): + """get table of sources from db""" + _test_dir = os.path.dirname(os.path.realpath(__file__)) + cfg = cli.cfg_from_toml(os.path.join(_test_dir, "..", r"nldi-crawler.toml")) + _url = cli.db_url(cfg) + srcs = sources.fetch_source_table(_url) + assert len(srcs) >= 1