Skip to content

Commit

Permalink
support all ISO-formatted harvest timestamp strings
Browse files Browse the repository at this point in the history
dateutil is the de facto standard third-party library for parsing ISO-formatted date/time strings
  • Loading branch information
alexdunnjpl committed Nov 5, 2024
1 parent 0e9d6b2 commit 7147d65
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ install_requires =
botocore~=1.34.91
botocore-stubs~=1.34.94
requests-aws4auth~=1.2.3
python-dateutil~=2.9.0

# Change this to False if you use things like __file__ or __path__—which you
# shouldn't use anyway, because that's what ``pkg_resources`` is for 🙂
Expand Down
13 changes: 7 additions & 6 deletions src/pds/registrysweepers/reindexer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Iterable
from typing import Union

import dateutil.parser
import math
from opensearchpy import OpenSearch
from pds.registrysweepers.reindexer.constants import REINDEXER_FLAG_METADATA_KEY
Expand Down Expand Up @@ -159,19 +160,19 @@ def accumulate_missing_mappings(
if (mapping_missing or mapping_is_bad) and not problem_detected_in_document_already:
problem_detected_in_document_already = True
problem_docs_count += 1
attr_value = doc["_source"].get("ops:Harvest_Info/ops:harvest_date_time", None)
try:
doc_harvest_time = datetime.fromisoformat(
doc["_source"]["ops:Harvest_Info/ops:harvest_date_time"][0].replace("Z", ""),
)
doc_harvest_time = dateutil.parser.isoparse(attr_value[0]).astimezone(timezone.utc)

earliest_problem_doc_harvested_at = min(
doc_harvest_time, earliest_problem_doc_harvested_at or datetime.max
doc_harvest_time, earliest_problem_doc_harvested_at or doc_harvest_time
)
latest_problem_doc_harvested_at = max(
doc_harvest_time, latest_problem_doc_harvested_at or datetime.min
doc_harvest_time, latest_problem_doc_harvested_at or doc_harvest_time
)
except (KeyError, ValueError) as err:
log.warning(
f'Unable to parse "ops:Harvest_Info/ops:harvest_date_time" as zulu-formatted date from document {doc["_id"]}: {err}'
f'Unable to parse first element of "ops:Harvest_Info/ops:harvest_date_time" as ISO-formatted date from document {doc["_id"]}: {attr_value} ({err})'
)

try:
Expand Down

0 comments on commit 7147d65

Please sign in to comment.