From 294b6d4dae9090cae829e651072dd2bb58df4d79 Mon Sep 17 00:00:00 2001
From: jonasHanhan <130035609+jonasHanhan@users.noreply.github.com>
Date: Fri, 5 Apr 2024 06:52:02 +0800
Subject: [PATCH] fix(ingestion/mongodb): MongoDB source unable to parse datetimes with years > 9999 (#10110)

Co-authored-by: JonasHan
---
 metadata-ingestion/src/datahub/ingestion/source/mongodb.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
index f4afd441ba3ef0..516f5b717e6ea3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
@@ -4,7 +4,6 @@
 from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, ValuesView
 
 import bson.timestamp
-import pymongo
 import pymongo.collection
 from packaging import version
 from pydantic import PositiveInt, validator
@@ -282,7 +281,8 @@ def __init__(self, ctx: PipelineContext, config: MongoDBConfig):
             **self.config.options,
         }
 
-        self.mongo_client = pymongo.MongoClient(self.config.connect_uri, **options)  # type: ignore
+        # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
+        self.mongo_client = MongoClient(self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options)  # type: ignore
 
         # This cheaply tests the connection. For details, see
         # https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient