Skip to content

Commit

Permalink
Make pymongoarrow optional (dlt-hub#568)
Browse files Browse the repository at this point in the history
* Add test for support for loading MongoDB data without pymongoarrow.

Signed-off-by: Marcel Coetzee <[email protected]>

* Make pymongoarrow optional in requirements file

Signed-off-by: Marcel Coetzee <[email protected]>

---------

Signed-off-by: Marcel Coetzee <[email protected]>
  • Loading branch information
Pipboyguy authored and Nicolas ESTRADA committed Oct 5, 2024
1 parent 820b67d commit 5626352
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 5 deletions.
17 changes: 15 additions & 2 deletions sources/mongodb/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@
TCollection = Any
TCursor = Any

try:
import pymongoarrow # type: ignore

PYMONGOARROW_AVAILABLE = True
except ImportError:
PYMONGOARROW_AVAILABLE = False


class CollectionLoader:
def __init__(
Expand Down Expand Up @@ -345,15 +352,21 @@ def collection_documents(
Returns:
Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
"""
if data_item_format == "arrow" and not PYMONGOARROW_AVAILABLE:
dlt.common.logger.warn(
"'pymongoarrow' is not installed; falling back to standard MongoDB CollectionLoader."
)
data_item_format = "object"

if parallel:
if data_item_format == "arrow":
LoaderClass = CollectionArrowLoaderParallel
elif data_item_format == "object":
else:
LoaderClass = CollectionLoaderParallel # type: ignore
else:
if data_item_format == "arrow":
LoaderClass = CollectionArrowLoader # type: ignore
elif data_item_format == "object":
else:
LoaderClass = CollectionLoader # type: ignore

loader = LoaderClass(
Expand Down
3 changes: 1 addition & 2 deletions sources/mongodb/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pymongo>=4.3.3
pymongoarrow>=1.3.0
pymongo>=3
dlt>=0.5.1
28 changes: 27 additions & 1 deletion tests/mongodb/test_mongodb_source.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import bson
import json
from unittest import mock

import bson
import dlt
import pyarrow
import pytest
from pendulum import DateTime, timezone
Expand Down Expand Up @@ -404,3 +407,26 @@ def test_filter_intersect(destination_name):

with pytest.raises(PipelineStepFailed):
pipeline.run(movies)


@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
@pytest.mark.parametrize("data_item_format", ["object", "arrow"])
def test_mongodb_without_pymongoarrow(
destination_name: str, data_item_format: str
) -> None:
with mock.patch.dict("sys.modules", {"pymongoarrow": None}):
pipeline = dlt.pipeline(
pipeline_name="test_mongodb_without_pymongoarrow",
destination=destination_name,
dataset_name="test_mongodb_without_pymongoarrow_data",
full_refresh=True,
)

comments = mongodb_collection(
collection="comments", limit=10, data_item_format=data_item_format
)
load_info = pipeline.run(comments)

assert load_info.loads_ids != []
table_counts = load_table_counts(pipeline, "comments")
assert table_counts["comments"] == 10

0 comments on commit 5626352

Please sign in to comment.