From 93c04a0d88006794fd428480efb2cb498d821e75 Mon Sep 17 00:00:00 2001 From: Tsung-Ju Lii Date: Thu, 17 Oct 2024 12:24:39 +0800 Subject: [PATCH] [sc-29393] MongoDB crawler ignore system collections --- metaphor/mongodb/README.md | 2 +- metaphor/mongodb/config.py | 6 +++++- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/metaphor/mongodb/README.md b/metaphor/mongodb/README.md index fa6df335..93243812 100644 --- a/metaphor/mongodb/README.md +++ b/metaphor/mongodb/README.md @@ -15,7 +15,7 @@ infer_schema_sample_size: # Number of documents to sample in a collection excluded_databases: # Databases to ignore. By default the databases "admin", "config", "local", "system" are excluded. - db1 - db2 -excluded_collections: # Collections to ignore. +excluded_collections: # Collections to ignore. By default the database-specific system collections "system.buckets", "system.profile", "system.js", "system.views" are excluded; see https://www.mongodb.com/docs/manual/reference/system-collections/#database-specific-collections. - coll1 - coll2 ``` diff --git a/metaphor/mongodb/config.py b/metaphor/mongodb/config.py index c81deb68..8530102e 100644 --- a/metaphor/mongodb/config.py +++ b/metaphor/mongodb/config.py @@ -19,7 +19,11 @@ class MongoDBConfig(BaseConfig): excluded_databases: Set[str] = Field( default_factory=lambda: set(["admin", "config", "local", "system"]) ) - excluded_collections: Set[str] = Field(default_factory=set) + excluded_collections: Set[str] = Field( + default_factory=lambda: set( + ["system.buckets", "system.profile", "system.js", "system.views"] + ) + ) @field_validator("auth_mechanism", mode="before") def _validate_auth_mechanism(cls, auth_mechanism: str): diff --git a/pyproject.toml b/pyproject.toml index 55d1806e..df4b2bd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.126" +version = "0.14.127" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to
ingest into the Metaphor app." authors = ["Metaphor "]