diff --git a/CHANGELOG.md b/CHANGELOG.md index 69379a7d..33d2d8f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.19.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.18.3...v1.19.0) (2024-09-13) + + +### Features + +* integration of o1 ([5c25da2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c25da2fe64b4b64a00f1879f3d5dcfbf1512848)) + ## [1.19.0-beta.12](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.19.0-beta.11...v1.19.0-beta.12) (2024-09-14) @@ -10,6 +17,7 @@ * added telemetry info ([62912c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/62912c263ec7144e2d509925593027a60d258672)) + ## [1.19.0-beta.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.19.0-beta.10...v1.19.0-beta.11) (2024-09-13) diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index 2962f51b..4d9d6330 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("OPENAI_API_KEY"), - "model": "openai/gpt-4o", + "model": "openai/o1-preview", }, "verbose": True, "headless": False, diff --git a/pyproject.toml b/pyproject.toml index 7f70f40f..6650e765 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "scrapegraphai" - version = "1.19.0b12" + description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ { name = "Marco Vinciguerra", email = "mvincig11@gmail.com" }, diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index c90cf1f6..99f1f7df 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -23,7 +23,8 @@ "gpt-4o-2024-08-06": 128000, "gpt-4o-2024-05-13": 128000, "gpt-4o-mini":128000, - + "o1-preview":128000, + "o1-mini":128000 }, "azure_openai": { "gpt-3.5-turbo-0125": 16385, @@ -43,7 +44,9 @@ "gpt-4-32k-0613": 32768, "gpt-4o": 128000, "gpt-4o-mini":128000, - "chatgpt-4o-latest": 128000 + "chatgpt-4o-latest": 128000, + "o1-preview":128000, + "o1-mini":128000 }, "google_genai": { "gemini-pro": 128000, diff --git a/scrapegraphai/utils/copy.py b/scrapegraphai/utils/copy.py index d6bb839a..838f3c05 100644 --- a/scrapegraphai/utils/copy.py +++ b/scrapegraphai/utils/copy.py @@ -1,20 +1,39 @@ -""" -copy module -""" import copy -from typing import Any, Dict, Optional -from pydantic.v1 import BaseModel +from typing import Any + class DeepCopyError(Exception): - """Custom exception raised when an object cannot be deep-copied.""" + """ + Custom exception raised when an object cannot be deep-copied. + """ + pass + +def is_boto3_client(obj): + """ + Check whether obj is a boto3 client (a botocore BaseClient instance); always False when boto3 has not been imported + """ + import sys + + boto3_module = sys.modules.get("boto3") + + if boto3_module: + try: + from botocore.client import BaseClient + + return isinstance(obj, BaseClient) + except (AttributeError, ImportError): + return False + return False + + def safe_deepcopy(obj: Any) -> Any: """ Attempts to create a deep copy of the object using `copy.deepcopy` whenever possible. If that fails, it falls back to custom deep copy logic. If that also fails, it raises a `DeepCopyError`. - + Args: obj (Any): The object to be copied, which can be of any type. 
@@ -27,36 +46,40 @@ def safe_deepcopy(obj: Any) -> Any: """ try: + return copy.deepcopy(obj) except (TypeError, AttributeError) as e: + if isinstance(obj, dict): new_obj = {} + for k, v in obj.items(): new_obj[k] = safe_deepcopy(v) return new_obj elif isinstance(obj, list): new_obj = [] + for v in obj: new_obj.append(safe_deepcopy(v)) return new_obj elif isinstance(obj, tuple): new_obj = tuple(safe_deepcopy(v) for v in obj) + return new_obj elif isinstance(obj, frozenset): new_obj = frozenset(safe_deepcopy(v) for v in obj) return new_obj - elif hasattr(obj, "__dict__"): + elif is_boto3_client(obj): + return obj + + else: try: return copy.copy(obj) except (TypeError, AttributeError): - raise DeepCopyError(f"Cannot deep copy the object of type {type(obj)}") from e - - - try: - return copy.copy(obj) - except (TypeError, AttributeError): - raise DeepCopyError(f"Cannot deep copy the object of type {type(obj)}") from e + raise DeepCopyError( + f"Cannot deep copy the object of type {type(obj)}" + ) from e diff --git a/tests/utils/copy_utils_test.py b/tests/utils/copy_utils_test.py index 90c85d34..4f684088 100644 --- a/tests/utils/copy_utils_test.py +++ b/tests/utils/copy_utils_test.py @@ -184,3 +184,9 @@ def test_with_pydantic(): copy_obj = safe_deepcopy(original) assert copy_obj.value == original.value assert copy_obj is not original + +def test_with_boto3(): + import boto3 + boto_client = boto3.client("bedrock-runtime", region_name="us-west-2") + copy_obj = safe_deepcopy(boto_client) + assert copy_obj == boto_client \ No newline at end of file