Skip to content

Commit

Permalink
Replace numpy usage and remove from pyproject.toml (#1272)
Browse files (browse the repository at this point in the history)
* use random instead of numpy

* remove numpy from pyproject.toml
  • Loading branch information
kevinjqliu authored Oct 31, 2024
1 parent f7139fd commit 0cebec4
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 44 deletions.
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 4 additions & 31 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,6 @@ gcsfs = { version = ">=2023.1.0,<2024.1.0", optional = true }
psycopg2-binary = { version = ">=2.9.6", optional = true }
sqlalchemy = { version = "^2.0.18", optional = true }
getdaft = { version = ">=0.2.12", optional = true }
numpy = [
{ version = "1.26.0", python = ">=3.9,<3.13", optional = true },
]
cachetools = "^5.5.0"

[tool.poetry.group.dev.dependencies]
Expand Down Expand Up @@ -238,10 +235,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down Expand Up @@ -394,10 +387,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down Expand Up @@ -550,10 +539,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down Expand Up @@ -706,10 +691,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down Expand Up @@ -862,10 +843,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down Expand Up @@ -894,10 +871,10 @@ generate-setup-file = false
script = "build-module.py"

[tool.poetry.extras]
pyarrow = ["pyarrow", "numpy"]
pandas = ["pandas", "pyarrow", "numpy"]
duckdb = ["duckdb", "pyarrow", "numpy"]
ray = ["ray", "pyarrow", "pandas", "numpy"]
pyarrow = ["pyarrow"]
pandas = ["pandas", "pyarrow"]
duckdb = ["duckdb", "pyarrow"]
ray = ["ray", "pyarrow", "pandas"]
daft = ["getdaft"]
snappy = ["python-snappy"]
hive = ["thrift"]
Expand Down Expand Up @@ -1084,10 +1061,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "numpy.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
Expand Down
16 changes: 8 additions & 8 deletions tests/integration/test_writes/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
# pylint:disable=redefined-outer-name
import math
import os
import random
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Any, Dict
from urllib.parse import urlparse

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
Expand Down Expand Up @@ -1373,14 +1373,14 @@ def test_delete_threshold(session_catalog: Catalog) -> None:
date_start, date_end = date(2024, 1, 1), date(2024, 2, 1)

# Generate the 'id' column
id_column = np.random.randint(id_min, id_max, num_rows)
id_column = [random.randint(id_min, id_max) for _ in range(num_rows)]

# Generate the 'created_at' column as dates only
date_range = pd.date_range(start=date_start, end=date_end, freq="D") # Daily frequency for dates
created_at_column = np.random.choice(date_range, num_rows) # Convert to string (YYYY-MM-DD format)
date_range = pd.date_range(start=date_start, end=date_end, freq="D").to_list() # Daily frequency for dates
created_at_column = [random.choice(date_range) for _ in range(num_rows)] # Pick one random date from date_range per row

# Generate the 'relevancy_score' column with a peak around 0.1
relevancy_score_column = np.random.beta(a=2, b=20, size=num_rows) # Adjusting parameters to peak around 0.1
relevancy_score_column = [random.betavariate(2, 20) for _ in range(num_rows)] # Adjusting parameters to peak around 0.1

# Create the dataframe
df = pd.DataFrame({"id": id_column, "created_at": created_at_column, "relevancy_score": relevancy_score_column})
Expand All @@ -1403,12 +1403,12 @@ def test_delete_threshold(session_catalog: Catalog) -> None:

@pytest.mark.integration
def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) -> None:
np.random.seed(876)
random.seed(876)
N = 1440
d = {
"timestamp": pa.array([datetime(2023, 1, 1, 0, 0, 0) + timedelta(minutes=i) for i in range(N)]),
"category": pa.array([np.random.choice(["A", "B", "C"]) for _ in range(N)]),
"value": pa.array(np.random.normal(size=N)),
"category": pa.array([random.choice(["A", "B", "C"]) for _ in range(N)]),
"value": pa.array([random.gauss(0, 1) for _ in range(N)]),
}
data = pa.Table.from_pydict(d)

Expand Down

0 comments on commit 0cebec4

Please sign in to comment.