Re-enabled linting in build process #2586

Merged: 6 commits, Sep 8, 2023
30 changes: 15 additions & 15 deletions .github/workflows/lint.yml
@@ -2,10 +2,9 @@ name: Lint
 on:
   push:
     branches:
-      - master
+      - 'main'
   pull_request:
     branches:
       - '**'
 
-
 concurrency:
   cancel-in-progress: true
@@ -17,36 +16,37 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v3.1.4
+      - name: Set up Python
+        uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
-      - uses: actions/cache@v2
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('deeplake/requirements/*.txt') }}
-          restore-keys: |
-            ${{ runner.os }}-pip-
+          python-version: "3.10"
+          cache: pip
+          cache-dependency-path: deeplake/requirements/*.txt
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r deeplake/requirements/common.txt
           pip install -r deeplake/requirements/tests.txt
           pip install -r deeplake/requirements/plugins.txt
 
       - name: Install deeplake
         run: pip install -e .
 
       - name: Check formatting with black
-        continue-on-error: true
+        if: always()
        run: |
           black --version
           black --check .
 
       - name: Lint docstrings with darglint
-        continue-on-error: true
+        if: always()
         run: |
           darglint --version
           darglint .
 
       - name: Check typing with mypy
-        continue-on-error: true
+        if: always()
         run: |
           mypy --version
           mypy .
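
The change that gives the PR its title is the swap of `continue-on-error: true` for `if: always()` on the three lint steps. With `continue-on-error`, a failing black, darglint, or mypy step was reported but never failed the job, so linting was effectively disabled; `if: always()` still lets each step run after an earlier one fails, but a lint failure now fails the build. A rough local equivalent in Python (a hypothetical helper script, not part of this repo):

```python
# Run every linter to completion, then fail if any of them failed --
# the same semantics `if: always()` restores in the workflow above.
import subprocess
import sys

CHECKS = [
    ["black", "--check", "."],
    ["darglint", "."],
    ["mypy", "."],
]

def main() -> int:
    failed = False
    for cmd in CHECKS:
        # keep going after a failure so all diagnostics get reported
        if subprocess.run(cmd).returncode != 0:
            failed = True
    return 1 if failed else 0

if __name__ == "__main__":
    sys.exit(main())
```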
6 changes: 3 additions & 3 deletions deeplake/auto/structured/dataframe.py
@@ -7,7 +7,7 @@
 from deeplake.util.dataset import sanitize_tensor_name
 
 from collections import defaultdict
-from typing import DefaultDict, List, Union, Optional
+from typing import DefaultDict, List, Union, Optional, Dict
 from deeplake.core.sample import Sample
 from deeplake.core.linked_sample import LinkedSample
 import pathlib
@@ -53,7 +53,7 @@ def _initialize_params(self, column_params):
                 column_params[key] = {"name": sanitize_tensor_name(key)}
         self.column_params = column_params
 
-    def _get_most_frequent_image_extension(self, fn_iterator):
+    def _get_most_frequent_image_extension(self, fn_iterator: List[str]):
         # TODO: Make this generic and work for any htype that requires compression
 
         if len(fn_iterator) == 0:
@@ -77,7 +77,7 @@ def _get_most_frequent_image_extension(self, fn_iterator):
         )
         return most_frequent_image_extension
 
-    def _parse_tensor_params(self, key, inspect_limit=1000):
+    def _parse_tensor_params(self, key: str, inspect_limit: int = 1000):
         """Parse the tensor parameters for a column. Required parameters that are not specified will be inferred by inspecting up to 'inspect_limit' rows in the data."""
 
         tensor_params: Dict = self.column_params[key]
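
The annotations added here (`fn_iterator: List[str]`, `key: str`, `inspect_limit: int = 1000`) only pin down signatures; behavior is unchanged. For readers unfamiliar with the helper, its job reduces to tallying file suffixes, roughly like this standalone sketch (hypothetical name, not the repo's implementation):

```python
import pathlib
from collections import Counter
from typing import List, Optional

def most_frequent_image_extension(fn_iterator: List[str]) -> Optional[str]:
    # Count each filename's suffix and return the most common one.
    if not fn_iterator:
        return None
    extensions = [pathlib.Path(fn).suffix.lstrip(".").lower() for fn in fn_iterator]
    return Counter(extensions).most_common(1)[0][0]
```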
16 changes: 11 additions & 5 deletions deeplake/core/chunk_engine.py
@@ -2,7 +2,7 @@
 from deeplake.client.log import logger
 import deeplake
 import numpy as np
-from tqdm import tqdm  # type: ignore
+from tqdm import tqdm
 from typing import (
     Any,
     Callable,
@@ -1739,7 +1739,7 @@
         buffer = chunk.memoryview_data
         if not buffer:
             return b""
-        if self.is_sequence:
+        if self.is_sequence and self.sequence_encoder is not None:
             start_idx, end_idx = self.sequence_encoder[global_sample_index]
             end_idx -= 1
             start_idx, end_idx = map(
@@ -2211,7 +2211,7 @@
             for chunk in self.list_all_chunks()
         ]
 
-    def list_orphaned_chunks(self, storage):
+    def list_orphaned_chunks(self, storage: StorageProvider) -> List[str]:
         """Return paths for orphaned chunks (chunks what are not linked to the `current_version`)"""
 
         commit_id = self.commit_id
@@ -2242,7 +2242,7 @@
         sample_id: Optional[int] = None,
     ):
         if global_sample_index is None:
-            if self.is_sequence:
+            if self.is_sequence and self.sequence_encoder is not None:
                 global_sample_index = self.sequence_encoder.num_samples - 1
             else:
                 global_sample_index = self.num_samples - 1
@@ -2263,7 +2263,7 @@
                 link_callback(global_sample_index)
 
         self.commit_diff.pop(global_sample_index, sample_id)
-        if self.is_sequence:
+        if self.is_sequence and self.sequence_encoder is not None:
             # pop in reverse order else indices get shifted
             for idx in reversed(range(*self.sequence_encoder[global_sample_index])):
                 self.pop_item(idx)
@@ -2431,6 +2431,7 @@
             return self.get_empty_sample()
         if index.subscriptable_at(0) and index.subscriptable_at(1):
             item_lengths = []
+            assert self.sequence_encoder is not None
             for i in index.values[0].indices(self._sequence_length):
                 item_length = index.length_at(
                     1, -int(np.subtract(*self.sequence_encoder[i]))
@@ -2475,6 +2476,7 @@
             for j in y.indices(_item_length):
                 yield i * _item_length + j
 
+        assert self.sequence_encoder is not None
         idx0_gen.__len__ = (  # type: ignore
             (
                 lambda: sum(
@@ -2693,6 +2695,7 @@
         sample_shapes = np.zeros((num_samples, sample_ndim), dtype=np.int32)
 
         if flatten:
+            assert self.sequence_encoder is not None
             # fill sample shapes with sequence item shapes, no nesting
             start, end = self.sequence_encoder[idx]
             length = end - start
@@ -2993,6 +2996,7 @@
                     chunk_engine._transform_callback(vs, flat)
             except Exception:
                 for k, num_samples in updated_tensors.items():
+                    assert self._all_chunk_engines is not None
[Codecov / codecov/patch: added line #L2999 was not covered by tests]
                     chunk_engine = self._all_chunk_engines[k]
                     num_samples_added = chunk_engine.tensor_length - num_samples
                     for _ in range(num_samples_added):
@@ -3009,6 +3013,8 @@
 
         if flat_links:
             seq_enc = self.sequence_encoder
+            assert seq_enc is not None
+            assert self._all_chunk_engines is not None
[Codecov / codecov/patch: added lines #L3016-L3017 were not covered by tests]
             for link in flat_links:
                 link_chunk_engine = self._all_chunk_engines[link]
                 for idx in reversed(range(*seq_enc[index])):
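
Most of the chunk_engine changes follow one mypy pattern: `sequence_encoder` (like `_all_chunk_engines`) is an Optional attribute, so every indexing of it must first narrow the type, either with an inline `and ... is not None` guard or with an `assert`. A minimal self-contained sketch of the two styles (hypothetical classes, not the real engine):

```python
from typing import Dict, Optional, Tuple

class SequenceEncoder:
    """Maps a sample index to its (start, end) item range."""

    def __init__(self, ranges: Dict[int, Tuple[int, int]]):
        self._ranges = ranges

    def __getitem__(self, idx: int) -> Tuple[int, int]:
        return self._ranges[idx]

class EngineSketch:
    def __init__(self, sequence_encoder: Optional[SequenceEncoder] = None):
        self.sequence_encoder = sequence_encoder

    @property
    def is_sequence(self) -> bool:
        return self.sequence_encoder is not None

    def item_range(self, idx: int) -> Optional[Tuple[int, int]]:
        # Style 1: inline guard. `self.is_sequence` alone does not narrow the
        # attribute's type for mypy, so the explicit None check is required.
        if self.is_sequence and self.sequence_encoder is not None:
            return self.sequence_encoder[idx]
        return None

    def item_range_or_raise(self, idx: int) -> Tuple[int, int]:
        # Style 2: assert. Narrows Optional[...] for mypy, documents the
        # invariant, and raises AssertionError if it is ever violated.
        assert self.sequence_encoder is not None
        return self.sequence_encoder[idx]
```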
2 changes: 1 addition & 1 deletion deeplake/core/dataset/dataset.py
@@ -10,7 +10,7 @@
 import pathlib
 import numpy as np
 from time import time, sleep
-from tqdm import tqdm  # type: ignore
+from tqdm import tqdm
 
 import deeplake
 from deeplake.core.index.index import IndexEntry
4 changes: 3 additions & 1 deletion deeplake/core/linked_chunk_engine.py
@@ -153,7 +153,9 @@ def get_video_sample(self, global_sample_index, index, decompress=True):
         except Exception as e:
             raise GetDataFromLinkError(path)
 
-    def get_full_tiled_sample(self, global_sample_index, fetch_chunks=False):
+    def get_full_tiled_sample(
+        self, global_sample_index: int, fetch_chunks: bool = False
+    ):
         tile_enc = self.tile_encoder
         shape = tile_enc.get_sample_shape(global_sample_index)
         tile_shape = tile_enc.get_tile_shape(global_sample_index)
6 changes: 5 additions & 1 deletion deeplake/core/meta/encode/tile.py
@@ -6,7 +6,11 @@
 
 
 class TileEncoder(DeepLakeMemoryObject):
-    def __init__(self, entries=None, version=None):
+    def __init__(
+        self,
+        entries: Optional[Dict[int, Tuple[Tuple[int, ...], Tuple[int, ...]]]] = None,
+        version: Optional[str] = None,
+    ):
         self.is_dirty = False
         self.entries: Dict[int, Tuple[Tuple[int, ...], Tuple[int, ...]]] = entries or {}
         self.version = version or deeplake.__version__
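
The expanded `__init__` signature documents the shape of `entries`: a dict from sample index to a pair of shape tuples, `(sample_shape, tile_shape)`. A hypothetical value of that type (shapes invented for illustration):

```python
from typing import Dict, Tuple

# sample index -> (full sample shape, shape of each tile it is split into)
entries: Dict[int, Tuple[Tuple[int, ...], Tuple[int, ...]]] = {
    0: ((10000, 10000, 3), (2048, 2048, 3)),
    1: ((4096, 4096, 1), (1024, 1024, 1)),
}
```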
2 changes: 1 addition & 1 deletion deeplake/core/partial_sample.py
@@ -9,7 +9,7 @@ def __init__(
         self,
         sample_shape: Tuple[int, ...],
         tile_shape: Optional[Tuple[int, ...]] = None,
-        dtype: Union[str, np.dtype] = np.dtype("uint8"),
+        dtype: Optional[Union[str, np.dtype]] = np.dtype("uint8"),
     ):
         self.sample_shape = sample_shape
         self.tile_shape = tile_shape
4 changes: 2 additions & 2 deletions deeplake/core/query/filter.py
@@ -288,9 +288,9 @@ def filter_inplace(
     vds_thread = _get_vds_thread(vds, vds_queue, num_samples)
     vds_thread.start()
     if progressbar:
-        from tqdm import tqdm  # type: ignore
+        from tqdm import tqdm
 
-        it = tqdm(it, total=num_samples)
+        it = tqdm(it, total=num_samples)  # type: ignore
 
     query_id = hash_inputs(dataset.path, dataset.pending_commit_id, query_text)
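
The `# type: ignore` moves from the import to the call site: the import itself now type-checks (recent tqdm releases ship type information, presumably why the PR drops the ignore on every `from tqdm import tqdm`), but re-binding `it` to the tqdm wrapper changes the variable's inferred type, which mypy flags. A condensed sketch of the same situation:

```python
from typing import Iterable
from tqdm import tqdm  # resolves cleanly, no ignore needed

def consume(it: Iterable[int], num_samples: int) -> int:
    total = 0
    # re-binding `it` to a tqdm object changes its inferred type from
    # Iterable[int], so the assignment carries the ignore instead
    it = tqdm(it, total=num_samples)  # type: ignore
    for x in it:
        total += x
    return total
```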
3 changes: 3 additions & 0 deletions deeplake/core/tensor.py
@@ -1112,6 +1112,7 @@
 
         if flat_links:
             seq_enc = self.chunk_engine.sequence_encoder
+            assert seq_enc is not None
             for link in flat_links:
                 link_tensor = self.dataset[rev_tensor_names.get(link)]
                 for idx in reversed(range(*seq_enc[global_sample_index])):
@@ -1159,6 +1160,7 @@
         if self.is_sequence:
 
             def get_sample_shape(global_sample_index: int):
+                assert self.chunk_engine.sequence_encoder is not None
                 seq_pos = slice(
                     *self.chunk_engine.sequence_encoder[global_sample_index]
                 )
@@ -1179,6 +1181,7 @@
 
     def _get_sample_info_at_index(self, global_sample_index: int, sample_info_tensor):
         if self.is_sequence:
+            assert self.chunk_engine.sequence_encoder is not None
[Codecov / codecov/patch: added line #L1184 was not covered by tests]
             return [
                 sample_info_tensor[i].data()
                 for i in range(*self.chunk_engine.sequence_encoder[global_sample_index])
2 changes: 1 addition & 1 deletion deeplake/core/vectorstore/deeplake_vectorstore.py
@@ -295,7 +295,7 @@ def add(
 
     def search(
         self,
-        embedding_data: Union[str, List[str]] = None,
+        embedding_data: Union[str, List[str], None] = None,
         embedding_function: Optional[Callable] = None,
         embedding: Optional[Union[List[float], np.ndarray]] = None,
         k: int = 4,
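
This one-line change fixes an implicit-Optional annotation: `embedding_data: Union[str, List[str]] = None` declares a type that does not admit its own default. Since mypy 0.990, `no_implicit_optional` is on by default, so the `None` member must be spelled out. Either explicit form passes:

```python
from typing import List, Optional, Union

# rejected by default since mypy 0.990: the annotation excludes the None default
# def search(embedding_data: Union[str, List[str]] = None): ...

def search_a(embedding_data: Union[str, List[str], None] = None): ...
def search_b(embedding_data: Optional[Union[str, List[str]]] = None): ...
```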
8 changes: 4 additions & 4 deletions deeplake/core/vectorstore/vector_search/dataset/dataset.py
@@ -393,7 +393,7 @@ def extend(
     embedding_function: List[Callable],
     embedding_data: List[Any],
     embedding_tensor: Union[str, List[str]],
-    processed_tensors: Dict[str, List[Any]],
+    processed_tensors: Dict[str, Union[List[Any], np.ndarray]],
     dataset: deeplake.core.dataset.Dataset,
 ):
     """
@@ -437,14 +437,14 @@ def extend(
                 if diff > 0:
                     time.sleep(diff)
             try:
-                embedded_data = np.vstack(embedded_data).astype(dtype=np.float32)
+                return_embedded_data = np.vstack(embedded_data).astype(dtype=np.float32)
             except ValueError:
                 raise IncorrectEmbeddingShapeError()
 
-            if len(embedded_data) == 0:
+            if len(return_embedded_data) == 0:
                 raise ValueError("embedding function returned empty list")
 
-            processed_tensors[tensor] = embedded_data
+            processed_tensors[tensor] = return_embedded_data
 
     dataset.extend(processed_tensors)
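
The rename from `embedded_data` to `return_embedded_data` is a typing fix, not a behavior change: the name was first bound to a `List`, and re-binding it to the `np.ndarray` returned by `np.vstack` makes mypy reject the assignment. A self-contained sketch of the fixed shape (the exception class here is a stand-in for deeplake's own):

```python
from typing import Any, List
import numpy as np

class IncorrectEmbeddingShapeError(Exception):
    """Stand-in for deeplake's exception of the same name."""

def stack_embeddings(embedded_data: List[Any]) -> np.ndarray:
    try:
        # binding a fresh name keeps the List and ndarray types separate
        return_embedded_data = np.vstack(embedded_data).astype(dtype=np.float32)
    except ValueError:
        raise IncorrectEmbeddingShapeError()
    if len(return_embedded_data) == 0:
        raise ValueError("embedding function returned empty list")
    return return_embedded_data
```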
2 changes: 1 addition & 1 deletion deeplake/core/vectorstore/vector_search/filter/filter.py
@@ -67,7 +67,7 @@ def attribute_based_filtering_tql(
             val_str = (
                 f"'{filter[tensor]}'"
                 if isinstance(filter[tensor], str)
-                or isinstance(filter[tensor], np._str)
+                or isinstance(filter[tensor], np.str_)
                 else f"{filter[tensor]}"
             )
             tql_filter += f"{tensor} == {val_str} and "
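
This hunk fixes a real bug rather than an annotation: `np._str` does not exist in NumPy's public API, so that attribute access would raise `AttributeError` whenever the branch executed, while `np.str_` is the actual string scalar type of values read out of NumPy arrays. A quick demonstration:

```python
import numpy as np

value = np.array(["cat"])[0]       # element is a NumPy string scalar
print(type(value))                 # <class 'numpy.str_'>
assert isinstance(value, np.str_)  # the public name; np._str would raise AttributeError
assert isinstance(value, str)      # np.str_ subclasses str, so this is also True
```

Since `np.str_` subclasses the built-in `str`, the first `isinstance` check in the expression already covers it; the practical effect of the fix is removing the latent `AttributeError`.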
@@ -68,7 +68,7 @@ def search(
     else:
         if not INDRA_INSTALLED:
             raise raise_indra_installation_error(
-                indra_import_error=False
+                indra_import_error=None
             )  # pragma: no cover
         return_data = {}
3 changes: 2 additions & 1 deletion deeplake/core/version_control/dataset_diff.py
@@ -3,6 +3,7 @@
 from deeplake.util.keys import get_dataset_diff_key
 import typing
 from collections import OrderedDict
+import deeplake.core.dataset
 
 
 class DatasetDiff(DeepLakeMemoryObject):
@@ -118,7 +119,7 @@ def tensor_deleted(self, name):
         self.is_dirty = True
 
 
-def load_dataset_diff(dataset):
+def load_dataset_diff(dataset: "deeplake.core.dataset.Dataset"):
     storage: LRUCache = dataset.storage
     path = get_dataset_diff_key(dataset.version_state["commit_id"])
     try:
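
Typing `load_dataset_diff` uses the module-import-plus-string-annotation pattern rather than `from deeplake.core.dataset import Dataset`. A plain `import deeplake.core.dataset` tolerates a partially initialized module during a circular import, and the quoted annotation defers the attribute lookup, so the cycle never bites at import time. In miniature (module names hypothetical):

```python
# diff_utils.py -- wants to annotate with Dataset, but dataset.py imports
# this module too, so `from mypackage.dataset import Dataset` would be circular
import mypackage.dataset  # hypothetical module

def load_diff(dataset: "mypackage.dataset.Dataset") -> None:
    # the string annotation is not evaluated at import time, and the
    # module attribute is only resolved lazily if it ever is
    ...
```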
6 changes: 5 additions & 1 deletion deeplake/enterprise/convert_to_libdeeplake.py
@@ -1,3 +1,5 @@
+from deeplake import Dataset
+
 from deeplake.core.storage.gcs import GCSProvider
 from deeplake.enterprise.util import raise_indra_installation_error  # type: ignore
 from deeplake.core.storage import S3Provider
@@ -143,7 +145,7 @@ def _get_indra_ds_from_s3_provider(
     )
 
 
-def dataset_to_libdeeplake(hub2_dataset):
+def dataset_to_libdeeplake(hub2_dataset: Dataset):
     """Convert a hub 2.x dataset object to a libdeeplake dataset object."""
     try_flushing(hub2_dataset)
     api = import_indra_api()
@@ -208,6 +210,8 @@ def dataset_to_libdeeplake(hub2_dataset):
         hub2_dataset.libdeeplake_dataset = libdeeplake_dataset
     else:
         libdeeplake_dataset = hub2_dataset.libdeeplake_dataset
+
+    assert libdeeplake_dataset is not None
     commit_id = hub2_dataset.pending_commit_id
     libdeeplake_dataset.checkout(commit_id)
     slice_ = hub2_dataset.index.values[0].value
2 changes: 1 addition & 1 deletion deeplake/enterprise/test_pytorch.py
@@ -758,7 +758,7 @@ def test_batch_sampler_attribute(local_auth_ds):
 @pytest.mark.slow
 @pytest.mark.flaky
 def test_pil_decode_method(local_auth_ds):
-    from indra.pytorch.exceptions import CollateExceptionWrapper
+    from indra.pytorch.exceptions import CollateExceptionWrapper  # type: ignore
 
     with local_auth_ds as ds:
         ds.create_tensor("x", htype="image", sample_compression="jpeg")
2 changes: 1 addition & 1 deletion deeplake/integrations/huggingface/huggingface.py
@@ -4,7 +4,7 @@
 import posixpath
 import deeplake
 from typing import Optional
-from tqdm import tqdm  # type: ignore
+from tqdm import tqdm
 from deeplake.util.bugout_reporter import feature_report_path, deeplake_reporter
2 changes: 1 addition & 1 deletion deeplake/integrations/mmdet/mmdet_utils.py
@@ -22,7 +22,7 @@
 import json
 import mmcv  # type: ignore
 import math
-from tqdm import tqdm  # type: ignore
+from tqdm import tqdm
[Codecov / codecov/patch: added line #L25 was not covered by tests]
 
 
 def _isArrayLike(obj):
2 changes: 1 addition & 1 deletion deeplake/integrations/pytorch/dataset.py
@@ -144,7 +144,7 @@ def __init__(
             streaming.list_blocks()
         )
 
-    def __iter__(self):
+    def __iter__(self: "TorchDataset"):
         worker_info = torch.utils.data.get_worker_info()
         schedule: Schedule = self.schedules[0]
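
Annotating `self` explicitly is legal and occasionally needed to give mypy a concrete type; the class name must be quoted because `TorchDataset` is not bound yet while its own body is executing. In miniature (simplified class, not the real one):

```python
from typing import Iterator

class TorchDataset:
    def __init__(self) -> None:
        self.schedules = [range(3)]

    def __iter__(self: "TorchDataset") -> Iterator[int]:
        # the quoted forward reference avoids a NameError: the name
        # TorchDataset does not exist yet when this signature is evaluated
        return iter(self.schedules[0])
```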
2 changes: 1 addition & 1 deletion deeplake/integrations/pytorch/shuffle_buffer.py
@@ -9,7 +9,7 @@
 
 from PIL import Image  # type: ignore
 from io import BytesIO
-from tqdm import tqdm  # type: ignore
+from tqdm import tqdm
 from deeplake.util.warnings import always_warn
 from deeplake.constants import MB
 import deeplake