Skip to content

Commit

Permalink
fix: add missing pipeline methods and tests in parallelize
Browse files: browse the repository at this point in the history
  • Loading branch information
percevalw committed Nov 2, 2023
1 parent 5ec4c57 commit 43cee30
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 175 deletions.
28 changes: 27 additions & 1 deletion edsnlp/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def pipeline(self) -> List[Tuple[str, Pipe]]:
def pipe_names(self) -> List[str]:
return FrozenList([name for name, _ in self._components])

component_names = pipe_names

def get_pipe(self, name: str) -> Pipe:
"""
Get a component by its name.
Expand Down Expand Up @@ -204,6 +206,9 @@ def create_pipe(
def add_pipe(
self,
factory: Union[str, Pipe],
first: bool = False,
before: Optional[str] = None,
after: Optional[str] = None,
name: Optional[str] = None,
config: Optional[Dict[str, Any]] = None,
) -> Pipe:
Expand All @@ -217,6 +222,15 @@ def add_pipe(
name: Optional[str]
The name of the component. If not provided, the name of the component
will be used if it has one (.name), otherwise the factory name will be used.
first: bool
Whether to add the component to the beginning of the pipeline. This argument
is mutually exclusive with `before` and `after`.
before: Optional[str]
The name of the component to add the new component before. This argument is
mutually exclusive with `after` and `first`.
after: Optional[str]
The name of the component to add the new component after. This argument is
mutually exclusive with `before` and `first`.
config: Dict[str, Any]
The arguments to pass to the component factory.
Expand Down Expand Up @@ -251,7 +265,19 @@ def add_pipe(
"The component does not have a name, so you must provide one",
)
pipe.name = name
self._components.append((name, pipe))
assert sum([before is not None, after is not None, first]) <= 1, (
"You can only use one of before, after, or first",
)
insertion_idx = (
0
if first
else self.pipe_names.index(before)
if before is not None
else self.pipe_names.index(after) + 1
if after is not None
else len(self._components)
)
self._components.insert(insertion_idx, (name, pipe))
return pipe

def get_pipe_meta(self, name: str) -> FactoryMeta:
Expand Down
169 changes: 0 additions & 169 deletions tests/pipelines/trainable/test_span_classifier.py

This file was deleted.

7 changes: 2 additions & 5 deletions tests/processing/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import databricks.koalas # noqa F401
import pandas as pd
import pytest
import spacy
from pyspark.sql import types as T
from pyspark.sql.session import SparkSession

Expand Down Expand Up @@ -58,14 +57,12 @@ def note(module: DataFrameModules):


@pytest.fixture
def model(lang):
def model(blank_nlp):
# Creates the spaCy instance
nlp = spacy.blank(lang)
nlp = blank_nlp

# Normalisation of accents, case and other special characters
nlp.add_pipe("eds.normalizer")
# Detecting end of lines
nlp.add_pipe("eds.sentences")

# Extraction of named entities
nlp.add_pipe(
Expand Down

0 comments on commit 43cee30

Please sign in to comment.