Skip to content

Commit

Permalink
Impala (#96)
Browse files Browse the repository at this point in the history
* First draft to include impala in ci.

* Use docker-compose.

* Remove postgres.

* Detach docker-compose.

* Return early for debugging.

* Debug db connection.

* Remove redundant statement.

* Wait for docker compose.

* Use environment variable instead of hardcoded reference.

* Remove wait flag again.

* Fix impala incompatibilities.

* Skip a whole lot of tests.

* Add tmate for debugging.

* Trim down ci for debugging.

* Use tmate version 3.13.

* Remove tmate.

* Add --wait flag.

* Reintroduce tmate.

* Remove tmate.

* Add tmate v3.

* Change docker compose invocation.

* Wait for specific service.

* Remove tmate again.

* Run integration tests only.

* Fix column capitalization integration tests.

* Reuse existing table.

* Enable parallelization.

* Disable stats tests for impala.

* Reintroduce tmate.

* Split up test execution.

* Remove concurrency.

* Add split up integration tests.

* Add further integration tests.

* Reinsert ordinary integration tests.

* Fix date difference query.

* Clean up tests.

* Fix redundant clause.

* Add impala entry to CHANGELOG.
  • Loading branch information
kklein authored Jan 17, 2023
1 parent 675043a commit 04e747f
Show file tree
Hide file tree
Showing 11 changed files with 427 additions and 18 deletions.
263 changes: 262 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ jobs:
- name: Run pre-commit checks
uses: pre-commit/[email protected]


linux:
name: "Linux - unit tests - Python ${{ matrix.PYTHON_VERSION }}"
runs-on: ubuntu-latest
Expand Down Expand Up @@ -228,3 +227,265 @@ jobs:
uses: codecov/[email protected]
with:
file: ./coverage.xml

# Impala integration tests, part 1: runs tests/integration/test_column_capitalization.py only.
# The Impala suite is spread over the jobs pt1..pt7; each job starts its own
# Impala container and runs a single test file or `-k` selection against it.
linux-integration_tests-impala-column-pt1:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt1"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_column_capitalization.py
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml


# Impala integration tests, part 2: runs tests/integration/test_data_source.py only.
# NOTE(review): the job id says "column" although this part runs the data-source tests.
linux-integration_tests-impala-column-pt2:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt2"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_data_source.py
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml

# Impala integration tests, part 3: tests in tests/integration/test_integration.py
# selected by the pytest keyword expression `-k row`.
# NOTE(review): the job id says "column" although this part selects the "row" tests.
linux-integration_tests-impala-column-pt3:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt3"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k row
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml


# Impala integration tests, part 4: tests in tests/integration/test_integration.py
# selected by the pytest keyword expression `-k uniques`.
# NOTE(review): the job id says "column" although this part selects the "uniques" tests.
linux-integration_tests-impala-column-pt4:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt4"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k uniques
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml

# Impala integration tests, part 5: tests in tests/integration/test_integration.py
# selected by the pytest keyword expression `-k date`.
# NOTE(review): the job id says "column" although this part selects the "date" tests.
linux-integration_tests-impala-column-pt5:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt5"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k date
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml

# Impala integration tests, part 6: tests in tests/integration/test_integration.py
# selected by the pytest keyword expression `-k varchar`.
# NOTE(review): the job id says "column" although this part selects the "varchar" tests.
linux-integration_tests-impala-column-pt6:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt6"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k varchar
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml


# Impala integration tests, part 7: tests in tests/integration/test_integration.py
# selected by the pytest keyword expression `-k numeric`.
# NOTE(review): the job id says "column" although this part selects the "numeric" tests.
linux-integration_tests-impala-column-pt7:
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt7"
runs-on: ubuntu-20.04
env:
CI: True
strategy:
# Do not cancel the remaining matrix entries when one of them fails.
fail-fast: false
matrix:
PYTHON_VERSION: [ '3.8']

steps:
- name: Checkout branch
uses: actions/checkout@v3
with:
# github.head_ref is only populated for pull-request events.
ref: ${{ github.head_ref }}
- name: Fetch full git history
# Converts the shallow clone left by the checkout step into a full clone.
run: git fetch --prune --unshallow
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.PYTHON_VERSION }}
miniforge-variant: Mambaforge
miniforge-version: 4.11.0-2
use-mamba: true
environment-file: environment.yml
activate-environment: datajudge
- name: Set up container
# Starts the `impala` compose service and, via --wait, blocks until it is running/healthy.
run: docker compose up --wait impala
- name: Run Integration Tests
# Login shell; presumably required so the `datajudge` conda environment is active — confirm against setup-miniconda docs.
shell: bash -l {0}
run: |
flit install -s
pytest --cov=datajudge --cov-report=xml --cov-append --backend=impala tests/integration/test_integration.py -k numeric
- name: Generate code coverage report
# NOTE(review): the action reference below looks mangled by e-mail obfuscation
# ("[email protected]"); restore the real `owner/action@version` from the repository.
uses: codecov/[email protected]
with:
file: ./coverage.xml
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Changelog
- Implement :meth:`~datajudge.BetweenRequirement.add_column_type_constraint`. Previously, only the ``WithinRequirement`` method existed.
- Implemented an option ``infer_pk`` to automatically retrieve the primary key definition as part of :meth:`datajudge.WithinRequirement.add_uniqueness_constraint`.
- Added a ``name`` parameter to all ``add_x_constraint`` methods of ``WithinRequirement`` and ``BetweenRequirement``. This gives the corresponding pytest test a custom name.
- Added preliminary support for Impala.


1.2.0 - 2022.10.21
Expand Down
64 changes: 64 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Taken from
# https://github.com/ibis-project/ibis/blob/master/docker-compose.yml
#
# Compose stack that provides an Impala instance for the integration tests.
# CI starts it with `docker compose up --wait impala`, which also brings up
# the services listed under `depends_on`.
version: "3.4"
services:
  impala:
    # Short-form depends_on: controls start order only, it does not wait for
    # the dependencies' health checks.
    depends_on:
      - impala-postgres
      - kudu
    environment:
      # Same value as POSTGRES_PASSWORD of the impala-postgres service.
      PGPASSWORD: postgres
    healthcheck:
      # Healthy once both ports accept TCP connections inside the container;
      # probed every 30s, up to 20 times.
      interval: 30s
      retries: 20
      test:
        - CMD-SHELL
        - nc -z 127.0.0.1 21050 && nc -z 127.0.0.1 50070
      timeout: 10s
    hostname: localhost
    # NOTE(review): `latest` is a moving tag; consider pinning a version or
    # digest so CI runs are reproducible.
    image: ibisproject/impala:latest
    ports:
      # Port mappings are quoted so they are always read as strings.
      - "21050:21050"
    networks:
      - impala
  impala-postgres:
    user: postgres
    hostname: postgres
    environment:
      POSTGRES_PASSWORD: postgres
    healthcheck:
      interval: 10s
      retries: 3
      test:
        - CMD
        - pg_isready
      timeout: 5s
    image: postgres:13.9-alpine
    networks:
      - impala
  kudu:
    cap_add:
      - SYS_TIME
    depends_on:
      - kudu-tserver
    environment:
      # String on purpose: environment values are passed to the container as text.
      KUDU_MASTER: "true"
    image: ibisproject/kudu:latest
    ports:
      - "7051:7051"
      - "8051:8051"
    networks:
      - impala
  kudu-tserver:
    cap_add:
      - SYS_TIME
    environment:
      KUDU_MASTER: "false"
    image: ibisproject/kudu:latest
    ports:
      - "7050:7050"
      - "8050:8050"
    networks:
      - impala
networks:
  # Shared network for all services above, with default settings.
  impala: {}
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ dependencies:
- flit-core
- flit
- sphinx-autodoc-typehints
- impyla
2 changes: 2 additions & 0 deletions src/datajudge/constraints/miscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def __init__(self, ref, primary_keys: List[str], name: str = None):
# Fetch primary-key information for ``self.ref`` via ``db_access.get_primary_keys``.
# Returns the retrieved values as a set, together with the selections reported
# by that call. Raises NotImplementedError when the engine is an Impala engine.
# NOTE(review): the ``ref`` argument is not used; the lookup goes through
# ``self.ref`` — confirm this is intended.
def retrieve(
self, engine: sa.engine.Engine, ref: DataReference
) -> Tuple[Set[str], OptionalSelections]:
# Impala is rejected up front, before any primary-key lookup is attempted.
if db_access.is_impala(engine):
raise NotImplementedError("Primary key retrieval does not work for Impala.")
values, selections = db_access.get_primary_keys(engine, self.ref)
return set(values), selections

Expand Down
2 changes: 2 additions & 0 deletions src/datajudge/constraints/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def __init__(
self.max_missing_fraction_getter = max_missing_fraction_getter

def test(self, engine: sa.engine.Engine) -> TestResult:
if db_access.is_impala(engine):
raise NotImplementedError("Currently not implemented for impala.")
self.max_missing_fraction = self.max_missing_fraction_getter(engine)
self.ref1_minus_ref2_sample, _ = db_access.get_row_difference_sample(
engine, self.ref, self.ref2
Expand Down
Loading

0 comments on commit 04e747f

Please sign in to comment.