Skip to content

Commit

Permalink
Add CI / CD github workflow (#11)
Browse files Browse the repository at this point in the history
* add pre-commit with black, flake8 and refactor code
* fix bugs and move contrib out of main package
* update makefile and github ci workflow
* bump version
  • Loading branch information
ChrisPappalardo authored Oct 1, 2024
1 parent 949d298 commit d4ee985
Show file tree
Hide file tree
Showing 23 changed files with 596 additions and 488 deletions.
21 changes: 21 additions & 0 deletions .github/actions/install/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Composite action: sets up Python, then pip-installs requirements.txt and
# the package itself (`pip install .`).
name: 'install'
description: 'install requirements'

inputs:
  python-version:
    description: 'python version'
    required: false
    # Kept as a quoted string so YAML does not read 3.10 as the float 3.1.
    default: '3.10'
outputs: {}

runs:
  using: 'composite'
  steps:
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}
    # Composite-action `run` steps must declare their shell explicitly.
    - name: install requirements
      shell: bash
      run: pip install -r requirements.txt
    - name: install package
      shell: bash
      run: pip install .
33 changes: 33 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Continuous integration: runs the pre-commit hooks and the unit tests on
# every pull request and on pushes to any branch.
name: ci

on:
  pull_request:
  push:
    branches:
      - "**"

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code repository
        uses: actions/checkout@v4
      - name: Install dependencies
        uses: ./.github/actions/install
      - name: Run pre-commit
        run: pre-commit run --all-files

  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so YAML keeps "3.10" a string instead of the float 3.1.
        python-version: ["3.10"]
    steps:
      - name: Checkout code repository
        uses: actions/checkout@v4
      - name: Install dependencies
        uses: ./.github/actions/install
        # Forward the matrix entry to the install action's `python-version`
        # input so each matrix job runs on the interpreter it names; an env
        # var on the pytest step does not influence which Python is set up.
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run unit tests
        run: pytest
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# pre-commit configuration: generic hygiene hooks from pre-commit-hooks plus
# locally installed black and flake8.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: check-ast
      - id: check-added-large-files
      - id: check-merge-conflict
      - id: check-case-conflict
      - id: check-docstring-first
      - id: check-json
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: mixed-line-ending
  - repo: local
    hooks:
      # `language: system` runs the tools already installed in the active
      # environment. pre-commit appends the matching Python file names to
      # `entry`, so no explicit targets are given here: hard-coding paths
      # would make each hook process those paths in addition to the files
      # pre-commit passes (e.g. black rewriting the whole tree on every
      # commit).
      - id: black
        name: black
        entry: black
        language: system
        types: [python]
      - id: flake8
        name: flake8
        entry: flake8
        language: system
        types: [python]
1 change: 0 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

12 changes: 8 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: clean clean-build clean-pyc clean-test coverage dist docs help install lint lint/flake8 lint/black
.PHONY: clean clean-build clean-pyc clean-test coverage dist docs help install lint lint/black lint/flake8 pre-commit test test-all
.DEFAULT_GOAL := help

define BROWSER_PYSCRIPT
Expand Down Expand Up @@ -47,12 +47,16 @@ clean-test: ## remove test and coverage artifacts
rm -fr htmlcov/
rm -fr .pytest_cache

# --check makes black exit non-zero when files would be reformatted (so the
# lint target can fail); --diff additionally prints the proposed changes.
lint/black: ## check style with black
	black --check --diff eparse tests

lint/flake8: ## check style with flake8
flake8 eparse tests
lint/black: ## check style with black
black --check -S eparse tests

lint: lint/flake8 lint/black ## check style
lint: lint/black lint/flake8 ## check style

pre-commit: ## run pre-commit on all files
pre-commit run --all-files

test: ## run tests quickly with the default Python
pytest
Expand Down
39 changes: 39 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,45 @@ If you would like to use eparse to partition xls[x] files alongside unstructured
Valid `eparse_mode` settings are available in `eparse.contrib.unstructured.xlsx._eparse_modes`.


Development
===========

Clone the repo:

.. code-block::

    $ git clone https://github.com/ChrisPappalardo/eparse.git

Install devtest requirements and the package in editable mode:

.. code-block::

    $ pip install -r requirements.txt
    $ pip install -e .

Run unit tests:

.. code-block::

    $ make test

Run the linter:

.. code-block::

    $ make lint

Install pre-commit:

.. code-block::

    $ pre-commit install

Run pre-commit:

.. code-block::

    $ pre-commit run --all-files


Contributing
============

As an open-source project, contributions are always welcome. Please see `Contributing <CONTRIBUTING.rst>`_ for more information.
Expand Down
48 changes: 22 additions & 26 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,21 @@
# -*- coding: utf-8 -*-

'''
"""
unit test fixtures
'''

import pytest
"""

import pandas as pd
import pytest
from peewee import SqliteDatabase

from eparse.interfaces import (
DATABASE,
ExcelParse,
)
from eparse.interfaces import DATABASE, ExcelParse


@pytest.fixture
def ctx():
'''
"""
click style ctx object fixture
'''
"""

class Obj:
obj = {}
Expand All @@ -29,31 +25,31 @@ class Obj:

@pytest.fixture
def data():
'''
"""
serialized data fixture
'''
"""

return dict(
row=0,
column=0,
value='test',
type='test',
c_header='test',
r_header='test',
excel_RC='A1',
name='test',
sheet='test',
f_name='test',
value="test",
type="test",
c_header="test",
r_header="test",
excel_RC="A1",
name="test",
sheet="test",
f_name="test",
)


@pytest.fixture
def sqlite3_db(data):
'''
"""
sqlite3 in-memory database fixture
'''
"""

db = ':memory:'
db = ":memory:"
DATABASE.initialize(SqliteDatabase(db))
DATABASE.connect()
DATABASE.create_tables([ExcelParse])
Expand All @@ -65,12 +61,12 @@ def sqlite3_db(data):

@pytest.fixture
def xlsx():
'''
"""
excel file fixture
'''
"""

return pd.read_excel(
'tests/eparse_unit_test_data.xlsx',
"tests/eparse_unit_test_data.xlsx",
header=None,
index_col=None,
)
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,7 @@
from tempfile import SpooledTemporaryFile
from typing import IO, BinaryIO, List, Optional, Union, cast

from eparse.core import (
df_serialize_table,
get_df_from_file,
get_table_digest,
)
import lxml.html

from unstructured.documents.elements import (
DataSourceMetadata,
Element,
Expand All @@ -25,12 +19,13 @@
spooled_to_bytes_io_if_needed,
)

from eparse.core import df_serialize_table, get_df_from_file, get_table_digest

_eparse_modes = (
'eparse',
'digest',
'table-digest',
'unstructured',
"eparse",
"digest",
"table-digest",
"unstructured",
)


Expand Down
Loading

0 comments on commit d4ee985

Please sign in to comment.