diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index f7b2311..e3051cb 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -55,17 +55,14 @@ jobs: python: 310 platform_id: macosx_x86_64 steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 with: - python-version: '3.9' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install setuptools build cibuildwheel - - name: Build wheel - uses: pypa/cibuildwheel@v2.16.2 + platforms: all + - name: Build wheels + uses: pypa/cibuildwheel@v2.22.0 env: CIBW_SKIP: cp36-* cp37-* cp38-* pp* CIBW_ARCHS_LINUX: "auto aarch64" @@ -75,9 +72,9 @@ jobs: output-dir: wheelhouse config-file: pyproject.toml - name: Upload wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: binary-wheels-${{ matrix.platform_id }} + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} path: wheelhouse/*.whl test: needs: [build-sdist] diff --git a/doc/conf.py b/doc/conf.py index d4f4734..13b5136 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -167,4 +167,4 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/', None)} diff --git a/doc/index.rst b/doc/index.rst index 5ff0499..e61a1e4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -6,7 +6,7 @@ mashing-pumpkins : m(in|ax)hash =============================== -Flexible-yet-pretty-fast minhash/maxhash-related library for Python >= 3.5. +Flexible-yet-pretty-fast minhash/maxhash-related library for Python > 3.8. .. toctree:: :maxdepth: 2 @@ -233,8 +233,8 @@ function returns a non-numerical only use :class:`MaxSketch`. 
An other example is when all elements in the input set are :class:`bytes` and the common hashing function SHA1 is wanted. In that case the hashing function would look like follows. Note that hashing function can return non-numerical -values and the hashing function will plainly ignore the :param:`size` and -:param:`buffer` as no sliding window is wanted. +values and the hashing function will plainly ignore parameters `size` and +`buffer` as no sliding window is wanted. .. code-block:: python diff --git a/src/_xxhash.c b/src/_xxhash.c index e2b5878..0464936 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -83,6 +83,6 @@ PyInit__xxhash(void) } PyModule_AddIntConstant(m, "DEFAULT_SEED", XXH_DEFAULT_SEED); - + return m; } diff --git a/src/sequence.py b/src/sequence.py index 95c5127..9b41105 100644 --- a/src/sequence.py +++ b/src/sequence.py @@ -6,27 +6,31 @@ def chunkpos_iter(nsize: int, lseq: int, w: int) -> (int, int): """ Iterator of chunk indices. - + This is made to split a long sequence for the purpose of parallel - computing on its constituting ngrams/kmers while not using any - around the split points. - + computing on its constituting ngrams/kmers while ensuring no + duplicates or misses around the split points. + For example, a sequence of length 10 for which we want ngrams/kmers of length 3 can be decomposed into the following - chunks of length 5 would be split into the following 3 chunks: - - |0 1 2 3 4 5 6 7 8 9| - |---------| : : - : |---------| : - : : : |------| - 0 : 5 : : : - 3 : 8 : - 6 | - - - nsize: n in ngram - - lseq: length of sequence to chunk - - w: width of window - + chunks of length 5: + + .. code-block:: text + + |0 1 2 3 4 5 6 7 8 9| + |---------| : : + : |---------| : + : : : |------| + 0 : 5 : : : + 3 : 8 : + 6 |10 + + Depending on the combination of parameters, the last chunk may be shorter + than the desired size `w`. 
+ + :param nsize: n in ngram + :param lseq: length of sequence to chunk + :param w: width of window (that is the desired length/size of the chunk) """ assert nsize <= w