From cf1f019b2a6f5991b37fd2535876ff78fc931226 Mon Sep 17 00:00:00 2001 From: duncan Date: Wed, 31 Aug 2022 14:43:59 +0100 Subject: [PATCH] add python 3.8 support (#959) --- .circleci/config.yml | 14 ++++++++++++++ CHANGES.md | 1 + README.md | 4 ++-- arctic/date/_util.py | 13 ++++++++++++- arctic/store/_pickle_store.py | 3 +-- setup.py | 1 + tests/unit/date/test_util.py | 20 ++++++++++++++++++++ tests/unit/store/test_pickle_store.py | 9 +++++++-- 8 files changed, 58 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c7209ffb2..4bc55a55f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -135,6 +135,19 @@ jobs: docker: - image: cimg/python:3.7-node <<: *defaults + build_3_8: + environment: + PYTHON_VERSION: "3_8" + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_8 + CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_8 + VERSION: $VERSION + #PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases + #YARN_STATIC_DIR: notebooker/web/static/ + IMAGE_NAME: mangroup/arctic + working_directory: ~/arctic_3_8 + docker: + - image: cimg/python:3.8-node + <<: *defaults publish-github-release: docker: - image: cibuilds/github:0.13 @@ -160,6 +173,7 @@ workflows: jobs: - build_3_6 - build_3_7 + - build_3_8 # do not publish #- publish-github-release: diff --git a/CHANGES.md b/CHANGES.md index 3a530eacc..15a0d9eed 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,7 @@ ### 1.80.5 * Feature: #950 remove all traces of python 2.7 and six package + * Feature: #959 add python 3.8 support ### 1.80.4 (2022-01-25) * Bugfix: #940 fix rows per chunk causing divide by zero diff --git a/README.md b/README.md index c252e41f0..37fd329bb 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ [![Documentation Status](https://readthedocs.org/projects/arctic/badge/?version=latest)](https://arctic.readthedocs.io/en/latest/?badge=latest) [![CircleCI](https://circleci.com/gh/man-group/arctic/tree/master.svg?style=shield)](https://app.circleci.com/pipelines/github/man-group/arctic?branch=master) -[![PyPI](https://img.shields.io/pypi/v/arctic)](https://pypi.org/project/arctic) -[![Python](https://img.shields.io/badge/Python-3.6|3.7-green.svg)](https://github.com/man-group/arctic) +[![PyPI](https://img.shields.io/pypi/v/arctic)](https://pypi.org/project/arctic/) +[![Python](https://img.shields.io/badge/Python-3.6|3.7|3.8-green.svg)](https://github.com/man-group/arctic) [![Join the chat at https://gitter.im/man-group/arctic](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/man-group/arctic?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Arctic is a high performance datastore for numeric data. It supports [Pandas](http://pandas.pydata.org/), diff --git a/arctic/date/_util.py b/arctic/date/_util.py index 70e837101..3e29bcc05 100644 --- a/arctic/date/_util.py +++ b/arctic/date/_util.py @@ -3,6 +3,8 @@ import sys from datetime import timedelta +import pandas as pd + from ._daterange import DateRange from ._generalslice import OPEN_OPEN, CLOSED_CLOSED, OPEN_CLOSED, CLOSED_OPEN from ._mktz import mktz @@ -165,7 +167,16 @@ def datetime_to_ms(d): """Convert a Python datetime object to a millisecond epoch (UTC) time value.""" try: millisecond = d.microsecond // 1000 - return calendar.timegm(_add_tzone(d).utctimetuple()) * 1000 + millisecond + + # python3.8 workaround https://github.com/pandas-dev/pandas/issues/32174 + if sys.version_info < (3, 8, 0): + return calendar.timegm(_add_tzone(d).utctimetuple()) * 1000 + millisecond + else: + tmp = _add_tzone(d) + if isinstance(tmp, pd.Timestamp): + return calendar.timegm(tmp.to_pydatetime().utctimetuple()) * 1000 + millisecond + else: + return calendar.timegm(tmp.utctimetuple()) * 1000 + millisecond except AttributeError: raise TypeError('expect Python datetime object, not %s' % type(d)) diff --git a/arctic/store/_pickle_store.py b/arctic/store/_pickle_store.py index 01642d87e..b72e4f9f2 100644 --- a/arctic/store/_pickle_store.py +++ b/arctic/store/_pickle_store.py @@ -93,8 +93,7 @@ def write(self, arctic_lib, version, symbol, item, _previous_version): # Python 3.8 onwards uses protocol 5 which cannot be unpickled in Python versions below that, so limiting # it to use a maximum of protocol 4 in Python which is understood by 3.4 onwards and is still fairly efficient. # The min() used to allow lower versions to be used in py2 (which supported a max of 2) - #pickle_protocol = min(cPickle.HIGHEST_PROTOCOL, 4) - pickle_protocol = 4 + pickle_protocol = min(pickle.HIGHEST_PROTOCOL, 4) pickled = pickle.dumps(item, protocol=pickle_protocol) data = compress_array([pickled[i * _CHUNK_SIZE: (i + 1) * _CHUNK_SIZE] for i in range(int(len(pickled) / _CHUNK_SIZE + 1))]) diff --git a/setup.py b/setup.py index 07912a4a0..e8ffc622a 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,7 @@ def run_tests(self): "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: Implementation :: CPython", "Operating System :: POSIX", "Operating System :: MacOS", diff --git a/tests/unit/date/test_util.py b/tests/unit/date/test_util.py index b4a7d3d25..a45ba7945 100644 --- a/tests/unit/date/test_util.py +++ b/tests/unit/date/test_util.py @@ -1,4 +1,6 @@ from datetime import datetime as dt +import pandas as pd +import sys import pytest from mock import patch @@ -129,3 +131,21 @@ def test_utc_dt_to_local_dt(): utc_time = dt(2000, 1, 1, 10, 0, 0) pek_time = utc_dt_to_local_dt(utc_time) # GMT +0800 assert(pek_time.hour - utc_time.hour == 8) + + +def test_pandas_timestamp_issue(): + # test to illustrate how pandas.Timestamp.utctimetuple is broken python>=3.8 + # see arctic.date._util.datetime_to_ms + + ts = pd.Timestamp("2020-11-27 16:00:00-0500", tz="US/Eastern") + + if sys.version_info < (3, 8, 0): + assert(ts.utctimetuple().tm_hour == 21) + assert(ts.timetuple().tm_hour == 16) + assert(ts.to_pydatetime().timetuple().tm_hour == 16) + else: + assert(ts.to_pydatetime().utctimetuple().tm_hour == 21) + assert(ts.timetuple().tm_hour == 16) + # fails + with pytest.raises(TypeError): + ts.utctimetuple() \ No newline at end of file diff --git a/tests/unit/store/test_pickle_store.py b/tests/unit/store/test_pickle_store.py index 2340318a0..121f23324 100644 --- a/tests/unit/store/test_pickle_store.py +++ b/tests/unit/store/test_pickle_store.py @@ -33,9 +33,14 @@ def test_write_object(): assert version['blob'] == '__chunked__V2' coll = arctic_lib.get_top_level_collection.return_value - assert coll.update_one.call_args_list == [call({'sha': checksum('sentinel.symbol', {'segment': 0, 'data': Binary(compress(pickle.dumps(sentinel.item, pickle.HIGHEST_PROTOCOL)))}), + + # Python 3.8 onwards uses protocol 5 which cannot be unpickled in Python versions below that, so limiting + # it to use a maximum of protocol 4 in Python which is understood by 3.4 onwards and is still fairly efficient. + # The min() used to allow lower versions to be used in py2 (which supported a max of 2) + pickle_protocol = min(4, pickle.HIGHEST_PROTOCOL) + assert coll.update_one.call_args_list == [call({'sha': checksum('sentinel.symbol', {'segment': 0, 'data': Binary(compress(pickle.dumps(sentinel.item, pickle_protocol)))}), 'symbol': 'sentinel.symbol'}, - {'$set': {'segment': 0, 'data': Binary(compress(pickle.dumps(sentinel.item, pickle.HIGHEST_PROTOCOL)), 0)}, + {'$set': {'segment': 0, 'data': Binary(compress(pickle.dumps(sentinel.item, pickle_protocol)), 0)}, '$addToSet': {'parent': version['_id']}}, upsert=True)]