From 6b5f525a563039cf940f14eb28977ed0ed532b99 Mon Sep 17 00:00:00 2001 From: Radovan Zvoncek Date: Wed, 9 Oct 2024 16:32:23 +0300 Subject: [PATCH 1/3] Make read_timeout default to None, only use it if set --- medusa-example.ini | 2 +- medusa/config.py | 1 - medusa/storage/azure_storage.py | 2 +- medusa/storage/google_storage.py | 6 +++--- medusa/storage/s3_base_storage.py | 4 +++- tests/resources/config/medusa-s3_us_west_oregon.ini | 1 + tests/storage/abstract_storage_test.py | 1 + 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/medusa-example.ini b/medusa-example.ini index 21e020a9..52a8bdaa 100644 --- a/medusa-example.ini +++ b/medusa-example.ini @@ -130,7 +130,7 @@ use_sudo_for_restore = True ;aws_cli_path = -; Read timeout in seconds for the storage provider. +; Read timeout in seconds for the storage provider. Not set by default. ;read_timeout = 60 [monitoring] diff --git a/medusa/config.py b/medusa/config.py index 7aae5807..5355fe4b 100644 --- a/medusa/config.py +++ b/medusa/config.py @@ -117,7 +117,6 @@ def _build_default_config(): 'region': 'default', 'backup_grace_period_in_days': 10, 'use_sudo_for_restore': 'True', - 'read_timeout': 60 } config['logging'] = { diff --git a/medusa/storage/azure_storage.py b/medusa/storage/azure_storage.py index 67d4778a..3662b6e2 100644 --- a/medusa/storage/azure_storage.py +++ b/medusa/storage/azure_storage.py @@ -56,7 +56,7 @@ def __init__(self, config): logging.getLogger('azure.core.pipeline.policies.http_logging_policy').setLevel(logging.WARNING) logging.getLogger('chardet.universaldetector').setLevel(logging.WARNING) - self.read_timeout = int(config.read_timeout) + self.read_timeout = int(config.read_timeout) if 'read_timeout' in dir(config) and config.read_timeout else None super().__init__(config) diff --git a/medusa/storage/google_storage.py b/medusa/storage/google_storage.py index 35e7bb15..940a8efb 100644 --- a/medusa/storage/google_storage.py +++ b/medusa/storage/google_storage.py @@ -49,7 +49,7 @@ def __init__(self, config): logging.getLogger('gcloud.aio.storage.storage').setLevel(logging.WARNING) - self.read_timeout = int(config.read_timeout) + self.read_timeout = int(config.read_timeout) if 'read_timeout' in dir(config) and config.read_timeout else None super().__init__(config) @@ -158,7 +158,7 @@ async def _download_blob(self, src: str, dest: str): stream = await self.gcs_storage.download_stream( bucket=self.bucket_name, object_name=object_key, - timeout=self.read_timeout if self.read_timeout is not None else -1, + timeout=self.read_timeout, ) Path(file_path).parent.mkdir(parents=True, exist_ok=True) with open(file_path, 'wb') as f: @@ -243,7 +243,7 @@ async def _read_blob_as_bytes(self, blob: AbstractBlob) -> bytes: bucket=self.bucket_name, object_name=blob.name, session=self.session, - timeout=self.read_timeout if self.read_timeout is not None else -1, + timeout=self.read_timeout, ) return content diff --git a/medusa/storage/s3_base_storage.py b/medusa/storage/s3_base_storage.py index 5733d5b9..837e27c1 100644 --- a/medusa/storage/s3_base_storage.py +++ b/medusa/storage/s3_base_storage.py @@ -119,6 +119,8 @@ def __init__(self, config): self.executor = concurrent.futures.ThreadPoolExecutor(int(config.concurrent_transfers)) + self.read_timeout = int(config.read_timeout) if 'read_timeout' in dir(config) and config.read_timeout else None + super().__init__(config) def connect(self): @@ -137,7 +139,7 @@ def connect(self): signature_version='v4', tcp_keepalive=True, max_pool_connections=max_pool_size, - read_timeout=int(self.config.read_timeout), + read_timeout=self.read_timeout, ) if self.credentials.access_key_id is not None: self.s3_client = boto3.client( diff --git a/tests/resources/config/medusa-s3_us_west_oregon.ini b/tests/resources/config/medusa-s3_us_west_oregon.ini index 5e1502d9..ffd50dac 100644 --- a/tests/resources/config/medusa-s3_us_west_oregon.ini +++ b/tests/resources/config/medusa-s3_us_west_oregon.ini @@ -17,6 +17,7 @@ concurrent_transfers = 16 backup_grace_period_in_days = 0 max_backup_count = 1 region = us-west-2 +read_timeout = 60 [monitoring] monitoring_provider = local diff --git a/tests/storage/abstract_storage_test.py b/tests/storage/abstract_storage_test.py index 123760eb..ac5d223b 100644 --- a/tests/storage/abstract_storage_test.py +++ b/tests/storage/abstract_storage_test.py @@ -26,6 +26,7 @@ class AttributeDict(dict): __slots__ = () __getattr__ = dict.__getitem__ __setattr__ = dict.__setitem__ + __dict__ = dict.__dict__ class TestAbstractStorage(AbstractStorage): From 8f801e42c9e654d2845c903149059786939bc6bd Mon Sep 17 00:00:00 2001 From: Radovan Zvoncek Date: Tue, 29 Oct 2024 12:33:16 +0200 Subject: [PATCH 2/3] For GCS, use -1 instead of None for default read timeout --- medusa/storage/google_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medusa/storage/google_storage.py b/medusa/storage/google_storage.py index 940a8efb..c172579c 100644 --- a/medusa/storage/google_storage.py +++ b/medusa/storage/google_storage.py @@ -49,7 +49,7 @@ def __init__(self, config): logging.getLogger('gcloud.aio.storage.storage').setLevel(logging.WARNING) - self.read_timeout = int(config.read_timeout) if 'read_timeout' in dir(config) and config.read_timeout else None + self.read_timeout = int(config.read_timeout) if 'read_timeout' in dir(config) and config.read_timeout else -1 super().__init__(config) From d611396a27dfef950ee3113a46a80849bf84bd36 Mon Sep 17 00:00:00 2001 From: Radovan Zvoncek Date: Tue, 29 Oct 2024 14:40:56 +0200 Subject: [PATCH 3/3] Adjust ITs to use JDK 11 for 4.0.0 --- .github/workflows/ci.yml | 28 +++++++++++++++---- run_integration_tests.sh | 11 ++++++++ .../features/integration_tests.feature | 13 ++++----- .../features/steps/integration_steps.py | 17 ++++++++--- 4 files changed, 52 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be8b66a4..01daf085 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,8 +94,8 @@ jobs: exit $? integration-tests: - needs: build continue-on-error: ${{ matrix.experimental }} + needs: build strategy: fail-fast: false matrix: @@ -104,6 +104,7 @@ jobs: # IBM not included by default due to lite plan quota being easily exceeded #it-backend: [local, s3, gcs, minio, ibm, azure] cassandra-version: [2.2.19, 3.11.11, 4.0.0, 'github:apache/trunk'] + java-version: [8.0.252, 11.0.25] include: # tweak the experimental flag for cassandra versions - cassandra-version: 2.2.19 @@ -117,21 +118,35 @@ jobs: cassandra-version: 4.0.0 it-backend: gcs experimental: false + java-version: 11.0.25 # explicitly include tests against python 3.10 and one version of cassandra - python-version: "3.10" cassandra-version: 4.0.0 it-backend: gcs experimental: false + java-version: 11.0.25 # explicitly include tests against python 3.8 and one version of cassandra - python-version: 3.8 cassandra-version: 4.0.0 it-backend: gcs experimental: false + java-version: 11.0.25 exclude: # no tests against trunk - cassandra-version: 'github:apache/trunk' - # fewer tests against cassandra 3.11.11 (exclude all but local storage backends) - - it-backend: s3 + # no tests for C* 2.2 with java 11 + - cassandra-version: 2.2.19 + java-version: 11.0.25 + # no tests for C* 3.11 with java 11 + - cassandra-version: 3.11.11 + java-version: 11.0.25 + # no tests for C* 4.0 with java 8 + - cassandra-version: 4.0.0 + java-version: 8.0.252 + # fewer tests against cassandra 3.11.11 (exclude all but s3 storage backends) + # we are not doing the local because it would run a scenario with mgmt-api which no longer supports 3.11 + # but we still want some tests against 3.11.11, so we use s3 for at least some coverage + - it-backend: local cassandra-version: "3.11.11" - it-backend: gcs cassandra-version: "3.11.11" @@ -167,7 +182,7 @@ jobs: - name: Setup Java Action uses: actions/setup-java@v1 with: - java-version: '8.0.252' + java-version: ${{ matrix.java-version}} architecture: x64 - name: Setup Poetry uses: snok/install-poetry@v1 @@ -211,7 +226,7 @@ jobs: # Write GCS service account credentials to a file mkdir ~/.aws # This fake cluster needs to be created first so that the integration tests pass in GH actions. Don't ask me why... - ccm create test_cluster -v binary:3.11.4 -n 1 --vnodes + ccm create test_cluster -v binary:${{ matrix.cassandra-version }} -n 1 --vnodes ccm node1 updateconf 'storage_port: 7011' ccm node1 updateconf 'concurrent_reads: 4' ccm node1 updateconf 'concurrent_writes: 4' @@ -219,6 +234,9 @@ jobs: ccm node1 updateconf 'num_tokens: 4' sed -i 's/#MAX_HEAP_SIZE="4G"/MAX_HEAP_SIZE="256m"/' ~/.ccm/test_cluster/node1/conf/cassandra-env.sh sed -i 's/#HEAP_NEWSIZE="800M"/HEAP_NEWSIZE="200M"/' ~/.ccm/test_cluster/node1/conf/cassandra-env.sh + # remove the ThreadPriorityPolicy option for cases where we run with java 11 + sed -i 's/-XX:ThreadPriorityPolicy=42//' ~/.ccm/test_cluster/node1/conf/jvm.options || true + sed -i 's/-XX:ThreadPriorityPolicy=42//' ~/.ccm/test_cluster/node1/conf/jvm8-server.options || true ccm start -v ccm showlastlog|tail -100 ccm stop diff --git a/run_integration_tests.sh b/run_integration_tests.sh index fccc8fde..5d3455ef 100755 --- a/run_integration_tests.sh +++ b/run_integration_tests.sh @@ -175,6 +175,17 @@ else CASSANDRA_VERSION_FLAG="-D cassandra-version=${CASSANDRA_VERSION}" fi +java -version 2>&1 | grep version | grep -q 11 +if [ $? -ne 0 ]; then + # we're NOT having java 11, we can proceed + echo ${STORAGE_TAGS} | grep -q minio + if [ $? -eq 1 ]; then + # we cannot allow the DSE scenario with minio because in ITs it does not have the correct creds set up + STORAGE_TAGS="${STORAGE_TAGS},@dse" + fi + # do not add the dse scenario if minio is about to run +fi + if [ "$COVERAGE" == "yes" ] then PYTHONPATH=../.. poetry run coverage run --source='../../medusa' -m behave --stop $SCENARIO --tags=$STORAGE_TAGS $LOGGING $CASSANDRA_VERSION_FLAG diff --git a/tests/integration/features/integration_tests.feature b/tests/integration/features/integration_tests.feature index e8387393..ad937a25 100644 --- a/tests/integration/features/integration_tests.feature +++ b/tests/integration/features/integration_tests.feature @@ -1096,15 +1096,12 @@ Feature: Integration tests Then I stop the DSE cluster And I delete the DSE cluster - @local - Examples: Local storage - | storage | client encryption | - | local | without_client_encryption | + @dse + Examples: DSE Scenario + | storage | client encryption | + | local | without_client_encryption | + | s3_us_west_oregon | without_client_encryption | - @s3 - Examples: S3 storage - | storage | client encryption | - | s3_us_west_oregon | without_client_encryption | @30 Scenario Outline: Create an differential backup, corrupt it, then fix by doing another backup, and verify it diff --git a/tests/integration/features/steps/integration_steps.py b/tests/integration/features/steps/integration_steps.py index bc229d64..77cf6342 100644 --- a/tests/integration/features/steps/integration_steps.py +++ b/tests/integration/features/steps/integration_steps.py @@ -136,7 +136,7 @@ def get_client_encryption_opts(keystore_path, trustore_path): protocol: TLS,algorithm: SunX509,store_type: JKS,cipher_suites: [{cipher_suite}]}}'""" -def tune_ccm_settings(cluster_name, custom_settings=None): +def tune_ccm_settings(cassandra_version, cluster_name, custom_settings=None): if os.uname().sysname == "Linux": os.popen( """sed -i 's/#MAX_HEAP_SIZE="4G"/MAX_HEAP_SIZE="256m"/' ~/.ccm/""" @@ -148,6 +148,15 @@ def tune_ccm_settings(cluster_name, custom_settings=None): + cluster_name + """/node1/conf/cassandra-env.sh""" ).read() + if cassandra_version.startswith("4") or cassandra_version.startswith("5"): + jvm_options_file = "jvm8-server.options" + else: + jvm_options_file = "jvm.options" + os.popen( + """sed -i 's/-XX:ThreadPriorityPolicy=42//' ~/.ccm/""" + + cluster_name + + """/node1/conf/""" + jvm_options_file + ) # sed on some macos needs `-i .bak` instead of just `-i` if os.uname().sysname == "Darwin": @@ -285,7 +294,7 @@ def _i_have_a_fresh_ccm_cluster_running(context, cluster_name, client_encryption os.popen(update_client_encrytion_opts).read() custom_settings = context.custom_settings if hasattr(context, 'custom_settings') else None - tune_ccm_settings(context.cluster_name, custom_settings) + tune_ccm_settings(context.cassandra_version, context.cluster_name, custom_settings) context.session = connect_cassandra(is_client_encryption_enable) @@ -335,7 +344,7 @@ def _i_have_a_fresh_ccm_cluster_with_jolokia_running(context, cluster_name, clie ) shutil.copyfile("resources/grpc/jolokia-jvm-1.6.2-agent.jar", "/tmp/jolokia-jvm-1.6.2-agent.jar") - tune_ccm_settings(context.cluster_name) + tune_ccm_settings(context.cassandra_version, context.cluster_name) context.session = connect_cassandra(is_client_encryption_enable) @@ -370,7 +379,7 @@ def _i_have_a_fresh_ccm_cluster_with_mgmt_api_running(context, cluster_name, cli update_client_encrytion_opts = get_client_encryption_opts(keystore_path, trustore_path) os.popen(update_client_encrytion_opts).read() - tune_ccm_settings(context.cluster_name) + tune_ccm_settings(context.cassandra_version, context.cluster_name) context.session = connect_cassandra(is_client_encryption_enable) # stop the node via CCM as it needs to be started by the Management API os.popen(CCM_STOP).read()