diff --git a/Makefile b/Makefile
index 73ed9d7..678a4e0 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ SHELL := /bin/sh
# Set variables if testing locally
ifeq ($(IS_RELEASE_BUILD),)
- SPARK_VERSION := 3.3
+ SPARK_VERSION := 3.4
PROCESSOR := cpu
FRAMEWORK_VERSION := py39
SM_VERSION := 1.0
@@ -51,7 +51,7 @@ install-container-library: init
# Temporarily bypass py=1.1.0 because pytest-parallel has a dependency on it; however, the module is no longer maintained.
# Once the py lib is removed from pytest-parallel's dependencies, 51457 should only impact the local tests.
# For more info, https://github.com/pytest-dev/py/issues/287
- pipenv run safety check -i 43975 -i 51457 # https://github.com/pyupio/safety
+ pipenv run safety check -i 43975 -i 51457 -i 39611 # https://github.com/pyupio/safety
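+# 39611 presumably refers to the PyYAML < 5.4 advisory (CVE-2020-14343), accepted here because pyyaml is pinned to 5.3.1.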
build-static-config:
./scripts/fetch-ec2-instance-type-info.sh --region ${REGION} --use-case ${USE_CASE} --spark-version ${SPARK_VERSION} \
diff --git a/Pipfile b/Pipfile
index 08f9cfc..775d817 100644
--- a/Pipfile
+++ b/Pipfile
@@ -6,35 +6,38 @@ verify_ssl = true
[dev-packages]
[packages]
-tenacity = "==8.0.1"
-psutil = "==5.9.0"
-click = "==8.1.2"
-watchdog = "==0.10.3"
+pyyaml = "==5.3.1"
+tenacity = "==8.2.3"
+psutil = "==5.9.5"
+click = "==8.1.7"
+watchdog = "==3.0.0"
waitress = "==2.1.2"
-types-waitress = "==2.0.6"
-requests = "==2.27.1"
-types-requests = "==2.27.16"
-rsa = "==4.3"
-pyasn1 = "==0.4.8"
-boto3 = "==1.21.33"
-safety = "==2.3.1"
-black = "==22.3.0"
-mypy = "==0.942"
-flake8 = "==4.0.1"
-flake8-docstrings = "==1.5.0"
-pytest = "==7.1.1"
-pytest-cov = "==2.10.0"
-pytest-xdist = "==2.5.0"
-docker = "==5.0.3"
+types-waitress = "==2.1.4.9"
+requests = "==2.31.0"
+types-requests = "==2.31.0.2"
+rsa = "==4.7"
+pyasn1 = "==0.5.0"
+boto3 = "==1.28.38"
+safety = "==2.3.5"
+black = "==22.12.0"
+mypy = "==1.5.1"
+flake8 = "==6.1.0"
+flake8-docstrings = "==1.7.0"
+pytest = "==7.4.0"
+pytest-cov = "==4.1.0"
+pytest-xdist = "==3.3.1"
+docker = "==6.1.3"
docker-compose = "==1.29.2"
-cryptography = "==36.0.2"
-typing-extensions = "==4.1.1"
+cryptography = "==41.0.3"
+typing-extensions = "==4.7.1"
sagemaker = "==2.117.0"
smspark = {editable = true, path = "."}
-importlib-metadata = "==4.11.3"
+importlib-metadata = "==4.13.0"
pytest-parallel = "==0.1.1"
-pytest-rerunfailures = "10.0"
-numpy = "==1.22.2"
+pytest-rerunfailures = "==12.0"
+numpy = "==1.25.2"
+py = "==1.11.0"
+awscli = "==1.29.38"
[requires]
python_version = "3.9"
diff --git a/Pipfile.lock b/Pipfile.lock
index e50f52b..5c6d9aa 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1049,4 +1049,4 @@
}
},
"develop": {}
-}
+}
\ No newline at end of file
diff --git a/cython_constraint.txt b/cython_constraint.txt
new file mode 100644
index 0000000..d5e3183
--- /dev/null
+++ b/cython_constraint.txt
@@ -0,0 +1 @@
+Cython<3
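+# Presumably used as a pip build constraint so that pyyaml==5.3.1 still builds; PyYAML 5.x does not build against Cython 3.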
diff --git a/new_images.yml b/new_images.yml
index 3d7f115..6ba3e03 100644
--- a/new_images.yml
+++ b/new_images.yml
@@ -1,7 +1,7 @@
---
new_images:
- - spark: "3.3"
+ - spark: "3.4"
use-case: "processing"
processors: ["cpu"]
python: ["py39"]
- sm_version: "1.2"
+ sm_version: "1.0"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f83c639
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.black]
+line-length = 120
+target-version = ['py39']
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..5802b8b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,48 @@
+# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import glob
+import os
+
+from setuptools import find_packages, setup
+
+with open("VERSION", "r") as version_file:
+ version = version_file.read()
+
+setup(
+ name="smspark",
+ description="Library that enables running Spark Processing jobs on Amazon SageMaker",
+ version=version,
+ python_requires=">3.7.0",
+ packages=find_packages("src"),
+ package_dir={"": "src"},
+ py_modules=[os.path.splitext(os.path.basename(path))[0] for path in glob.glob("src/smspark/*.py")],
+ author="Amazon Web Services",
+ url="https://github.com/aws/smspark/",
+ license="Apache License 2.0",
+ keywords="ML Amazon AWS AI SageMaker Processing Spark",
+ classifiers=[
+ "Development Status :: 3 - Alpha",
+ "Intended Audience :: Developers",
+ "Natural Language :: English",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.9",
+ ],
+ setup_requires=["setuptools", "wheel"],
+ entry_points={
+ "console_scripts": [
+ "smspark-submit=smspark.cli:submit_main",
+ "smspark-history-server=smspark.history_server_cli:run_history_server",
+ ]
+ },
+)
diff --git a/smsparkbuild/py39/Pipfile b/smsparkbuild/py39/Pipfile
index fdd8f94..775d817 100644
--- a/smsparkbuild/py39/Pipfile
+++ b/smsparkbuild/py39/Pipfile
@@ -6,36 +6,38 @@ verify_ssl = true
[dev-packages]
[packages]
-tenacity = "==8.0.1"
-psutil = "==5.9.0"
-click = "==8.1.2"
-watchdog = "==0.10.3"
+pyyaml = "==5.3.1"
+tenacity = "==8.2.3"
+psutil = "==5.9.5"
+click = "==8.1.7"
+watchdog = "==3.0.0"
waitress = "==2.1.2"
-types-waitress = "==2.0.6"
+types-waitress = "==2.1.4.9"
requests = "==2.31.0"
-types-requests = "==2.27.16"
-rsa = "==4.9"
-pyasn1 = "==0.4.8"
-boto3 = "==1.21.33"
+types-requests = "==2.31.0.2"
+rsa = "==4.7"
+pyasn1 = "==0.5.0"
+boto3 = "==1.28.38"
safety = "==2.3.5"
-black = "==22.3.0"
-mypy = "==0.942"
-flake8 = "==4.0.1"
-flake8-docstrings = "==1.5.0"
-pytest = "==7.2.2"
-pytest-cov = "==2.10.0"
-pytest-xdist = "==3.2.1"
-docker = "==5.0.3"
+black = "==22.12.0"
+mypy = "==1.5.1"
+flake8 = "==6.1.0"
+flake8-docstrings = "==1.7.0"
+pytest = "==7.4.0"
+pytest-cov = "==4.1.0"
+pytest-xdist = "==3.3.1"
+docker = "==6.1.3"
docker-compose = "==1.29.2"
-cryptography = "==39.0.2"
-typing-extensions = "==4.1.1"
+cryptography = "==41.0.3"
+typing-extensions = "==4.7.1"
sagemaker = "==2.117.0"
smspark = {editable = true, path = "."}
-importlib-metadata = "==4.11.3"
+importlib-metadata = "==4.13.0"
pytest-parallel = "==0.1.1"
-pytest-rerunfailures = "10.0"
-numpy = "==1.22.2"
+pytest-rerunfailures = "==12.0"
+numpy = "==1.25.2"
py = "==1.11.0"
+awscli = "==1.29.38"
[requires]
python_version = "3.9"
diff --git a/spark/processing/3.4/py3/container-bootstrap-config/bootstrap.sh b/spark/processing/3.4/py3/container-bootstrap-config/bootstrap.sh
new file mode 100644
index 0000000..e4e23bb
--- /dev/null
+++ b/spark/processing/3.4/py3/container-bootstrap-config/bootstrap.sh
@@ -0,0 +1 @@
+echo "Not implemented"
\ No newline at end of file
diff --git a/spark/processing/3.4/py3/docker/py39/Dockerfile.cpu b/spark/processing/3.4/py3/docker/py39/Dockerfile.cpu
new file mode 100644
index 0000000..860cf48
--- /dev/null
+++ b/spark/processing/3.4/py3/docker/py39/Dockerfile.cpu
@@ -0,0 +1,137 @@
+FROM 137112412989.dkr.ecr.us-west-2.amazonaws.com/amazonlinux:2
+ARG REGION
+ENV AWS_REGION ${REGION}
+
+ENV JAVA_HOME /etc/alternatives/jre
+
+RUN yum clean all \
+ && yum update -y \
+ && yum install -y awscli bigtop-utils curl gcc gzip unzip zip gunzip tar wget liblapack* libblas* libopencv* libopenblas*
+
+# Taken from EMR https://tiny.amazon.com/1dp4p55nm/codeamazpackAwsCblob8b00src
+RUN amazon-linux-extras enable corretto8 nginx1 \
+&& yum install -y java-1.8.0-amazon-corretto-devel nginx python-virtualenv \
+&& yum remove -y java-1.8.0-openjdk-headless
+
+# Install python 3.9
+ARG PYTHON_BASE_VERSION=3.9
+ARG PYTHON_WITH_BASE_VERSION=python${PYTHON_BASE_VERSION}
+ARG PIP_WITH_BASE_VERSION=pip${PYTHON_BASE_VERSION}
+ARG PYTHON_VERSION=${PYTHON_BASE_VERSION}.12
+RUN yum -y groupinstall 'Development Tools' \
+ && yum -y install openssl-devel bzip2-devel libffi-devel sqlite-devel xz-devel \
+ && wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
+ && tar xzf Python-${PYTHON_VERSION}.tgz \
+ && cd Python-*/ \
+ && ./configure --enable-optimizations \
+ && make altinstall \
+ && echo -e 'alias python3=python3.9\nalias pip3=pip3.9' >> ~/.bashrc \
+ && ln -s $(which ${PYTHON_WITH_BASE_VERSION}) /usr/local/bin/python3 \
+ && ln -s $(which ${PIP_WITH_BASE_VERSION}) /usr/local/bin/pip3 \
+ && cd .. \
+ && rm Python-${PYTHON_VERSION}.tgz \
+ && rm -rf Python-${PYTHON_VERSION}
+
+# Install nginx: amazonlinux:2.0.20200304.0 does not include nginx, so install epel-release first
+RUN wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN yum install -y nginx
+
+RUN rm -rf /var/cache/yum
+
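+# Do not write .pyc files and do not buffer stdout/stderr, so container logs stream promptly.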
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# http://blog.stuart.axelbrooke.com/python-3-on-spark-return-of-the-pythonhashseed
+ENV PYTHONHASHSEED 0
+ENV PYTHONIOENCODING UTF-8
+ENV PIP_DISABLE_PIP_VERSION_CHECK 1
+
+# Install EMR Spark/Hadoop
+ENV HADOOP_HOME /usr/lib/hadoop
+ENV HADOOP_CONF_DIR /usr/lib/hadoop/etc/hadoop
+ENV SPARK_HOME /usr/lib/spark
+
+COPY yum/emr-apps.repo /etc/yum.repos.d/emr-apps.repo
+
+# Install hadoop / spark dependencies from EMR's yum repository for Spark optimizations.
+# replace placeholder with region in repository URL
+RUN sed -i "s/REGION/${AWS_REGION}/g" /etc/yum.repos.d/emr-apps.repo
+RUN adduser -N hadoop
+
+# These packages are a subset of what EMR installs in a cluster with the
+# "hadoop", "spark", and "hive" applications.
+# They include EMR-optimized libraries and extras.
+RUN yum install -y aws-hm-client \
+ aws-java-sdk \
+ aws-sagemaker-spark-sdk \
+ emr-goodies \
+ emr-ruby \
+ emr-scripts \
+ emr-s3-select \
+ emrfs \
+ hadoop \
+ hadoop-client \
+ hadoop-hdfs \
+ hadoop-hdfs-datanode \
+ hadoop-hdfs-namenode \
+ hadoop-httpfs \
+ hadoop-kms \
+ hadoop-lzo \
+ hadoop-yarn \
+ hadoop-yarn-nodemanager \
+ hadoop-yarn-proxyserver \
+ hadoop-yarn-resourcemanager \
+ hadoop-yarn-timelineserver \
+ hive \
+ hive-hcatalog \
+ hive-hcatalog-server \
+ hive-jdbc \
+ hive-server2 \
+ s3-dist-cp \
+ spark-core \
+ spark-datanucleus \
+ spark-external \
+ spark-history-server \
+ spark-python
+
+# Point Spark at proper python binary
+ENV PYSPARK_PYTHON=/usr/local/bin/python3.9
+
+# Setup Spark/Yarn/HDFS user as root
+ENV PATH="/usr/bin:/opt/program:${PATH}"
+ENV YARN_RESOURCEMANAGER_USER="root"
+ENV YARN_NODEMANAGER_USER="root"
+ENV HDFS_NAMENODE_USER="root"
+ENV HDFS_DATANODE_USER="root"
+ENV HDFS_SECONDARYNAMENODE_USER="root"
+
+# Set up bootstrapping program and Spark configuration
+COPY hadoop-config /opt/hadoop-config
+COPY nginx-config /opt/nginx-config
+COPY aws-config /opt/aws-config
+COPY Pipfile Pipfile.lock setup.py *.whl /opt/program/
+ENV PIPENV_PIPFILE=/opt/program/Pipfile
+# Use the --system flag so pipenv installs all packages into the system Python
+# rather than into a virtualenv; Docker containers do not need virtualenvs.
+# pipenv > 2022.4.8 fails to build smspark, so the version is pinned.
+RUN /usr/local/bin/python3.9 -m pip install pipenv==2022.4.8 \
+ && pipenv install --system \
+ && /usr/local/bin/python3.9 -m pip install /opt/program/*.whl
+
+# Setup container bootstrapper
+COPY container-bootstrap-config /opt/container-bootstrap-config
+RUN chmod +x /opt/container-bootstrap-config/bootstrap.sh \
+ && /opt/container-bootstrap-config/bootstrap.sh
+
+# With this setting the Spark history server does not run as a daemon; otherwise no
+# foreground process would remain and the container would terminate immediately
+ENV SPARK_NO_DAEMONIZE TRUE
+
+WORKDIR $SPARK_HOME
+
+# Install the sagemaker feature store spark connector
+# https://docs.aws.amazon.com/sagemaker/latest/dg/batch-ingestion-spark-connector-setup.html
+# Feature store connector library currently does not support spark 3.4 so commenting out this line
+# RUN /usr/local/bin/python3.9 -m pip install sagemaker-feature-store-pyspark-3.3==1.1.2 --no-binary :all:
+
+ENTRYPOINT ["smspark-submit"]
diff --git a/spark/processing/3.4/py3/hadoop-config/core-site.xml b/spark/processing/3.4/py3/hadoop-config/core-site.xml
new file mode 100644
index 0000000..2b032ff
--- /dev/null
+++ b/spark/processing/3.4/py3/hadoop-config/core-site.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://nn_uri/</value>
+        <description>NameNode URI</description>
+    </property>
+    <property>
+        <name>fs.s3a.aws.credentials.provider</name>
+        <value>com.amazonaws.auth.DefaultAWSCredentialsProviderChain</value>
+        <description>AWS S3 credential provider</description>
+    </property>
+    <property>
+        <name>fs.s3.impl</name>
+        <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+        <description>s3a filesystem implementation</description>
+    </property>
+    <property>
+        <name>fs.AbstractFileSystem.s3a.imp</name>
+        <value>org.apache.hadoop.fs.s3a.S3A</value>
+        <description>s3a filesystem implementation</description>
+    </property>
+    <property>
+        <name>fs.s3a.connection.maximum</name>
+        <value>100</value>
+        <description>s3a filesystem maximum connection</description>
+    </property>
+</configuration>
diff --git a/spark/processing/3.4/py3/hadoop-config/hdfs-site.xml b/spark/processing/3.4/py3/hadoop-config/hdfs-site.xml
new file mode 100644
index 0000000..37e0a5b
--- /dev/null
+++ b/spark/processing/3.4/py3/hadoop-config/hdfs-site.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+ dfs.datanode.data.dir
+ file:///opt/amazon/hadoop/hdfs/datanode
+ Comma separated list of paths on the local filesystem of a DataNode where it should store its\
+ blocks.
+
+
+
+ dfs.namenode.name.dir
+ file:///opt/amazon/hadoop/hdfs/namenode
+ Path on the local filesystem where the NameNode stores the namespace and transaction logs per\
+ sistently.
+
+
+
+
+ dfs.client.block.write.replace-datanode-on-failure.enable
+ true
+
+ If there is a datanode/network failure in the write pipeline,
+ DFSClient will try to remove the failed datanode from the pipeline
+ and then continue writing with the remaining datanodes. As a result,
+ the number of datanodes in the pipeline is decreased. The feature is
+ to add new datanodes to the pipeline.
+
+ This is a site-wide property to enable/disable the feature.
+
+ When the cluster size is extremely small, e.g. 3 nodes or less, cluster
+ administrators may want to set the policy to NEVER in the default
+ configuration file or disable this feature. Otherwise, users may
+ experience an unusually high rate of pipeline failures since it is
+ impossible to find new datanodes for replacement.
+
+ See also dfs.client.block.write.replace-datanode-on-failure.policy
+
+
+
+
+ dfs.client.block.write.replace-datanode-on-failure.policy
+ ALWAYS
+
+ This property is used only if the value of
+ dfs.client.block.write.replace-datanode-on-failure.enable is true.
+
+ ALWAYS: always add a new datanode when an existing datanode is
+ removed.
+
+ NEVER: never add a new datanode.
+
+ DEFAULT:
+ Let r be the replication number.
+ Let n be the number of existing datanodes.
+ Add a new datanode only if r is greater than or equal to 3 and either
+ (1) floor(r/2) is greater than or equal to n; or
+ (2) r is greater than n and the block is hflushed/appended.
+
+
+
diff --git a/spark/processing/3.4/py3/hadoop-config/spark-defaults.conf b/spark/processing/3.4/py3/hadoop-config/spark-defaults.conf
new file mode 100644
index 0000000..734086a
--- /dev/null
+++ b/spark/processing/3.4/py3/hadoop-config/spark-defaults.conf
@@ -0,0 +1,10 @@
+spark.driver.extraClassPath /usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar
+spark.driver.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
+spark.executor.extraClassPath /usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar
+spark.executor.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
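+# sd_host is a placeholder; presumably the container bootstrapper substitutes the Spark driver's host address at startup.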
+spark.driver.host=sd_host
+spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2
+
+# Fix for "Uncaught exception: org.apache.spark.rpc.RpcTimeoutException: Cannot
+# receive any reply from 10.0.109.30:35219 in 120 seconds.""
+spark.rpc.askTimeout=300s
diff --git a/spark/processing/3.4/py3/hadoop-config/spark-env.sh b/spark/processing/3.4/py3/hadoop-config/spark-env.sh
new file mode 100644
index 0000000..1b58aa1
--- /dev/null
+++ b/spark/processing/3.4/py3/hadoop-config/spark-env.sh
@@ -0,0 +1,3 @@
+# EMPTY FILE TO AVOID OVERRIDING ENV VARS
+# Specifically, without copying this empty file, SPARK_HISTORY_OPTS would be overridden,
+# spark.history.ui.port would default to 18082, and spark.eventLog.dir would default to the local fs
diff --git a/spark/processing/3.4/py3/hadoop-config/yarn-site.xml b/spark/processing/3.4/py3/hadoop-config/yarn-site.xml
new file mode 100644
index 0000000..1c92988
--- /dev/null
+++ b/spark/processing/3.4/py3/hadoop-config/yarn-site.xml
@@ -0,0 +1,34 @@
+
+
+
+
+ yarn.resourcemanager.hostname
+ rm_hostname
+ The hostname of the RM.
+
+
+ yarn.nodemanager.hostname
+ nm_hostname
+ The hostname of the NM.
+
+
+ yarn.nodemanager.webapp.address
+ nm_webapp_address
+
+
+ yarn.nodemanager.vmem-pmem-ratio
+ 5
+ Ratio between virtual memory to physical memory.
+
+
+ yarn.resourcemanager.am.max-attempts
+ 1
+ The maximum number of application attempts.
+
+
+ yarn.nodemanager.env-whitelist
+ JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME,AWS_CONTAINER_CREDENTIALS_RELATIVE_URI,AWS_REGION
+ Environment variable whitelist
+
+
+
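+<!-- rm_hostname, nm_hostname and nm_webapp_address are placeholders, presumably replaced with the real host addresses when the container bootstraps the cluster. -->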
diff --git a/spark/processing/3.4/py3/nginx-config/default.conf b/spark/processing/3.4/py3/nginx-config/default.conf
new file mode 100644
index 0000000..a8a50a5
--- /dev/null
+++ b/spark/processing/3.4/py3/nginx-config/default.conf
@@ -0,0 +1,17 @@
+server {
+ listen 15050;
+ server_name localhost;
+ client_header_buffer_size 128k;
+ large_client_header_buffers 4 128k;
+
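+    # Proxy the Spark history server (port 18080) and rewrite its redirects so job links
+    # stay under the /proxy/15050 path; $domain_name is presumably substituted at startup.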
+ location ~ ^/history/(.*)/(.*)/jobs/$ {
+ proxy_pass http://localhost:18080/history/$1/jobs/;
+ proxy_redirect http://localhost:18080/history/$1/jobs/ $domain_name/proxy/15050/history/$1/jobs/;
+ expires off;
+ }
+
+ location / {
+ proxy_pass http://localhost:18080;
+ expires off;
+ }
+}
\ No newline at end of file
diff --git a/spark/processing/3.4/py3/nginx-config/nginx.conf b/spark/processing/3.4/py3/nginx-config/nginx.conf
new file mode 100644
index 0000000..1e3a51c
--- /dev/null
+++ b/spark/processing/3.4/py3/nginx-config/nginx.conf
@@ -0,0 +1,66 @@
+# For more information on configuration, see:
+# * Official English Documentation: http://nginx.org/en/docs/
+# * Official Russian Documentation: http://nginx.org/ru/docs/
+
+user nginx;
+worker_processes auto;
+error_log /var/log/nginx/error.log;
+pid /run/nginx.pid;
+
+# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
+include /usr/share/nginx/modules/*.conf;
+
+events {
+ worker_connections 1024;
+}
+
+http {
+ log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+ '$status $body_bytes_sent "$http_referer" '
+ '"$http_user_agent" "$http_x_forwarded_for"';
+
+ access_log /var/log/nginx/access.log main;
+
+ sendfile on;
+ tcp_nopush on;
+ tcp_nodelay on;
+ keepalive_timeout 65;
+ types_hash_max_size 2048;
+
+ include /etc/nginx/mime.types;
+ default_type application/octet-stream;
+
+ # Load modular configuration files from the /etc/nginx/conf.d directory.
+ # See http://nginx.org/en/docs/ngx_core_module.html#include
+ # for more information.
+ include /etc/nginx/conf.d/*.conf;
+
+ server {
+ listen 80 default_server;
+ listen [::]:80 default_server;
+ server_name _;
+ root /usr/share/nginx/html;
+
+ # Load configuration files for the default server block.
+ include /etc/nginx/default.d/*.conf;
+
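+        # Forward /proxy/15050 requests to the history-server proxy defined in default.conf.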
+ location /proxy/15050 {
+ proxy_pass http://localhost:15050/;
+ }
+
+ location ~ ^/proxy/15050/(.*) {
+ proxy_pass http://localhost:15050/$1;
+ }
+
+ location / {
+ }
+
+ error_page 404 /404.html;
+ location = /40x.html {
+ }
+
+ error_page 500 502 503 504 /50x.html;
+ location = /50x.html {
+ }
+ }
+}
\ No newline at end of file
diff --git a/spark/processing/3.4/py3/yum/emr-apps.repo b/spark/processing/3.4/py3/yum/emr-apps.repo
new file mode 100644
index 0000000..2d361d6
--- /dev/null
+++ b/spark/processing/3.4/py3/yum/emr-apps.repo
@@ -0,0 +1,17 @@
+[emr-apps]
+name = EMR Application Repository
+gpgkey = https://s3-REGION.amazonaws.com/repo.REGION.emr.amazonaws.com/apps-repository/emr-6.12.0/cfe221e4-88c4-464c-ad10-42f53049e5e7/repoPublicKey.txt
+enabled = 1
+baseurl = https://s3-REGION.amazonaws.com/repo.REGION.emr.amazonaws.com/apps-repository/emr-6.12.0/cfe221e4-88c4-464c-ad10-42f53049e5e7
+priority = 5
+gpgcheck = 0
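+# The REGION placeholders above are replaced at image build time by the sed command in the Dockerfile.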
+
+[emr-puppet]
+mirrorlist: http://amazonlinux.$awsregion.$awsdomain/$releasever/extras/emr-puppet/latest/$basearch/mirror.list
+enabled: 1
+gpgcheck: 1
+name: Amazon Extras repo for emr-puppet
+gpgkey: file:///etc/pki/rpm-gpg/RPM-GPG-KEY-amazon-linux-2
+priority: 10
+skip_if_unavailable: 1
+report_instanceid: yes
diff --git a/test/integration/conftest.py b/test/integration/conftest.py
index 96d328f..528863f 100644
--- a/test/integration/conftest.py
+++ b/test/integration/conftest.py
@@ -121,31 +121,9 @@ def sagemaker_session(boto_session, sagemaker_client) -> Session:
@pytest.fixture(scope="session")
def is_feature_store_available(region) -> bool:
- """Check if feature store is available in current region."""
+    """Check if Feature Store is available in the current region. The region list is intentionally empty because
+    the Feature Store connector does not yet support Spark releases later than 3.3; otherwise release tests fail."""
feature_store_released_regions = [
- "us-east-1",
- "us-west-2",
- "us-east-2",
- "us-west-1",
- "ap-northeast-1",
- "ap-northeast-2",
- "ap-northeast-3",
- "ap-east-1",
- "ap-south-1",
- "ap-southeast-1",
- "ap-southeast-2",
- "me-south-1",
- "sa-east-1",
- "ca-central-1",
- "eu-central-1",
- "eu-north-1",
- "eu-west-1",
- "eu-west-2",
- "eu-west-3",
- "af-south-1",
- "eu-south-1",
- "cn-northwest-1",
- "cn-north-1",
]
return region in feature_store_released_regions