diff --git a/setup.py b/setup.py
index bee6a64..dffbf93 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name = "sioworkers",
-    version = '1.5.2',
+    version = '1.5.3',
     author = "SIO2 Project Team",
     author_email = 'sio2@sio2project.mimuw.edu.pl',
     description = "Programming contest judging infrastructure",
diff --git a/sio/archive_utils.py b/sio/archive_utils.py
new file mode 100644
index 0000000..af8d1a2
--- /dev/null
+++ b/sio/archive_utils.py
@@ -0,0 +1,220 @@
+# Taken from
+# https://github.com/gdub/python-archive/blob/master/archive/__init__.py
+# Modified
+
+# Copyright (c) Gary Wilson Jr. and contributors.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import os
+import tarfile
+import zipfile
+
+from sio.workers.util import RegisteredSubclassesBase
+
+
+class ArchiveException(Exception):
+    """Base exception class for all archive errors."""
+
+
+class UnrecognizedArchiveFormat(ArchiveException):
+    """Error raised when passed file is not a recognized archive format."""
+
+
+class UnsafeArchive(ArchiveException):
+    """
+    Error raised when passed file contains paths that would be extracted
+    outside of the target directory.
+    """
+
+
+def extract(path, member, to_path='', ext='', **kwargs):
+    """
+    Unpack member from the tar or zip file at the specified path to the directory
+    specified by to_path.
+
+    The implementation is chosen by Archive.get(); ext is kept only for
+    backward compatibility and is ignored. Remaining keyword arguments
+    (e.g. method) are passed on to the implementation's extract().
+    """
+    # Archive defines no constructor of its own and never sets _archive, so
+    # the concrete implementation has to be obtained through Archive.get().
+    Archive.get(path).extract(member, to_path, **kwargs)
+
+
+class Archive(RegisteredSubclassesBase):
+    """
+    The external API class that encapsulates an archive implementation.
+    """
+
+    @classmethod
+    def __classinit__(cls):
+        this_cls = globals().get('Archive', cls)
+        super(this_cls, cls).__classinit__()
+        cls.handled_archives = set()
+
+    @classmethod
+    def register_subclass(cls, subcls):
+        if cls is not subcls:
+            cls.handled_archives.add(subcls)
+
+    @classmethod
+    def get(cls, file):
+        """
+        Return an instance of the proper Archive implementation, based on the
+        file type.
+
+        file is a path or an object with a name attribute; either way the
+        implementation is opened by file name. Raises UnrecognizedArchiveFormat
+        if file has no name or no registered implementation accepts it.
+        """
+        filename = None
+        if isinstance(file, str):
+            filename = file
+        else:
+            try:
+                filename = file.name
+            except AttributeError:
+                raise UnrecognizedArchiveFormat(
+                    "File object not a recognized archive format.")
+        for subcls in cls.handled_archives:
+            if subcls.is_archive(filename):
+                return subcls(filename)
+
+        raise UnrecognizedArchiveFormat(
+            "Path not a recognized archive format: %s" % filename)
+
+    def extract(self, *args, **kwargs):
+        self._archive.extract(*args, **kwargs)
+
+    def list(self):
+        self._archive.list()
+
+    def filenames(self):
+        return self._archive.filenames()
+
+
+class BaseArchive(Archive):
+    """
+    Base Archive class. Implementations should inherit this class.
+    """
+    abstract = True
+
+    def __del__(self):
+        if hasattr(self, "_archive"):
+            self._archive.close()
+
+    def list(self):
+        raise NotImplementedError()
+
+    def filenames(self):
+        """
+        Return a list of the filenames contained in the archive.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    def is_archive(filename):
+        """
+        Return True if filename is an archive handled by this implementation.
+        """
+        raise NotImplementedError()
+
+    def _extract(self, member, to_path):
+        """
+        Performs the actual extraction. Separate from 'extract' method so that
+        we don't recurse when subclasses don't declare their own 'extract'
+        method.
+        """
+        self._archive.extract(member, path=to_path)
+
+    def extract(self, member, to_path='', method='safe'):
+        """
+        Extract member to to_path. With method 'safe' the whole archive is
+        first verified by check_files(); 'insecure' skips that check. Any
+        other method raises ValueError.
+        """
+        if method == 'safe':
+            self.check_files(to_path)
+        elif method == 'insecure':
+            pass
+        else:
+            raise ValueError("Invalid method option")
+        self._extract(member, to_path)
+
+    def check_files(self, to_path=None):
+        """
+        Check that all of the files contained in the archive are within the
+        target directory (to_path, or the current directory if it is empty).
+        Raises UnsafeArchive for the first member that is not.
+        """
+        if to_path:
+            target_path = os.path.normpath(os.path.realpath(to_path))
+        else:
+            # Resolve symlinks here as well, to compare like with like:
+            # member paths below go through realpath().
+            target_path = os.path.realpath(os.getcwd())
+        # A bare startswith(target_path) would also accept siblings such as
+        # '/tmp/outside' for target '/tmp/out'; compare whole components.
+        target_prefix = os.path.join(target_path, '')
+        for filename in self.filenames():
+            extract_path = os.path.join(target_path, filename)
+            extract_path = os.path.normpath(os.path.realpath(extract_path))
+            if (extract_path != target_path
+                    and not extract_path.startswith(target_prefix)):
+                raise UnsafeArchive(
+                    "Archive member destination is outside the target"
+                    " directory. member: %s" % filename)
+
+
+class TarArchive(BaseArchive):
+
+    def __init__(self, file):
+        # tarfile's open uses different parameters for file path vs. file obj.
+        if isinstance(file, str):
+            self._archive = tarfile.open(name=file)
+        else:
+            self._archive = tarfile.open(fileobj=file)
+
+    def list(self, *args, **kwargs):
+        self._archive.list(*args, **kwargs)
+
+    def filenames(self):
+        return self._archive.getnames()
+
+    @staticmethod
+    def is_archive(filename):
+        return tarfile.is_tarfile(filename)
+
+
+class ZipArchive(BaseArchive):
+
+    def __init__(self, file):
+        # ZipFile's 'file' parameter can be path (string) or file-like obj.
+        self._archive = zipfile.ZipFile(file)
+
+    def list(self, *args, **kwargs):
+        self._archive.printdir(*args, **kwargs)
+
+    def filenames(self):
+        return self._archive.namelist()
+
+    @staticmethod
+    def is_archive(filename):
+        return zipfile.is_zipfile(filename)
diff --git a/sio/compilers/test/sources/simple.zip b/sio/compilers/test/sources/simple.zip
new file mode 100644
index 0000000..d6ccbe8
Binary files /dev/null and b/sio/compilers/test/sources/simple.zip differ
diff --git a/sio/compilers/test/test_compilers.py b/sio/compilers/test/test_compilers.py
index 5c16ecd..7d65710 100644
--- a/sio/compilers/test/test_compilers.py
+++ b/sio/compilers/test/test_compilers.py
@@ -175,13 +175,51 @@ def test_output_compilation_and_running(source):
         )
         eq_(post_run_env['result_code'], 'OK')
 
-        ft.download(post_run_env, 'out_file', tempcwd('out.txt'))
         ft.download({'source_file': source}, 'source_file', tempcwd('source.txt'))
-        with open(tempcwd('out.txt'), 'r') as outfile:
+        with open(tempcwd('out'), 'r') as outfile:
             with open(tempcwd('source.txt'), 'r') as sourcefile:
                 eq_(outfile.read(), sourcefile.read())
 
 
+@pytest.mark.parametrize("source", [('/simple.zip')])
+def test_output_archive_compilation_and_running(source):
+    with TemporaryCwd():
+        upload_files()
+        result_env = run(
+            {
+                'source_file': source,
+                'compiler': 'output-only',
+            }
+        )
+        eq_(result_env['result_code'], 'OK')
+        eq_(result_env['exec_info'], {'mode': 'output-only'})
+
+        ft.download(result_env, 'out_file', tempcwd('out.zip'))
+        ft.download({'source_file': source}, 'source_file', tempcwd('source.zip'))
+        with open(tempcwd('out.zip'), 'rb') as outfile:
+            with open(tempcwd('source.zip'), 'rb') as sourcefile:
+                eq_(outfile.read(), sourcefile.read())
+
+        # abc0.out in simple.zip is the same as simple.txt
+        post_run_env = run_from_executors(
+            {
+                'exec_info': result_env['exec_info'],
+                'exe_file': result_env['out_file'],
+                'check_output': True,
+                'hint_file': '/simple.txt',
+                'name': '0',
+                'problem_short_name': 'abc',
+            },
+            executor=None,
+        )
+        eq_(post_run_env['result_code'], 'OK')
+
+        ft.download({'source_file': '/simple.txt'}, 'source_file', tempcwd('simple.txt'))
+        with open(tempcwd('out'), 'r') as outfile:
+            with open(tempcwd('simple.txt'), 'r') as sourcefile:
+                eq_(outfile.read(), sourcefile.read())
+
+
 def _make_compilation_with_additional_library_cases():
     compilers = ['system-']
     if ENABLE_SANDBOXED_COMPILERS:
diff --git a/sio/executors/common.py b/sio/executors/common.py
index 462d626..438b545 100644
--- a/sio/executors/common.py
+++ b/sio/executors/common.py
@@ -2,6 +2,7 @@
 import os
 from shutil import rmtree
 from zipfile import ZipFile, is_zipfile
+from sio.archive_utils import Archive, UnrecognizedArchiveFormat, UnsafeArchive
 from sio.workers import ft
 from sio.workers.util import decode_fields, replace_invalid_UTF, tempcwd
 from sio.workers.file_runners import get_file_runner
@@ -10,6 +11,9 @@
 import six
 
 
+import logging
+logger = logging.getLogger(__name__)
+
 def _populate_environ(renv, environ):
     """Takes interesting fields from renv into environ"""
     for key in ('time_used', 'mem_used', 'num_syscalls'):
@@ -112,13 +116,48 @@ def _run(environ, executor, use_sandboxes):
 
 
 def _fake_run_as_exe_is_output_file(environ):
-    # later code expects 'out' file to be present after compilation
-    ft.download(environ, 'exe_file', tempcwd('out'))
+    """Prepare the 'out' file for an output-only submission.
+
+    The submitted 'exe_file' is either a plain output file or a tar/zip
+    archive of outputs. From an archive, the member named
+    '<problem_short_name><name>.out' is extracted and moved to 'out'.
+
+    Returns a dict with 'result_code' and 'result_string'. The code is 'OK'
+    only when the 'out' file is in place; a missing member or an archive
+    with unsafe member paths yields 'WA'.
+    """
+    try:
+        ft.download(environ, 'exe_file', tempcwd('outs_archive'))
+        archive = Archive.get(tempcwd('outs_archive'))
+        problem_short_name = environ['problem_short_name']
+        test_name = f'{problem_short_name}{environ["name"]}.out'
+        logger.info('Archive with outs provided')
+        if test_name in archive.filenames():
+            archive.extract(test_name, to_path=tempcwd())
+            os.rename(os.path.join(tempcwd(), test_name), tempcwd('out'))
+        else:
+            logger.info('Output %s not found in archive', test_name)
+            return {
+                'result_code': 'WA',
+                'result_string': 'output not provided',
+            }
+    except UnrecognizedArchiveFormat:
+        # regular text file
+        logger.info('Text out provided')
+        # later code expects 'out' file to be present after compilation
+        ft.download(environ, 'exe_file', tempcwd('out'))
+    except UnsafeArchive as e:
+        logger.warning(six.text_type(e))
+        # The safety check runs before extraction, so no 'out' file exists
+        # for the checker; do not report success.
+        return {
+            'result_code': 'WA',
+            'result_string': 'unsafe output archive',
+        }
     return {
-        # copy filetracker id of 'exe_file' as 'out_file' (thanks to that checker will grab it)
-        'out_file': environ['exe_file'],
         # 'result_code' is left by executor, as executor is not used
         # this variable has to be set manually
         'result_code': 'OK',
         'result_string': 'ok',
     }
+