diff --git a/corehq/apps/fixtures/exceptions.py b/corehq/apps/fixtures/exceptions.py index c7ce7c102f6b..0975282dea94 100644 --- a/corehq/apps/fixtures/exceptions.py +++ b/corehq/apps/fixtures/exceptions.py @@ -11,10 +11,15 @@ class FixtureAPIRequestError(FixtureException): class FixtureUploadError(FixtureException): + def __init__(self, errors): self.errors = errors +class FixtureTooManyRows(FixtureException): + """Raised when an uploaded fixture exceeds MAX_FIXTURE_ROWS""" + + class FixtureTypeCheckError(Exception): pass diff --git a/corehq/apps/fixtures/upload/const.py b/corehq/apps/fixtures/upload/const.py index 66771685d137..68fedd0535de 100644 --- a/corehq/apps/fixtures/upload/const.py +++ b/corehq/apps/fixtures/upload/const.py @@ -1,4 +1,5 @@ DELETE_HEADER = "Delete(Y/N)" +MAX_FIXTURE_ROWS = 500_000 class MULTIPLE: diff --git a/corehq/apps/fixtures/upload/workbook.py b/corehq/apps/fixtures/upload/workbook.py index 306a9a50e878..9ad6c78cccfe 100644 --- a/corehq/apps/fixtures/upload/workbook.py +++ b/corehq/apps/fixtures/upload/workbook.py @@ -3,11 +3,12 @@ from django.utils.translation import gettext as _, gettext_lazy from corehq.apps.fixtures.exceptions import FixtureUploadError -from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MULTIPLE +from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MAX_FIXTURE_ROWS, MULTIPLE from corehq.apps.fixtures.upload.failure_messages import FAILURE_MESSAGES from corehq.apps.fixtures.utils import is_identifier_invalid from corehq.util.workbook_json.excel import ( WorkbookJSONError, + WorkbookTooManyRows, WorksheetNotFound, ) from corehq.util.workbook_json.excel import ( @@ -36,9 +37,14 @@ class _FixtureWorkbook(object): def __init__(self, file_or_filename): try: - self.workbook = excel_get_workbook(file_or_filename) + self.workbook = excel_get_workbook(file_or_filename, max_row_count=MAX_FIXTURE_ROWS) except WorkbookJSONError as e: raise FixtureUploadError([str(e)]) + except 
WorkbookTooManyRows as e: + raise FixtureUploadError([ + f"Lookup tables can contain a maximum of {e.max_row_count} rows. " + f"The uploaded file contains {e.actual_row_count} rows." + ]) self._rows = {} self.item_keys = WeakKeyDictionary() self.ownership = WeakKeyDictionary() diff --git a/corehq/util/workbook_json/const.py b/corehq/util/workbook_json/const.py new file mode 100644 index 000000000000..7f401bebad52 --- /dev/null +++ b/corehq/util/workbook_json/const.py @@ -0,0 +1 @@ +MAX_WORKBOOK_ROWS = 1_000_000 diff --git a/corehq/util/workbook_json/excel.py b/corehq/util/workbook_json/excel.py index 72a140138100..613c1d9e60c0 100644 --- a/corehq/util/workbook_json/excel.py +++ b/corehq/util/workbook_json/excel.py @@ -6,25 +6,16 @@ from django.core.files.uploadedfile import UploadedFile from django.utils.translation import gettext as _ +from corehq.util.workbook_json.const import MAX_WORKBOOK_ROWS -class InvalidExcelFileException(Exception): - pass - - -class JSONReaderError(Exception): - pass - - -class HeaderValueError(Exception): - pass - - -class StringTypeRequiredError(Exception): - pass - - -class WorkbookJSONError(Exception): - pass +from .exceptions import ( + HeaderValueError, + InvalidExcelFileException, + JSONReaderError, + StringTypeRequiredError, + WorkbookJSONError, + WorkbookTooManyRows, +) class IteratorJSONReader(object): @@ -145,9 +136,9 @@ def set_field_value(cls, obj, field, value): obj[field] = value -def get_workbook(file_or_filename): +def get_workbook(file_or_filename, max_row_count=MAX_WORKBOOK_ROWS): try: - return WorkbookJSONReader(file_or_filename) + return WorkbookJSONReader(file_or_filename, max_row_count=max_row_count) except (HeaderValueError, InvalidExcelFileException) as e: raise WorkbookJSONError(_( "Upload failed! 
" @@ -203,6 +194,8 @@ def __init__(self, worksheet, title=None): break else: width += 1 + + # ensure _max_row and _max_column properties are set self.worksheet.calculate_dimension(force=True) def iterator(): @@ -229,7 +222,7 @@ def _convert_float(value): class WorkbookJSONReader(object): - def __init__(self, file_or_filename): + def __init__(self, file_or_filename, max_row_count=MAX_WORKBOOK_ROWS): check_types = (UploadedFile, io.RawIOBase, io.BufferedIOBase) if isinstance(file_or_filename, check_types): tmp = NamedTemporaryFile(mode='wb', suffix='.xlsx', delete=False) @@ -246,12 +239,16 @@ def __init__(self, file_or_filename): self.worksheets = [] try: + total_row_count = 0 for worksheet in self.wb.worksheets: try: ws = WorksheetJSONReader(worksheet, title=worksheet.title) except IndexError: raise JSONReaderError('This Excel file has unrecognised formatting. Please try downloading ' 'the lookup table first, and then add data to it.') + total_row_count += worksheet.max_row + if total_row_count > max_row_count: + raise WorkbookTooManyRows(max_row_count, total_row_count) self.worksheets_by_title[worksheet.title] = ws self.worksheets.append(ws) finally: diff --git a/corehq/util/workbook_json/excel_importer.py b/corehq/util/workbook_json/excel_importer.py index d54d1bc8b375..36a669bf3b48 100644 --- a/corehq/util/workbook_json/excel_importer.py +++ b/corehq/util/workbook_json/excel_importer.py @@ -6,9 +6,7 @@ from corehq.util.workbook_json.excel import WorkbookJSONReader - -class UnknownFileRefException(Exception): - pass +from .exceptions import UnknownFileRefException class ExcelImporter(object): diff --git a/corehq/util/workbook_json/exceptions.py b/corehq/util/workbook_json/exceptions.py new file mode 100644 index 000000000000..a3fc529dd1b5 --- /dev/null +++ b/corehq/util/workbook_json/exceptions.py @@ -0,0 +1,31 @@ +class HeaderValueError(Exception): + pass + + +class InvalidExcelFileException(Exception): + pass + + +class JSONReaderError(Exception): + pass + + 
class StringTypeRequiredError(Exception):
    """Raised when a value that must be a string is of another type.

    NOTE(review): the raise site is not visible from this module; confirm
    the exact condition against ``excel.py``.
    """


class UnknownFileRefException(Exception):
    """Raised by the Excel importer for an unknown file reference.

    NOTE(review): the raise site lives in ``excel_importer.py``; confirm
    the exact condition there.
    """


class WorkbookJSONError(Exception):
    """User-facing error raised when an uploaded workbook cannot be read."""


class WorkbookTooManyRows(Exception):
    """Raised when a workbook's total row count exceeds the allowed maximum.

    The maximum is whatever limit the caller passed to the reader; it is not
    necessarily the module-wide default.

    Attributes:
        max_row_count: the row limit that was enforced.
        actual_row_count: the number of rows counted when the limit was hit.
    """

    def __init__(self, max_row_count, actual_row_count):
        # Forward both values to ``Exception`` so that ``self.args`` mirrors
        # the constructor signature.  Pickle/copy rebuild an exception by
        # calling ``type(self)(*self.args)``; with empty ``args`` that call
        # fails with ``TypeError`` because both parameters are required.
        super().__init__(max_row_count, actual_row_count)
        self.max_row_count = max_row_count
        self.actual_row_count = actual_row_count

    def __str__(self):
        # Give logs and tracebacks a readable message instead of the raw
        # ``args`` tuple (or, previously, an empty string).
        return (
            f"Workbook contains {self.actual_row_count} rows, which exceeds "
            f"the maximum of {self.max_row_count} rows"
        )