Skip to content

Commit

Permalink
Merge pull request #35426 from dimagi/gh/lookup-tables/row-limit
Browse files Browse the repository at this point in the history
Add row limit to lookup table uploads
  • Loading branch information
gherceg authored Dec 4, 2024
2 parents 103b01c + dc58ae4 commit dbe68f4
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 26 deletions.
5 changes: 5 additions & 0 deletions corehq/apps/fixtures/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@ class FixtureAPIRequestError(FixtureException):


class FixtureUploadError(FixtureException):
    """Signals that a lookup table (fixture) upload could not be processed.

    ``errors`` holds the failure details. ``_FixtureWorkbook`` passes a list
    of message strings (for example the text of a ``WorkbookJSONError`` or the
    row-limit message).
    """

    def __init__(self, errors):
        # Kept as given; presumably read by whoever reports the failed upload
        # (no consumer is visible here).
        self.errors = errors


class FixtureTooManyRows(FixtureException):
    """Raised when an uploaded fixture exceeds MAX_FIXTURE_ROWS.

    NOTE(review): no raise site for this class is visible in this change; the
    workbook loader reports an over-limit upload via ``FixtureUploadError``
    instead -- confirm where this exception is meant to be used.
    """


class FixtureTypeCheckError(Exception):
    """Error type for fixture type-check failures.

    The meaning is inferred from the name; no raise site is visible here.
    It derives directly from ``Exception``, not from ``FixtureException``.
    """

Expand Down
1 change: 1 addition & 0 deletions corehq/apps/fixtures/upload/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
DELETE_HEADER = "Delete(Y/N)"
# Row cap passed as ``max_row_count`` when a lookup table workbook is opened
# for upload. The workbook reader compares it against the running total of
# ``max_row`` across all worksheets, not against each sheet individually.
MAX_FIXTURE_ROWS = 500_000


class MULTIPLE:
Expand Down
10 changes: 8 additions & 2 deletions corehq/apps/fixtures/upload/workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
from django.utils.translation import gettext as _, gettext_lazy

from corehq.apps.fixtures.exceptions import FixtureUploadError
from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MULTIPLE
from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MAX_FIXTURE_ROWS, MULTIPLE
from corehq.apps.fixtures.upload.failure_messages import FAILURE_MESSAGES
from corehq.apps.fixtures.utils import is_identifier_invalid
from corehq.util.workbook_json.excel import (
WorkbookJSONError,
WorkbookTooManyRows,
WorksheetNotFound,
)
from corehq.util.workbook_json.excel import (
Expand Down Expand Up @@ -36,9 +37,14 @@ class _FixtureWorkbook(object):

def __init__(self, file_or_filename):
try:
self.workbook = excel_get_workbook(file_or_filename)
self.workbook = excel_get_workbook(file_or_filename, max_row_count=MAX_FIXTURE_ROWS)
except WorkbookJSONError as e:
raise FixtureUploadError([str(e)])
except WorkbookTooManyRows as e:
raise FixtureUploadError([
f"Lookup tables can contain a maximum of {e.max_row_count} rows. "
f"The uploaded file contains {e.actual_row_count} rows."
])
self._rows = {}
self.item_keys = WeakKeyDictionary()
self.ownership = WeakKeyDictionary()
Expand Down
1 change: 1 addition & 0 deletions corehq/util/workbook_json/const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Default ``max_row_count`` for ``get_workbook()`` and ``WorkbookJSONReader``
# when the caller does not supply a stricter limit of its own.
MAX_WORKBOOK_ROWS = 1_000_000
39 changes: 18 additions & 21 deletions corehq/util/workbook_json/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,16 @@
from django.core.files.uploadedfile import UploadedFile
from django.utils.translation import gettext as _

from corehq.util.workbook_json.const import MAX_WORKBOOK_ROWS

class InvalidExcelFileException(Exception):
pass


class JSONReaderError(Exception):
pass


class HeaderValueError(Exception):
pass


class StringTypeRequiredError(Exception):
pass


class WorkbookJSONError(Exception):
pass
from .exceptions import (
HeaderValueError,
InvalidExcelFileException,
JSONReaderError,
StringTypeRequiredError,
WorkbookJSONError,
WorkbookTooManyRows,
)


class IteratorJSONReader(object):
Expand Down Expand Up @@ -145,9 +136,9 @@ def set_field_value(cls, obj, field, value):
obj[field] = value


def get_workbook(file_or_filename):
def get_workbook(file_or_filename, max_row_count=MAX_WORKBOOK_ROWS):
try:
return WorkbookJSONReader(file_or_filename)
return WorkbookJSONReader(file_or_filename, max_row_count=max_row_count)
except (HeaderValueError, InvalidExcelFileException) as e:
raise WorkbookJSONError(_(
"Upload failed! "
Expand Down Expand Up @@ -203,6 +194,8 @@ def __init__(self, worksheet, title=None):
break
else:
width += 1

# ensure _max_row and _max_column properties are set
self.worksheet.calculate_dimension(force=True)

def iterator():
Expand All @@ -229,7 +222,7 @@ def _convert_float(value):

class WorkbookJSONReader(object):

def __init__(self, file_or_filename):
def __init__(self, file_or_filename, max_row_count=MAX_WORKBOOK_ROWS):
check_types = (UploadedFile, io.RawIOBase, io.BufferedIOBase)
if isinstance(file_or_filename, check_types):
tmp = NamedTemporaryFile(mode='wb', suffix='.xlsx', delete=False)
Expand All @@ -246,12 +239,16 @@ def __init__(self, file_or_filename):
self.worksheets = []

try:
total_row_count = 0
for worksheet in self.wb.worksheets:
try:
ws = WorksheetJSONReader(worksheet, title=worksheet.title)
except IndexError:
raise JSONReaderError('This Excel file has unrecognised formatting. Please try downloading '
'the lookup table first, and then add data to it.')
total_row_count += worksheet.max_row
if total_row_count > max_row_count:
raise WorkbookTooManyRows(max_row_count, total_row_count)
self.worksheets_by_title[worksheet.title] = ws
self.worksheets.append(ws)
finally:
Expand Down
4 changes: 1 addition & 3 deletions corehq/util/workbook_json/excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@

from corehq.util.workbook_json.excel import WorkbookJSONReader


class UnknownFileRefException(Exception):
pass
from .exceptions import UnknownFileRefException


class ExcelImporter(object):
Expand Down
31 changes: 31 additions & 0 deletions corehq/util/workbook_json/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
class HeaderValueError(Exception):
    """Error concerning a worksheet header value.

    The meaning is inferred from the name; the raise site is not visible
    here. ``get_workbook()`` in ``excel.py`` catches this and re-raises it as
    ``WorkbookJSONError``.
    """


class InvalidExcelFileException(Exception):
    """Error for a file that cannot be treated as a valid Excel workbook.

    The meaning is inferred from the name; the raise site is not visible
    here. ``get_workbook()`` in ``excel.py`` catches this and re-raises it as
    ``WorkbookJSONError``.
    """


class JSONReaderError(Exception):
    """Raised when a worksheet cannot be turned into JSON rows.

    ``WorkbookJSONReader`` raises it when building a worksheet reader fails
    with ``IndexError``, i.e. the file has unrecognised formatting.
    """


class StringTypeRequiredError(Exception):
    """Error for a value that was required to be a string but was not.

    The meaning is inferred from the name; no raise site is visible here --
    confirm against ``excel.py``.
    """


class UnknownFileRefException(Exception):
    """Error for a file reference that cannot be resolved.

    The meaning is inferred from the name; no raise site is visible here.
    ``excel_importer.py`` imports this class from this module.
    """


class WorkbookJSONError(Exception):
    """User-facing failure raised while loading a workbook.

    ``get_workbook()`` raises it in place of ``HeaderValueError`` and
    ``InvalidExcelFileException``; the fixture upload workbook wrapper turns
    its message into a ``FixtureUploadError``.
    """


class WorkbookTooManyRows(Exception):
    """Raised when a workbook's total row count exceeds the allowed maximum.

    The limit is whatever ``max_row_count`` the reader was given; it defaults
    to ``MAX_WORKBOOK_ROWS`` but callers may pass a stricter value.

    Attributes:
        max_row_count: the limit that was enforced.
        actual_row_count: the running row total at the moment the limit was
            exceeded.
    """

    def __init__(self, max_row_count, actual_row_count):
        # Forward both values to the base class so that ``args`` is populated.
        # With an empty ``args`` the exception cannot be rebuilt by
        # ``copy``/``pickle`` (they call ``cls(*args)``), and it has no
        # message.
        super().__init__(max_row_count, actual_row_count)
        self.max_row_count = max_row_count
        self.actual_row_count = actual_row_count

    def __str__(self):
        # Readable message for callers that log or display the exception
        # without handling it specifically.
        return (
            f"Workbook contains {self.actual_row_count} rows, "
            f"which exceeds the maximum of {self.max_row_count} rows."
        )

0 comments on commit dbe68f4

Please sign in to comment.