Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add row limit to lookup table uploads #35426

Merged
merged 5 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions corehq/apps/fixtures/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@ class FixtureAPIRequestError(FixtureException):


class FixtureUploadError(FixtureException):
    """A lookup table upload could not be processed.

    The ``errors`` value given to the constructor is stored unchanged on
    ``self.errors``. ``_FixtureWorkbook`` raises this with a list of
    message strings.
    """

    def __init__(self, errors):
        self.errors = errors


class FixtureTooManyRows(FixtureException):
    """Raised when an uploaded fixture exceeds MAX_FIXTURE_ROWS"""


class FixtureTypeCheckError(Exception):
    """Fixture type check failure.

    NOTE(review): raise sites are not visible in this module; the meaning
    is inferred from the name -- confirm. Derives directly from
    ``Exception`` rather than from ``FixtureException``.
    """

Expand Down
1 change: 1 addition & 0 deletions corehq/apps/fixtures/upload/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
DELETE_HEADER = "Delete(Y/N)"
# Row cap for lookup table uploads: passed as ``max_row_count`` when the
# uploaded workbook is opened in corehq/apps/fixtures/upload/workbook.py.
MAX_FIXTURE_ROWS = 500_000


class MULTIPLE:
Expand Down
10 changes: 8 additions & 2 deletions corehq/apps/fixtures/upload/workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
from django.utils.translation import gettext as _, gettext_lazy

from corehq.apps.fixtures.exceptions import FixtureUploadError
from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MULTIPLE
from corehq.apps.fixtures.upload.const import DELETE_HEADER, INVALID, MAX_FIXTURE_ROWS, MULTIPLE
from corehq.apps.fixtures.upload.failure_messages import FAILURE_MESSAGES
from corehq.apps.fixtures.utils import is_identifier_invalid
from corehq.util.workbook_json.excel import (
WorkbookJSONError,
WorkbookTooManyRows,
WorksheetNotFound,
)
from corehq.util.workbook_json.excel import (
Expand Down Expand Up @@ -36,9 +37,14 @@ class _FixtureWorkbook(object):

def __init__(self, file_or_filename):
try:
self.workbook = excel_get_workbook(file_or_filename)
self.workbook = excel_get_workbook(file_or_filename, max_row_count=MAX_FIXTURE_ROWS)
except WorkbookJSONError as e:
raise FixtureUploadError([str(e)])
except WorkbookTooManyRows as e:
raise FixtureUploadError([
f"Lookup tables can contain a maximum of {e.max_row_count} rows. "
f"The uploaded file contains {e.actual_row_count} rows."
])
self._rows = {}
self.item_keys = WeakKeyDictionary()
self.ownership = WeakKeyDictionary()
Expand Down
1 change: 1 addition & 0 deletions corehq/util/workbook_json/const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Default ``max_row_count`` for get_workbook() / WorkbookJSONReader: the cap on
# the row total summed over all worksheets before WorkbookTooManyRows is raised.
MAX_WORKBOOK_ROWS = 1_000_000
46 changes: 25 additions & 21 deletions corehq/util/workbook_json/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,16 @@
from django.core.files.uploadedfile import UploadedFile
from django.utils.translation import gettext as _

from corehq.util.workbook_json.const import MAX_WORKBOOK_ROWS

class InvalidExcelFileException(Exception):
pass


class JSONReaderError(Exception):
pass


class HeaderValueError(Exception):
pass


class StringTypeRequiredError(Exception):
pass


class WorkbookJSONError(Exception):
pass
from .exceptions import (
HeaderValueError,
InvalidExcelFileException,
JSONReaderError,
StringTypeRequiredError,
WorkbookJSONError,
WorkbookTooManyRows,
)


class IteratorJSONReader(object):
Expand Down Expand Up @@ -145,9 +136,9 @@ def set_field_value(cls, obj, field, value):
obj[field] = value


def get_workbook(file_or_filename):
def get_workbook(file_or_filename, max_row_count=MAX_WORKBOOK_ROWS):
try:
return WorkbookJSONReader(file_or_filename)
return WorkbookJSONReader(file_or_filename, max_row_count=max_row_count)
except (HeaderValueError, InvalidExcelFileException) as e:
raise WorkbookJSONError(_(
"Upload failed! "
Expand Down Expand Up @@ -226,10 +217,19 @@ def _convert_float(value):
yield cell_values
super(WorksheetJSONReader, self).__init__(iterator())

def row_count(self):
    """Return the bottom row number of the worksheet's reported dimension.

    The value is parsed from the string returned by
    ``self.worksheet.calculate_dimension()`` (a range such as
    ``"A1:C500"``), so the rows themselves are not iterated.

    Returns:
        int: the last row number found in the dimension string, or 0 when
        the string contains no digits.
    """
    import re  # kept function-local, as in the original

    dimension = self.worksheet.calculate_dimension()
    # A range like "A1:C500" contains two row numbers. The first one is the
    # top row (usually 1); the bottom row is the last run of digits.
    row_numbers = re.findall(r'\d+', dimension)
    if not row_numbers:
        return 0
    return int(row_numbers[-1])
gherceg marked this conversation as resolved.
Show resolved Hide resolved


class WorkbookJSONReader(object):

def __init__(self, file_or_filename):
def __init__(self, file_or_filename, max_row_count=MAX_WORKBOOK_ROWS):
check_types = (UploadedFile, io.RawIOBase, io.BufferedIOBase)
if isinstance(file_or_filename, check_types):
tmp = NamedTemporaryFile(mode='wb', suffix='.xlsx', delete=False)
Expand All @@ -246,12 +246,16 @@ def __init__(self, file_or_filename):
self.worksheets = []

try:
total_row_count = 0
for worksheet in self.wb.worksheets:
try:
ws = WorksheetJSONReader(worksheet, title=worksheet.title)
except IndexError:
raise JSONReaderError('This Excel file has unrecognised formatting. Please try downloading '
'the lookup table first, and then add data to it.')
total_row_count += ws.row_count()
if total_row_count > max_row_count:
raise WorkbookTooManyRows(max_row_count, total_row_count)
self.worksheets_by_title[worksheet.title] = ws
self.worksheets.append(ws)
finally:
Expand Down
4 changes: 1 addition & 3 deletions corehq/util/workbook_json/excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@

from corehq.util.workbook_json.excel import WorkbookJSONReader


class UnknownFileRefException(Exception):
pass
from .exceptions import UnknownFileRefException


class ExcelImporter(object):
Expand Down
31 changes: 31 additions & 0 deletions corehq/util/workbook_json/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
class HeaderValueError(Exception):
    """Problem with a worksheet header value.

    ``get_workbook`` catches this error and re-raises it as
    ``WorkbookJSONError``.

    NOTE(review): the raise sites are not visible here; the exact trigger
    is inferred from the name -- confirm.
    """


class InvalidExcelFileException(Exception):
    """The supplied file could not be handled as an Excel workbook.

    ``get_workbook`` catches this error and re-raises it as
    ``WorkbookJSONError``.

    NOTE(review): the raise sites are not visible here; the exact trigger
    is inferred from the name -- confirm.
    """


class JSONReaderError(Exception):
    """A worksheet could not be turned into a JSON reader.

    ``WorkbookJSONReader`` raises this, with an "unrecognised formatting"
    message, when constructing a ``WorksheetJSONReader`` fails with an
    ``IndexError``.
    """


class StringTypeRequiredError(Exception):
    """A value was required to be a string.

    NOTE(review): neither the raise sites nor the handlers are visible
    here; this description is inferred from the name -- confirm.
    """


class UnknownFileRefException(Exception):
    """A file reference could not be resolved.

    Imported by ``corehq/util/workbook_json/excel_importer.py``.

    NOTE(review): the raise sites are not visible here; the exact trigger
    is inferred from the name -- confirm.
    """


class WorkbookJSONError(Exception):
    """An uploaded workbook could not be read.

    ``get_workbook`` raises this in place of ``HeaderValueError`` and
    ``InvalidExcelFileException``. The lookup table upload code catches it
    and reports ``str(error)`` to the user via ``FixtureUploadError``.
    """


class WorkbookTooManyRows(Exception):
    """Workbook row count exceeds the allowed maximum.

    The maximum is whatever ``max_row_count`` the reader was given
    (``MAX_WORKBOOK_ROWS`` by default).

    Attributes are set from the constructor arguments:
        max_row_count: the row limit that was exceeded.
        actual_row_count: the row count reported by the raiser.
    """

    def __init__(self, max_row_count, actual_row_count):
        # Forward both values so ``args`` mirrors the constructor signature.
        # With empty ``args`` the exception cannot be rebuilt by pickle/copy
        # and ``str(error)`` is an empty string.
        super().__init__(max_row_count, actual_row_count)
        self.max_row_count = max_row_count
        self.actual_row_count = actual_row_count

    def __str__(self):
        return (
            f"Workbook row count ({self.actual_row_count}) exceeds "
            f"the maximum of {self.max_row_count} rows"
        )
Loading