diff --git a/src/harmony/matching/matcher.py b/src/harmony/matching/matcher.py index 2dfd2da..e450f76 100644 --- a/src/harmony/matching/matcher.py +++ b/src/harmony/matching/matcher.py @@ -45,7 +45,8 @@ import os -def get_batch_size(default=50): +# Tested on a production server with 16 GB of RAM; a batch size of 1000 seems safe (TW, 15 Dec 2024) +def get_batch_size(default=1000): try: batch_size = int(os.getenv("BATCH_SIZE", default)) return max(batch_size, 0) diff --git a/tests/test_batching_in_matcher.py b/tests/test_batching_in_matcher.py index b69a5bf..da23f12 100644 --- a/tests/test_batching_in_matcher.py +++ b/tests/test_batching_in_matcher.py @@ -67,14 +67,14 @@ def test_negative_batch_size(self): @mock.patch.dict(os.environ, {}, clear=True) def test_default_batch_size(self): - """Test when BATCH_SIZE is not set, it defaults to 50.""" + """Test that when BATCH_SIZE is not set, it defaults to 1000.""" items = [f"item{i}" for i in range(10)] results = process_items_in_batches(items, mock_llm_function) self.assertEqual(len(results), 10) @mock.patch.dict(os.environ, {"BATCH_SIZE": "invalid"}) def test_invalid_batch_size(self): - """Test when BATCH_SIZE is invalid, it defaults to 50.""" + """Test that when BATCH_SIZE is invalid, it defaults to 1000.""" items = [f"item{i}" for i in range(10)] results = process_items_in_batches(items, mock_llm_function) self.assertEqual(len(results), 10)