From 76cd5b362d434b5855d0b3d178b40457a40e24bf Mon Sep 17 00:00:00 2001
From: Jono Yang
Date: Mon, 16 Oct 2023 18:38:18 -0700
Subject: [PATCH] Create command to copy packages and related

* Add minecode database

Signed-off-by: Jono Yang
---
 .../commands/copy_legacy_package_data.py      | 95 +++++++++++++++++++
 purldb/settings.py                            | 11 ++-
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 packagedb/management/commands/copy_legacy_package_data.py

diff --git a/packagedb/management/commands/copy_legacy_package_data.py b/packagedb/management/commands/copy_legacy_package_data.py
new file mode 100644
index 00000000..5714c20d
--- /dev/null
+++ b/packagedb/management/commands/copy_legacy_package_data.py
@@ -0,0 +1,95 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/purldb for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import sys
+
+from minecode.management.commands import VerboseCommand
+from packagedb.models import Package, DependentPackage, Party
+
+TRACE = False
+
+# Number of packages buffered in memory before each bulk insert.
+BATCH_SIZE = 100
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout)
+logger.setLevel(logging.INFO)
+
+
+class Command(VerboseCommand):
+    help = (
+        'Copy packages and their dependencies and parties from the '
+        'minecode database into the default database.'
+    )
+
+    def handle(self, *args, **options):
+        """
+        Copy each package of the minecode database whose download_url is not
+        already present in the default database, together with its
+        dependencies and parties. Rows are inserted in batches of BATCH_SIZE
+        packages and a progress line is printed after each batch.
+        """
+        packages = Package.objects.using('minecode').all()
+        package_count = packages.count()
+
+        unsaved_packages = []
+        unsaved_dependencies = []
+        unsaved_parties = []
+        # download_urls buffered in the current batch: the database check
+        # below cannot see packages that have not been inserted yet.
+        unsaved_urls = set()
+        saved_count = 0
+
+        for package in packages.iterator(chunk_size=2000):
+            download_url = package.download_url
+            if download_url in unsaved_urls:
+                continue
+            if Package.objects.filter(download_url=download_url).exists():
+                continue
+
+            # Buffer first, flush second: flushing instead of buffering would
+            # silently drop the package that fills up the batch.
+            unsaved_packages.append(package)
+            unsaved_dependencies.extend(package.dependencies.all())
+            unsaved_parties.extend(package.parties.all())
+            unsaved_urls.add(download_url)
+
+            if len(unsaved_packages) >= BATCH_SIZE:
+                saved_count += self._save_batch(
+                    unsaved_packages, unsaved_dependencies, unsaved_parties
+                )
+                unsaved_urls.clear()
+                print(f" {saved_count:,} / {package_count:,} saved")
+
+        # Save the last, partially filled batch.
+        if unsaved_packages:
+            saved_count += self._save_batch(
+                unsaved_packages, unsaved_dependencies, unsaved_parties
+            )
+            print(f" {saved_count:,} / {package_count:,} saved")
+
+    def _save_batch(self, packages, dependencies, parties):
+        """
+        Bulk-create `packages`, `dependencies` and `parties`, in that order,
+        then empty the three lists in place. Return the number of packages
+        created.
+
+        NOTE(review): presumably the rows keep the primary keys they had in
+        the minecode database; confirm these cannot collide with existing
+        rows.
+        """
+        package_count = len(packages)
+        Package.objects.bulk_create(packages)
+        DependentPackage.objects.bulk_create(dependencies)
+        Party.objects.bulk_create(parties)
+        packages.clear()
+        dependencies.clear()
+        parties.clear()
+        return package_count
diff --git a/purldb/settings.py b/purldb/settings.py
index 02992fd3..56846a41 100644
--- a/purldb/settings.py
+++ b/purldb/settings.py
@@ -110,7 +110,16 @@
         'PASSWORD': env.str('PACKAGEDB_DB_PASSWORD', 'packagedb'),
         'PORT': env.str('PACKAGEDB_DB_PORT', '5432'),
         'ATOMIC_REQUESTS': True,
-    }
+    },
+    'minecode': {
+        'ENGINE': env.str('PACKAGEDB_DB_ENGINE', 'django.db.backends.postgresql'),
+        'HOST': env.str('PACKAGEDB_DB_HOST', 'localhost'),
+        'NAME': env.str('MINECODE_DB_NAME', 'minecode'),
+        'USER': env.str('MINECODE_DB_USER', 'minecode'),
+        'PASSWORD': env.str('MINECODE_DB_PASSWORD', 'minecode'),
+        'PORT': env.str('PACKAGEDB_DB_PORT', '5432'),
+        'ATOMIC_REQUESTS': True,
+    },
 }
 
 DEFAULT_AUTO_FIELD = "django.db.models.AutoField"