diff --git a/plugins/pom-analyzer/src/main/java/eu/f4sten/pomanalyzer/utils/DatabaseUtils.java b/plugins/pom-analyzer/src/main/java/eu/f4sten/pomanalyzer/utils/DatabaseUtils.java index 297a7fff..57bc0f20 100644 --- a/plugins/pom-analyzer/src/main/java/eu/f4sten/pomanalyzer/utils/DatabaseUtils.java +++ b/plugins/pom-analyzer/src/main/java/eu/f4sten/pomanalyzer/utils/DatabaseUtils.java @@ -18,13 +18,8 @@ import static eu.fasten.core.maven.utils.MavenUtilities.MAVEN_CENTRAL_REPO; import java.sql.Timestamp; -import java.util.List; -import eu.fasten.core.data.metadatadb.codegen.tables.Files; -import eu.fasten.core.data.metadatadb.codegen.tables.PackageVersions; -import eu.fasten.core.data.metadatadb.codegen.tables.Packages; import org.jooq.DSLContext; -import org.jooq.JSONB; import org.jooq.exception.DataAccessException; import org.jooq.impl.DSL; @@ -35,7 +30,6 @@ import eu.fasten.core.data.metadatadb.MetadataDao; import eu.fasten.core.exceptions.UnrecoverableError; import eu.fasten.core.maven.data.Pom; -import org.json.JSONObject; public class DatabaseUtils { @@ -148,40 +142,4 @@ public void pruneRetries(String key) { throw new UnrecoverableError(e); } } - - public Long getPkgVersionID(String pkgName, String version) { - try { - var pkgVerID = context.select(PackageVersions.PACKAGE_VERSIONS.ID). - from(Packages.PACKAGES, PackageVersions.PACKAGE_VERSIONS). - where(Packages.PACKAGES.PACKAGE_NAME.eq(pkgName). - and(PackageVersions.PACKAGE_VERSIONS.PACKAGE_ID.eq(Packages.PACKAGES.ID)). - and(PackageVersions.PACKAGE_VERSIONS.VERSION.eq(version))).fetchOne(); - // May produce null pointer exception - return pkgVerID.component1(); - } catch (DataAccessException e) { - throw new UnrecoverableError(e); - } - } - - public List getFilePaths4PkgVersion(Long pkgVersionID) { - try { - var filePaths = context.select(Files.FILES.PATH). 
- from(Files.FILES).where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID)).fetch(); - return filePaths.getValues(Files.FILES.PATH); - } catch (DataAccessException e) { - throw new UnrecoverableError(e); - } - } - - public String addFileHash(Long pkgVersionID, String filePath, String fileHash) { - try { - var fileMetadata = JSONB.valueOf(String.valueOf(new JSONObject().put("swh_checksum", fileHash))); - return context.update(Files.FILES). - set(Files.FILES.METADATA, fileMetadata). - where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID).and(Files.FILES.PATH.eq(filePath))). - returningResult(Files.FILES.PATH).fetchOne().getValue(Files.FILES.PATH); - } catch (DataAccessException e) { - throw new UnrecoverableError(e); - } - } } \ No newline at end of file diff --git a/plugins/swh-inserter/pom.xml b/plugins/swh-inserter/pom.xml index 91cde969..fcd4a0db 100644 --- a/plugins/swh-inserter/pom.xml +++ b/plugins/swh-inserter/pom.xml @@ -2,18 +2,19 @@ + 4.0.0 + plugins eu.fasten-project 0.0.12-SNAPSHOT - 4.0.0 - swh-inserter + eu.fasten-project - pom-analyzer + sources-provider 0.0.12-SNAPSHOT compile diff --git a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/DatabaseUtils.java b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/DatabaseUtils.java new file mode 100644 index 00000000..a34dbada --- /dev/null +++ b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/DatabaseUtils.java @@ -0,0 +1,73 @@ +/* + * Copyright 2022 Delft University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package eu.f4sten.swhinserter; + +import java.util.List; + +import org.jooq.DSLContext; +import org.jooq.JSONB; +import org.jooq.exception.DataAccessException; +import org.json.JSONObject; + +import eu.fasten.core.data.metadatadb.codegen.tables.Files; +import eu.fasten.core.data.metadatadb.codegen.tables.PackageVersions; +import eu.fasten.core.data.metadatadb.codegen.tables.Packages; +import eu.fasten.core.exceptions.UnrecoverableError; + +public class DatabaseUtils { + + private final DSLContext context; + + public DatabaseUtils(DSLContext context) { + this.context = context; + } + + public Long getPkgVersionID(String pkgName, String version) { + try { + var pkgVerID = context.select(PackageVersions.PACKAGE_VERSIONS.ID) + .from(Packages.PACKAGES, PackageVersions.PACKAGE_VERSIONS) + .where(Packages.PACKAGES.PACKAGE_NAME.eq(pkgName) + .and(PackageVersions.PACKAGE_VERSIONS.PACKAGE_ID.eq(Packages.PACKAGES.ID)) + .and(PackageVersions.PACKAGE_VERSIONS.VERSION.eq(version))) + .fetchOne(); + // May produce null pointer exception + return pkgVerID.component1(); + } catch (DataAccessException e) { + throw new UnrecoverableError(e); + } + } + + public List getFilePaths4PkgVersion(Long pkgVersionID) { + try { + var filePaths = context.select(Files.FILES.PATH).from(Files.FILES) + .where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID)).fetch(); + return filePaths.getValues(Files.FILES.PATH); + } catch (DataAccessException e) { + throw new UnrecoverableError(e); + } + } + + public String addFileHash(Long pkgVersionID, String filePath, String fileHash) { + try { + var fileMetadata = JSONB.valueOf(String.valueOf(new JSONObject().put("swh_checksum", fileHash))); + return context.update(Files.FILES).set(Files.FILES.METADATA, fileMetadata) + .where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID).and(Files.FILES.PATH.eq(filePath))) + 
.returningResult(Files.FILES.PATH).fetchOne().getValue(Files.FILES.PATH); + } catch (DataAccessException e) { + throw new UnrecoverableError(e); + } + } +} \ No newline at end of file diff --git a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/Main.java b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/Main.java index d00a6d84..83569702 100644 --- a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/Main.java +++ b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/Main.java @@ -1,25 +1,40 @@ +/* + * Copyright 2022 Delft University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package eu.f4sten.swhinserter; -import eu.f4sten.infra.AssertArgs; -import eu.f4sten.infra.Plugin; -import eu.f4sten.infra.kafka.Kafka; -import eu.f4sten.infra.kafka.Lane; -import eu.f4sten.infra.utils.IoUtils; -import eu.f4sten.pomanalyzer.utils.DatabaseUtils; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.io.FileUtils; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.inject.Inject; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.LinkedHashMap; + +import javax.inject.Inject; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.f4sten.infra.AssertArgs; +import eu.f4sten.infra.Plugin; +import eu.f4sten.infra.kafka.Kafka; +import eu.f4sten.infra.kafka.Lane; +import eu.f4sten.infra.utils.IoUtils; +import eu.f4sten.sourcesprovider.data.SourcePayload; public class Main implements Plugin { @@ -46,7 +61,7 @@ public void run() { .notNull(a -> a.kafkaIn, "kafka input topic"); // LOG.info("Subscribing to '{}'", args.kafkaIn); - kafka.subscribe(args.kafkaIn, LinkedHashMap.class, this::consume); + kafka.subscribe(args.kafkaIn, SourcePayload.class, this::consume); while (true) { LOG.debug("Polling ..."); kafka.poll(); @@ -56,49 +71,56 @@ public void run() { } } - private void consume(LinkedHashMap message, Lane lane) { - var json = new JSONObject(message); - LOG.info("Consuming next {} record {} ...", lane, json); - var pkgName = json.get("product").toString(); - var ver = json.get("version").toString(); - //var srcPath = json.get("version").toString(); + private void consume(SourcePayload payload, Lane lane) { + LOG.info("Consuming next {} record ...", lane); + var pkgName = payload.getProduct(); + var ver = 
payload.getVersion(); + var basePath = getBasePath(pkgName, ver); var pkgVerID = db.getPkgVersionID(pkgName, ver); - var pkgVerFilesPaths = db.getFilePaths4PkgVersion(pkgVerID); - - pkgVerFilesPaths.forEach(fp -> { - LOG.info("P: {}", fp); - var srcFileContent = readSrcFileContent(pkgName, ver, fp); - var srcFileHash = computeGitHash(srcFileContent.getBytes(StandardCharsets.UTF_8)); - db.addFileHash(pkgVerID, fp, srcFileHash); - LOG.info("Added file hash for {}", fp); + var paths = db.getFilePaths4PkgVersion(pkgVerID); + + paths.forEach(path -> { + var content = read(basePath, path); + var bytes = content.getBytes(StandardCharsets.UTF_8); + var hash = computeSwhHash(bytes); + db.addFileHash(pkgVerID, path, hash); + LOG.info("Added file hash for {}", path); }); } - private String readSrcFileContent(String pkgName, String version, String filePath) { + private File getBasePath(String pkgName, String version) { String[] ga = pkgName.split(":"); var groupID = ga[0]; var artifactID = ga[1]; - var baseDir = io.getBaseFolder(); - var srcFile = new File(Path.of(baseDir.toString(), "sources", "mvn", Character.toString(groupID.charAt(0)), - groupID, artifactID, version, filePath).toString()); + var baseDir = io.getBaseFolder().getAbsolutePath(); + var firstChar = Character.toString(groupID.charAt(0)); + var basePath = Path.of(baseDir, "sources", "mvn", firstChar, groupID, artifactID, version).toFile(); + return basePath; + } + + private String read(File basePath, String filePath) { try { + var srcFile = new File(basePath, filePath); return FileUtils.readFileToString(srcFile, StandardCharsets.UTF_8); } catch (IOException e) { - throw new RuntimeException("Could not read the file " + srcFile.toPath()); + throw new RuntimeException(e); } } - // This method computes a SWH-compatible hash - private String computeGitHash(byte[] fileContent) { - MessageDigest md = null; - try { - md = MessageDigest.getInstance("SHA-1"); - } catch (NoSuchAlgorithmException e) { - e.printStackTrace(); 
- } + private String computeSwhHash(byte[] fileContent) { + var md = getSha1Digest(); + // The SWH hash is based on Git, which salts the content with "blob" md.update(String.format("blob %d\u0000", fileContent.length).getBytes()); md.update(fileContent); return Hex.encodeHexString(md.digest()); } -} + + private static MessageDigest getSha1Digest() { + try { + return MessageDigest.getInstance("SHA-1"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterArgs.java b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterArgs.java index 9fc6d90a..2867c947 100644 --- a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterArgs.java +++ b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterArgs.java @@ -1,9 +1,25 @@ +/* + * Copyright 2022 Delft University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ package eu.f4sten.swhinserter; import com.beust.jcommander.Parameter; + import eu.f4sten.infra.kafka.DefaultTopics; public class SwhInserterArgs { - @Parameter(names = "--swhinserter.kafkaIn", arity = 1) - public String kafkaIn = DefaultTopics.SOURCES_PROVIDER; + @Parameter(names = "--swhinserter.kafkaIn", arity = 1) + public String kafkaIn = DefaultTopics.SOURCES_PROVIDER; } \ No newline at end of file diff --git a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterInjectorConfig.java b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterInjectorConfig.java index d5d302f4..2586bb09 100644 --- a/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterInjectorConfig.java +++ b/plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/SwhInserterInjectorConfig.java @@ -1,8 +1,31 @@ +/* + * Copyright 2022 Delft University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package eu.f4sten.swhinserter; +import org.jooq.SQLDialect; +import org.jooq.impl.DSL; + import com.google.inject.Binder; +import com.google.inject.Provides; + import eu.f4sten.infra.IInjectorConfig; import eu.f4sten.infra.InjectorConfig; +import eu.f4sten.infra.json.JsonUtils; +import eu.f4sten.infra.utils.PostgresConnector; +import eu.f4sten.infra.utils.Version; @InjectorConfig public class SwhInserterInjectorConfig implements IInjectorConfig { @@ -18,4 +41,10 @@ public void configure(Binder binder) { binder.bind(SwhInserterArgs.class).toInstance(args); } -} + @Provides + public DatabaseUtils bindDatabaseUtils(PostgresConnector pc, JsonUtils json, Version version) { + var c = pc.getNewConnection(); + var dslContext = DSL.using(c, SQLDialect.POSTGRES); + return new DatabaseUtils(dslContext); + } +} \ No newline at end of file