Skip to content

Commit

Permalink
Pair-programming pass through the SWH-hash calculator
Browse files Browse the repository at this point in the history
  • Loading branch information
proksch committed Aug 15, 2022
1 parent 972a141 commit 1595fff
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,8 @@
import static eu.fasten.core.maven.utils.MavenUtilities.MAVEN_CENTRAL_REPO;

import java.sql.Timestamp;
import java.util.List;

import eu.fasten.core.data.metadatadb.codegen.tables.Files;
import eu.fasten.core.data.metadatadb.codegen.tables.PackageVersions;
import eu.fasten.core.data.metadatadb.codegen.tables.Packages;
import org.jooq.DSLContext;
import org.jooq.JSONB;
import org.jooq.exception.DataAccessException;
import org.jooq.impl.DSL;

Expand All @@ -35,7 +30,6 @@
import eu.fasten.core.data.metadatadb.MetadataDao;
import eu.fasten.core.exceptions.UnrecoverableError;
import eu.fasten.core.maven.data.Pom;
import org.json.JSONObject;

public class DatabaseUtils {

Expand Down Expand Up @@ -148,40 +142,4 @@ public void pruneRetries(String key) {
throw new UnrecoverableError(e);
}
}

/**
 * Looks up the id of a package version by package name and version string.
 *
 * @param pkgName name of the package, matched against {@code packages.package_name}
 * @param version version string, matched against {@code package_versions.version}
 * @return the id of the matching row in {@code package_versions}
 * @throws IllegalStateException if no package version matches the given name and version
 * @throws UnrecoverableError    if the query fails with a {@link DataAccessException}
 */
public Long getPkgVersionID(String pkgName, String version) {
    try {
        var row = context.select(PackageVersions.PACKAGE_VERSIONS.ID)
                .from(Packages.PACKAGES, PackageVersions.PACKAGE_VERSIONS)
                .where(Packages.PACKAGES.PACKAGE_NAME.eq(pkgName)
                        .and(PackageVersions.PACKAGE_VERSIONS.PACKAGE_ID.eq(Packages.PACKAGES.ID))
                        .and(PackageVersions.PACKAGE_VERSIONS.VERSION.eq(version)))
                .fetchOne();
        // fetchOne() yields null when nothing matches; fail with a descriptive error
        // instead of an anonymous NullPointerException.
        if (row == null) {
            throw new IllegalStateException(
                    "No package version found for package '" + pkgName + "' and version '" + version + "'");
        }
        return row.value1();
    } catch (DataAccessException e) {
        throw new UnrecoverableError(e);
    }
}

/**
 * Collects the paths of all files that are registered for a package version.
 *
 * @param pkgVersionID id of the package version whose files are requested
 * @return the {@code path} values of all matching rows in the files table
 * @throws UnrecoverableError if the query fails with a {@link DataAccessException}
 */
public List<String> getFilePaths4PkgVersion(Long pkgVersionID) {
    final var files = Files.FILES;
    try {
        return context.select(files.PATH)
                .from(files)
                .where(files.PACKAGE_VERSION_ID.eq(pkgVersionID))
                .fetch()
                .getValues(files.PATH);
    } catch (DataAccessException queryFailure) {
        throw new UnrecoverableError(queryFailure);
    }
}

/**
 * Stores the SWH checksum of a file in the metadata column of its row in the files table.
 *
 * @param pkgVersionID id of the package version the file belongs to
 * @param filePath     path of the file, as stored in the files table
 * @param fileHash     checksum to store under the {@code swh_checksum} key
 * @return the path of the updated row
 * @throws IllegalStateException if no row matches the given package version and path
 * @throws UnrecoverableError    if the update fails with a {@link DataAccessException}
 */
public String addFileHash(Long pkgVersionID, String filePath, String fileHash) {
    try {
        // NOTE(review): the metadata column is replaced as a whole, so any keys that were
        // stored there before are dropped -- confirm that this is intended.
        var fileMetadata = JSONB.valueOf(new JSONObject().put("swh_checksum", fileHash).toString());
        var updated = context.update(Files.FILES)
                .set(Files.FILES.METADATA, fileMetadata)
                .where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID).and(Files.FILES.PATH.eq(filePath)))
                .returningResult(Files.FILES.PATH)
                .fetchOne();
        // Nothing is returned when the WHERE clause matched no row; report that explicitly
        // instead of failing with an anonymous NullPointerException.
        if (updated == null) {
            throw new IllegalStateException(
                    "No file '" + filePath + "' found for package version " + pkgVersionID);
        }
        return updated.getValue(Files.FILES.PATH);
    } catch (DataAccessException e) {
        throw new UnrecoverableError(e);
    }
}
}
7 changes: 4 additions & 3 deletions plugins/swh-inserter/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<artifactId>plugins</artifactId>
<groupId>eu.fasten-project</groupId>
<version>0.0.12-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>swh-inserter</artifactId>

<dependencies>
<dependency>
<groupId>eu.fasten-project</groupId>
<artifactId>pom-analyzer</artifactId>
<artifactId>sources-provider</artifactId>
<version>0.0.12-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright 2022 Delft University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.f4sten.swhinserter;

import java.util.List;

import org.jooq.DSLContext;
import org.jooq.JSONB;
import org.jooq.exception.DataAccessException;
import org.json.JSONObject;

import eu.fasten.core.data.metadatadb.codegen.tables.Files;
import eu.fasten.core.data.metadatadb.codegen.tables.PackageVersions;
import eu.fasten.core.data.metadatadb.codegen.tables.Packages;
import eu.fasten.core.exceptions.UnrecoverableError;

/**
 * Database helper of the SWH inserter. All queries run through the {@link DSLContext} that is
 * handed to the constructor; a {@link DataAccessException} is rethrown as
 * {@link UnrecoverableError}.
 */
public class DatabaseUtils {

    private final DSLContext context;

    /**
     * @param context the jOOQ context that is used for all queries of this instance
     */
    public DatabaseUtils(DSLContext context) {
        this.context = context;
    }

    /**
     * Looks up the id of a package version by package name and version string.
     *
     * @param pkgName name of the package, matched against {@code packages.package_name}
     * @param version version string, matched against {@code package_versions.version}
     * @return the id of the matching row in {@code package_versions}
     * @throws IllegalStateException if no package version matches the given name and version
     * @throws UnrecoverableError    if the query fails with a {@link DataAccessException}
     */
    public Long getPkgVersionID(String pkgName, String version) {
        try {
            var row = context.select(PackageVersions.PACKAGE_VERSIONS.ID)
                    .from(Packages.PACKAGES, PackageVersions.PACKAGE_VERSIONS)
                    .where(Packages.PACKAGES.PACKAGE_NAME.eq(pkgName)
                            .and(PackageVersions.PACKAGE_VERSIONS.PACKAGE_ID.eq(Packages.PACKAGES.ID))
                            .and(PackageVersions.PACKAGE_VERSIONS.VERSION.eq(version)))
                    .fetchOne();
            // fetchOne() yields null when nothing matches; fail with a descriptive error
            // instead of an anonymous NullPointerException.
            if (row == null) {
                throw new IllegalStateException(
                        "No package version found for package '" + pkgName + "' and version '" + version + "'");
            }
            return row.value1();
        } catch (DataAccessException e) {
            throw new UnrecoverableError(e);
        }
    }

    /**
     * Collects the paths of all files that are registered for a package version.
     *
     * @param pkgVersionID id of the package version whose files are requested
     * @return the {@code path} values of all matching rows in the files table
     * @throws UnrecoverableError if the query fails with a {@link DataAccessException}
     */
    public List<String> getFilePaths4PkgVersion(Long pkgVersionID) {
        try {
            var filePaths = context.select(Files.FILES.PATH).from(Files.FILES)
                    .where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID)).fetch();
            return filePaths.getValues(Files.FILES.PATH);
        } catch (DataAccessException e) {
            throw new UnrecoverableError(e);
        }
    }

    /**
     * Stores the SWH checksum of a file in the metadata column of its row in the files table.
     *
     * @param pkgVersionID id of the package version the file belongs to
     * @param filePath     path of the file, as stored in the files table
     * @param fileHash     checksum to store under the {@code swh_checksum} key
     * @return the path of the updated row
     * @throws IllegalStateException if no row matches the given package version and path
     * @throws UnrecoverableError    if the update fails with a {@link DataAccessException}
     */
    public String addFileHash(Long pkgVersionID, String filePath, String fileHash) {
        try {
            // NOTE(review): the metadata column is replaced as a whole, so any keys that were
            // stored there before are dropped -- confirm that this is intended.
            var fileMetadata = JSONB.valueOf(new JSONObject().put("swh_checksum", fileHash).toString());
            var updated = context.update(Files.FILES)
                    .set(Files.FILES.METADATA, fileMetadata)
                    .where(Files.FILES.PACKAGE_VERSION_ID.eq(pkgVersionID).and(Files.FILES.PATH.eq(filePath)))
                    .returningResult(Files.FILES.PATH)
                    .fetchOne();
            // Nothing is returned when the WHERE clause matched no row; report that explicitly
            // instead of failing with an anonymous NullPointerException.
            if (updated == null) {
                throw new IllegalStateException(
                        "No file '" + filePath + "' found for package version " + pkgVersionID);
            }
            return updated.getValue(Files.FILES.PATH);
        } catch (DataAccessException e) {
            throw new UnrecoverableError(e);
        }
    }
}
108 changes: 65 additions & 43 deletions plugins/swh-inserter/src/main/java/eu/f4sten/swhinserter/Main.java
Original file line number Diff line number Diff line change
@@ -1,25 +1,40 @@
/*
* Copyright 2022 Delft University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.f4sten.swhinserter;

import eu.f4sten.infra.AssertArgs;
import eu.f4sten.infra.Plugin;
import eu.f4sten.infra.kafka.Kafka;
import eu.f4sten.infra.kafka.Lane;
import eu.f4sten.infra.utils.IoUtils;
import eu.f4sten.pomanalyzer.utils.DatabaseUtils;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.LinkedHashMap;

import javax.inject.Inject;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.f4sten.infra.AssertArgs;
import eu.f4sten.infra.Plugin;
import eu.f4sten.infra.kafka.Kafka;
import eu.f4sten.infra.kafka.Lane;
import eu.f4sten.infra.utils.IoUtils;
import eu.f4sten.sourcesprovider.data.SourcePayload;

public class Main implements Plugin {

Expand All @@ -46,7 +61,7 @@ public void run() {
.notNull(a -> a.kafkaIn, "kafka input topic"); //

LOG.info("Subscribing to '{}'", args.kafkaIn);
kafka.subscribe(args.kafkaIn, LinkedHashMap.class, this::consume);
kafka.subscribe(args.kafkaIn, SourcePayload.class, this::consume);
while (true) {
LOG.debug("Polling ...");
kafka.poll();
Expand All @@ -56,49 +71,56 @@ public void run() {
}
}

private void consume(LinkedHashMap<String, String> message, Lane lane) {
var json = new JSONObject(message);
LOG.info("Consuming next {} record {} ...", lane, json);
var pkgName = json.get("product").toString();
var ver = json.get("version").toString();
//var srcPath = json.get("version").toString();
private void consume(SourcePayload payload, Lane lane) {
LOG.info("Consuming next {} record ...", lane);
var pkgName = payload.getProduct();
var ver = payload.getVersion();

var basePath = getBasePath(pkgName, ver);
var pkgVerID = db.getPkgVersionID(pkgName, ver);
var pkgVerFilesPaths = db.getFilePaths4PkgVersion(pkgVerID);

pkgVerFilesPaths.forEach(fp -> {
LOG.info("P: {}", fp);
var srcFileContent = readSrcFileContent(pkgName, ver, fp);
var srcFileHash = computeGitHash(srcFileContent.getBytes(StandardCharsets.UTF_8));
db.addFileHash(pkgVerID, fp, srcFileHash);
LOG.info("Added file hash for {}", fp);
var paths = db.getFilePaths4PkgVersion(pkgVerID);

paths.forEach(path -> {
var content = read(basePath, path);
var bytes = content.getBytes(StandardCharsets.UTF_8);
var hash = computeSwhHash(bytes);
db.addFileHash(pkgVerID, path, hash);
LOG.info("Added file hash for {}", path);
});
}

private String readSrcFileContent(String pkgName, String version, String filePath) {
private File getBasePath(String pkgName, String version) {
String[] ga = pkgName.split(":");
var groupID = ga[0];
var artifactID = ga[1];
var baseDir = io.getBaseFolder();
var srcFile = new File(Path.of(baseDir.toString(), "sources", "mvn", Character.toString(groupID.charAt(0)),
groupID, artifactID, version, filePath).toString());
var baseDir = io.getBaseFolder().getAbsolutePath();
var firstChar = Character.toString(groupID.charAt(0));
var basePath = Path.of(baseDir, "sources", "mvn", firstChar, groupID, artifactID, version).toFile();
return basePath;
}

private String read(File basePath, String filePath) {
try {
var srcFile = new File(basePath, filePath);
return FileUtils.readFileToString(srcFile, StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException("Could not read the file " + srcFile.toPath());
throw new RuntimeException(e);
}
}

// This method computes a SWH-compatible hash
private String computeGitHash(byte[] fileContent) {
MessageDigest md = null;
try {
md = MessageDigest.getInstance("SHA-1");
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
}
/**
 * Computes the SWH-compatible hash of a file: the SHA-1 digest over a Git blob header
 * followed by the raw content, encoded as a hex string.
 *
 * @param fileContent raw bytes of the file
 * @return hex encoding of the digest
 */
private String computeSwhHash(byte[] fileContent) {
    var md = getSha1Digest();
    // The SWH hash is based on Git, which salts the content with a "blob <length>\0" header.
    // The header is built by plain concatenation and encoded with a fixed charset, so that
    // neither the default locale (String.format("%d") localizes digits) nor the platform
    // charset can change the bytes that are hashed.
    var header = "blob " + fileContent.length + "\u0000";
    md.update(header.getBytes(StandardCharsets.US_ASCII));
    md.update(fileContent);
    return Hex.encodeHexString(md.digest());
}
}

/**
 * Creates a fresh SHA-1 {@link MessageDigest}.
 *
 * @return a new digest instance for the "SHA-1" algorithm
 * @throws RuntimeException wrapping the {@link NoSuchAlgorithmException} if the algorithm
 *                          is not available
 */
private static MessageDigest getSha1Digest() {
    final MessageDigest sha1;
    try {
        sha1 = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException unavailable) {
        throw new RuntimeException(unavailable);
    }
    return sha1;
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
/*
* Copyright 2022 Delft University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.f4sten.swhinserter;

import com.beust.jcommander.Parameter;

import eu.f4sten.infra.kafka.DefaultTopics;

public class SwhInserterArgs {
@Parameter(names = "--swhinserter.kafkaIn", arity = 1)
public String kafkaIn = DefaultTopics.SOURCES_PROVIDER;
@Parameter(names = "--swhinserter.kafkaIn", arity = 1)
public String kafkaIn = DefaultTopics.SOURCES_PROVIDER;
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
/*
* Copyright 2022 Delft University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.f4sten.swhinserter;

import org.jooq.SQLDialect;
import org.jooq.impl.DSL;

import com.google.inject.Binder;
import com.google.inject.Provides;

import eu.f4sten.infra.IInjectorConfig;
import eu.f4sten.infra.InjectorConfig;
import eu.f4sten.infra.json.JsonUtils;
import eu.f4sten.infra.utils.PostgresConnector;
import eu.f4sten.infra.utils.Version;

@InjectorConfig
public class SwhInserterInjectorConfig implements IInjectorConfig {
Expand All @@ -18,4 +41,10 @@ public void configure(Binder binder) {
binder.bind(SwhInserterArgs.class).toInstance(args);
}

}
/**
 * Provides the {@link DatabaseUtils} of this plugin, backed by a new connection that is
 * requested from the given connector and wrapped in a jOOQ context with the Postgres dialect.
 *
 * @param pc      connector that supplies the new database connection
 * @param json    not used in the method body
 * @param version not used in the method body
 * @return database utilities operating on the new connection
 */
@Provides
public DatabaseUtils bindDatabaseUtils(PostgresConnector pc, JsonUtils json, Version version) {
    return new DatabaseUtils(DSL.using(pc.getNewConnection(), SQLDialect.POSTGRES));
}
}

0 comments on commit 1595fff

Please sign in to comment.