Skip to content

Commit

Permalink
Remove 30 day staleness limit restriction for integration scanning
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffreyHuynh1 committed Nov 9, 2023
1 parent d7b61a5 commit 6b4046f
Show file tree
Hide file tree
Showing 8 changed files with 13 additions and 127 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## v0.3.1

Features:
- Remove 30 day limit restriction for integration scanning

## v0.3.0

Features:
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "broker"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
description = "The bridge between FOSSA and internal DevOps services"
readme = "README.md"
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ For `git` integrations, Broker considers the following to be a "reference":
- Any tag
- Any branch `HEAD` commit

Broker enumerates the list of references that were created in the last 30 days.
Broker enumerates the list of references

_Note that tags cannot be modified; once a tag has been created to "modify" it in `git` requires that the tag is_
_deleted and then created with the same name. Such modifications are actually creation of the tag,_
_and as such any tag that was "modified" within the last 30 days is also scanned by Broker._
_and as such any tag that was "modified" is also scanned by Broker._

After enumerating the list of references, Broker then uses its local database to filter any reference that it has already scanned.
Note that this means that a modified tag would then be filtered at this step,
Expand Down
2 changes: 1 addition & 1 deletion src/api/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ pub trait RemoteProvider {
reference: &Self::Reference,
) -> Result<TempDir, Report<RemoteProviderError>>;

/// List references that have been updated in the last 30 days.
/// List references that have been updated
async fn references(&self) -> Result<Vec<Self::Reference>, Report<RemoteProviderError>>;
}

Expand Down
120 changes: 2 additions & 118 deletions src/api/remote/git/repository.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
//! Wrapper for Git
use base64::{engine::general_purpose, Engine as _};
use error_stack::{bail, report, Report};
use futures::StreamExt;
use itertools::Itertools;
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use tempfile::{tempdir, NamedTempFile, TempDir};
use thiserror::Error;
use time::{ext::NumericalDuration, format_description::well_known::Iso8601, OffsetDateTime};
use tracing::debug;

use super::Reference;
use crate::ext::command::{Command, CommandDescriber, Output, OutputProvider, Value};
Expand Down Expand Up @@ -72,11 +69,10 @@ impl Error {
}
}

/// List references that have been updated in the last 30 days.
/// List references that have been updated
#[tracing::instrument]
pub async fn list_references(transport: &Transport) -> Result<Vec<Reference>, Report<Error>> {
let references = get_all_references(transport).await?;
references_that_need_scanning(transport, references).await
get_all_references(transport).await
}

/// Clone a [`Reference`] into a temporary directory.
Expand Down Expand Up @@ -181,118 +177,6 @@ pub fn pastable_ls_remote_command(transport: &Transport) -> Result<String, Repor
pastable_git_command(transport, &ls_remote_args(transport), None)
}

// Days until a commit is considered stale and will not be scanned
const DAYS_UNTIL_STALE: i64 = 30;

/// Get a list of all branches and tags for the given integration
/// This is done by doing this:
///
/// git init
/// git remote add origin <URL to git repo>
/// git ls-remote --quiet
///
/// and then parsing the results of git ls-remote
/// Filter references by looking at the date of their head commit and only including repos
/// that have been updated in the last 30 days
/// To do this we need a cloned repository so that we can run
/// `git log <some format string that includes that date of the commit> <branch_or_tag_name>`
/// in the cloned repo for each branch or tag
#[tracing::instrument(skip_all)]
async fn references_that_need_scanning(
transport: &Transport,
references: Vec<Reference>,
) -> Result<Vec<Reference>, Report<Error>> {
let tmpdir = blobless_clone(transport, None).await?;

// Filtering over the references async is a little harder than a sync filter,
// since the item being filtered may cross threads.
// Using `filter_map` simplifies this for the most part.
//
// Despite the fact that this is an async stream, it is currently still operating
// serially; to make it parallel use `buffer` or `buffer_unordered`.
let filtered_references = futures::stream::iter(references)
.filter_map(|reference| async {
reference_needs_scanning(transport, &reference, PathBuf::from(tmpdir.path()))
.await
.ok()
.and_then(|needs_scanning| match needs_scanning {
true => Some(reference),
false => None,
})
})
.collect()
.await;

debug!(
"references that have been updated in the last {} days: {:?}",
DAYS_UNTIL_STALE, filtered_references
);

Ok(filtered_references)
}

/// A reference needs scanning if its head commit is less than 30 days old
#[tracing::instrument(skip(transport))]
async fn reference_needs_scanning(
transport: &Transport,
reference: &Reference,
cloned_repo_dir: PathBuf,
) -> Result<bool, Report<Error>> {
let reference_string = match reference {
Reference::Branch { name, .. } => format!("origin/{name}"),
Reference::Tag { name, .. } => name.clone(),
};

let args = vec![
"log",
"-n",
"1",
"--format=%aI:::%cI",
reference_string.as_str(),
]
.into_iter()
.map(Value::new_plain)
.collect_vec();

// git log -n 1 --format="%aI:::%cI" <name of tag or branch>
// This will return one line containing the author date and committer date separated by ":::". E.g.:
// The "I" in "aI" and "cI" forces the date to be in strict ISO-8601 format
//
// git log -n 1 --format="%ai:::%ci" parse-config-file
// 2023-02-17 17:14:52 -0800:::2023-02-17 17:14:52 -0800
//
// The author and committer dates are almost always the same, but we'll parse both and take the most
// recent, just to be super safe

let output = run_git(transport, &args, Some(&cloned_repo_dir)).await?;
let date_strings = output.stdout_string_lossy();
let mut dates = date_strings.split(":::");
let earliest_commit_date_that_needs_to_be_scanned =
OffsetDateTime::checked_sub(OffsetDateTime::now_utc(), DAYS_UNTIL_STALE.days())
.ok_or_else(|| report!(Error::ParseGitOutput))?;

let author_date = dates
.next()
.map(|d| OffsetDateTime::parse(d, &Iso8601::DEFAULT));
if let Some(Ok(author_date)) = author_date {
if author_date > earliest_commit_date_that_needs_to_be_scanned {
return Ok(true);
}
}

let committer_date = dates
.next()
.map(|d| OffsetDateTime::parse(d, &Iso8601::DEFAULT));
if let Some(Ok(committer_date)) = committer_date {
if committer_date > earliest_commit_date_that_needs_to_be_scanned {
return Ok(true);
}
}

Ok(false)
}

/// Do a blobless clone of the repository, checking out the Reference if it exists
#[tracing::instrument(skip(transport))]
async fn blobless_clone(
Expand Down
2 changes: 0 additions & 2 deletions testdata/config/fossa-one-http-no-auth.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ integrations:
poll_interval: 1h
remote: https://github.com/fossas/one.git
import_branches: true
watched_branches:
- main
auth:
type: none
transport: http
3 changes: 1 addition & 2 deletions tests/it/remote_git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ async fn references_on_public_repo_with_no_auth() {
.references()
.await
.expect("no results returned from get_references_that_need_scanning on a public repo!");
let expected_empty_vec: Vec<Reference> = Vec::new();
assert_eq!(expected_empty_vec, references);
assert!(!references.is_empty());
}

#[tokio::test]
Expand Down

0 comments on commit 6b4046f

Please sign in to comment.