From 102051288703c67b5d4a7781f9d01f8cb832785e Mon Sep 17 00:00:00 2001 From: Jeffrey Huynh Date: Tue, 3 Oct 2023 17:08:59 -0600 Subject: [PATCH] Ane 1060 broker smart imports (#134) Broker smart imports --- Cargo.lock | 9 +- Cargo.toml | 3 +- db/canonical.db | Bin 28672 -> 28672 bytes .../20230912062551_repo_state_2.down.sql | 2 + .../20230912062551_repo_state_2.up.sql | 2 + docs/dev/README.md | 38 +++++ docs/reference/config.md | 51 ++++++- src/api/remote.rs | 130 ++++++++++++++++++ src/api/remote/git.rs | 5 + src/cmd/fix.rs | 8 +- src/cmd/run.rs | 60 +++++++- src/config/file.rs | 5 +- src/config/file/v1.rs | 93 +++++++++++-- src/db.rs | 10 +- src/db/sqlite.rs | 26 +++- .../config/basic-http-basic-bad-repo-name.yml | 3 + .../basic-http-basic-malformed-auth.yml | 3 + .../basic-http-basic-malformed-debugging.yml | 3 + testdata/config/basic-http-basic.yml | 3 + testdata/config/basic-http-header.yml | 3 + .../config/basic-http-no-auth-empty-repo.yml | 3 + testdata/config/basic-http-no-auth.yml | 3 + testdata/config/basic-ssh-key.yml | 3 + testdata/config/basic.yml | 3 + testdata/config/fossa-one-http-no-auth.yml | 3 + .../nonexistent-repo-bad-http-basic.yml | 3 + .../nonexistent-repo-bad-http-header.yml | 3 + testdata/config/private-repo-http-no-auth.yml | 3 + testdata/config/short-poll-interval.yml | 3 + testdata/config/smart_imports_malformed.yml | 20 +++ tests/it/config.rs | 31 +++++ tests/it/db/sqlite.rs | 123 ++++++++++++++++- ...__integration_smart_imports_malformed.snap | 18 +++ 33 files changed, 641 insertions(+), 35 deletions(-) create mode 100644 db/migrations/20230912062551_repo_state_2.down.sql create mode 100644 db/migrations/20230912062551_repo_state_2.up.sql create mode 100644 testdata/config/smart_imports_malformed.yml create mode 100644 tests/it/snapshots/it__config__integration_smart_imports_malformed.snap diff --git a/Cargo.lock b/Cargo.lock index 7c3d14b7..98bb9275 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,7 +263,7 @@ dependencies = [ [[package]] name = "broker" -version = "0.2.4-pre" +version = "0.3.0-pre" dependencies = [ "aho-corasick 0.7.20", "async-trait", @@ -283,6 +283,7 @@ dependencies = [ "error-stack", "futures", "getset", + "glob", "governor", "humantime", "indoc", @@ -1153,6 +1154,12 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "governor" version = "0.6.0" diff --git a/Cargo.toml b/Cargo.toml index 7d757d2e..9758fa6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "broker" -version = "0.2.4-pre" +version = "0.3.0-pre" edition = "2021" description = "The bridge between FOSSA and internal DevOps services" readme = "README.md" @@ -70,6 +70,7 @@ tikv-jemallocator = { version = "0.5.4", optional = true } deadqueue = "0.2.4" governor = "0.6.0" nonzero_ext = "0.3.0" +glob = "0.3.1" [dev-dependencies] insta = { version = "1.31.0", features = ["filters", "json", "yaml"] } diff --git a/db/canonical.db b/db/canonical.db index 6c057191db963c13624f94f2ba59955873e1a0ff..53db70fb2b4b2a60f4bbec0cfce14722cf7f4cbb 100644 GIT binary patch delta 308 zcmZp8z}WDBae}m<00RR9I}pQw#6%rqNdX4Ev{$^mWejZG2O0Rf`L}Z)W85tNE>l#?<8X5!HRz~JlhL&nq%I8bBPP%$C zYl|_PP`~f|vw!~t+wN8FuuIr%Ie*8Y&Emz&oB!MwZvUe%_wf9dY1}2HlkMb>aWV5h fVBr78|C;~7W 
zC}6`gc?-`Wmd03S_RUqiT}&KI{B{id3;FFf3o01#Pqvdk1{8VC!2gZ^^=3hX`}~t1
M=(BD9sL$^J0Hrt`Q~&?~

diff --git a/db/migrations/20230912062551_repo_state_2.down.sql b/db/migrations/20230912062551_repo_state_2.down.sql
new file mode 100644
index 00000000..18873c64
--- /dev/null
+++ b/db/migrations/20230912062551_repo_state_2.down.sql
@@ -0,0 +1,2 @@
+-- Add down migration script here
+alter table repo_state drop column is_branch;
\ No newline at end of file
diff --git a/db/migrations/20230912062551_repo_state_2.up.sql b/db/migrations/20230912062551_repo_state_2.up.sql
new file mode 100644
index 00000000..9b1e6e93
--- /dev/null
+++ b/db/migrations/20230912062551_repo_state_2.up.sql
@@ -0,0 +1,2 @@
+-- Add up migration script here
+alter table repo_state add column is_branch integer;
\ No newline at end of file
diff --git a/docs/dev/README.md b/docs/dev/README.md
index 429e244a..f24f45be 100644
--- a/docs/dev/README.md
+++ b/docs/dev/README.md
@@ -165,3 +165,41 @@ git pull # Ensure you're tagging the latest commit
 git tag v0.2.0 # Validate this is correct, and don't forget the `v`
 git push --tags # Push the new tag to the remote.
 ```
+
+## smart imports
+
+Broker provides configurable branch/tag scanning for every integration. You can customize your scans
+through these fields in the `integrations` section of your `config.yml`:
+
+```
+integrations:
+  - type: git
+    import_branches: true # Defaults to true
+    watched_branches: # If unspecified, Broker will try to set to main or master if present
+      - main
+      - release*
+    import_tags: false # Defaults to false
+```
+
+### default values
+
+If these fields are not set, `import_branches` will be set to `true`, `import_tags` will be set to `false`, and Broker
+will make a best-effort attempt to set `watched_branches` to `main` or `master` if one is present in the remote.
+
+### branch scanning
+
+In order to scan specific branches, `import_branches` must be set to `true` and the list of branches you intend to scan should be provided under `watched_branches`. Setting `watched_branches` while `import_branches` is set to `false` is an invalid
+combination and will cause Broker to report an error.
+
+[Glob matching](https://en.wikipedia.org/wiki/Glob_(programming)) is also supported for your watched branches. If one of your `watched_branches` is `release*` and your remote contains branches `release1`, `release2`, and `release-3`, then all three
+of those branches will be scanned due to glob matching.
+
+### tag scanning
+
+In order to allow Broker to scan tags in your remote, `import_tags` must be set to `true`.
+
+### toggling fields
+
+Toggling `import_branches` from `true` to `false` will remove all existing uploaded scans for ALL branches of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning the listed `watched_branches` for the first time. On subsequent poll cycles, Broker will import the latest changes from your configured branches since the last revision (skipping any intermediate commits).
+
+Toggling `import_tags` from `true` to `false` will remove all existing uploaded scans for ALL tags of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning all the remote's tags for the first time. This would mean that all tags for that remote would be scanned. On subsequent poll cycles, Broker will import all created or changed tags since the last poll cycle.
diff --git a/docs/reference/config.md b/docs/reference/config.md
index e69bae7a..1661036a 100644
--- a/docs/reference/config.md
+++ b/docs/reference/config.md
@@ -69,11 +69,14 @@ This block specifies how to configure Broker to communicate with a git server fo
 | Value | Required? | Description | Suggested default | Minimum value |
 |-----------------|-----------|-----------------------------------------------------------------------------------------------|-------------------|---------------|
-| `poll_interval` | Required | How often Broker checks with the remote repository to see whether it has changed.1 | `1 hour` | `1 hour` |
-| `remote` | Required | The remote git repository address. | N/A | N/A |
-| `auth` | Required | Required authentication to clone this repository. | N/A | N/A |
-| `team` | Optional | The team in FOSSA to which this project should be assigned.2 | N/A | N/A |
-| `title` | Optional | Specify a custom title for the project instead of using the default.3 | N/A | N/A |
+| `poll_interval` | Required | How often Broker checks with the remote repository to see whether it has changed.1 | `1 hour` | `1 hour` |
+| `remote` | Required | The remote git repository address. | N/A | N/A |
+| `auth` | Required | Required authentication to clone this repository. | N/A | N/A |
+| `team` | Optional | The team in FOSSA to which this project should be assigned.2 | N/A | N/A |
+| `title` | Optional | Specify a custom title for the project instead of using the default.3 | N/A | N/A |
+| `import_branches` | Optional | Whether to scan specific branches of the remote repository. | N/A | N/A |
+| `import_tags` | Optional | Whether to scan tags of the remote repository. | N/A | N/A |
+| `watched_branches` | Optional | The names of the branches that you intend to scan. | N/A | N/A |
 
 **[1]**: The poll interval defines the interval at which Broker _checks for updates_, not the interval at which Broker actually analyzes the repository.
 For more details on authentication, see [integration authentication](#integration-authentication).
 
@@ -118,6 +121,44 @@ Examples for valid durations:
 | `300ms 20s 5day` | 5 days, 20 seconds, and 300 milliseconds |
 | `5day 4hours 10days` | 15 days and 4 hours |
 
+## Smart Imports
+
+Broker provides configurable branch/tag scanning for every integration. You can customize your scans
+through these fields in the `integrations` section of your `config.yml`:
+
+```
+integrations:
+  - type: git
+    import_branches: true # Defaults to true
+    watched_branches: # If unspecified, Broker will try to set to main or master if present
+      - main
+      - release*
+    import_tags: false # Defaults to false
+```
+
+### default values
+
+If these fields are not set, `import_branches` will be set to `true`, `import_tags` will be set to `false`, and Broker
+will make a best-effort attempt to set `watched_branches` to `main` or `master` if one is present in the remote.
+
+### branch scanning
+
+In order to scan specific branches, `import_branches` must be set to `true` and the list of branches you intend to scan should be provided under `watched_branches`. Setting `watched_branches` while `import_branches` is set to `false` is an invalid
+combination and will cause Broker to report an error.
+
+[Glob matching](https://en.wikipedia.org/wiki/Glob_(programming)) is also supported for your watched branches. If one of your `watched_branches` is `release*` and your remote contains branches `release1`, `release2`, and `release-3`, then all three
+of those branches will be scanned due to glob matching.
+
+### tag scanning
+
+In order to allow Broker to scan tags in your remote, `import_tags` must be set to `true`.
+
+### toggling fields
+
+Toggling `import_branches` from `true` to `false` will remove all existing uploaded scans for ALL branches of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning the listed `watched_branches` for the first time. On subsequent poll cycles, Broker will import the latest changes from your configured branches since the last revision (skipping any intermediate commits).
+
+Toggling `import_tags` from `true` to `false` will remove all existing uploaded scans for ALL tags of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning all the remote's tags for the first time. This would mean that all tags for that remote would be scanned. On subsequent poll cycles, Broker will import all created or changed tags since the last poll cycle.
+
 ## Integration authentication
 
 Integrations support several possible authentication schemes, specified by `type`.
diff --git a/src/api/remote.rs b/src/api/remote.rs
index a3af1fa0..93b00820 100644
--- a/src/api/remote.rs
+++ b/src/api/remote.rs
@@ -17,6 +17,7 @@ use derive_more::{AsRef, Display, From};
 use derive_new::new;
 use error_stack::{ensure, report, Report, ResultExt};
 use getset::{CopyGetters, Getters};
+use glob::Pattern;
 use humantime::parse_duration;
 use serde::{Deserialize, Serialize};
 use tempfile::TempDir;
@@ -51,6 +52,14 @@ pub enum ValidationError {
     /// The provided value is empty.
     #[error("value is empty")]
     ValueEmpty,
+
+    /// Invalid combination of import branches and watched branches
+    #[error("validate import branches and watched branches")]
+    ImportBranches,
+
+    /// Unable to infer primary branch
+    #[error("primary branch could not be inferred")]
+    PrimaryBranch,
 }
 
 /// Validated config values for external code host integrations.
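The watched-branch matching documented above (and implemented by `should_scan_reference` in the next hunk) reduces to glob patterns over branch names. The following standalone Rust sketch illustrates that behavior using only the `glob` crate added in `Cargo.toml`; the function and branch names are illustrative assumptions, not code from this change.

```
use glob::Pattern;

/// Returns true when `branch` matches any configured watched-branch pattern.
/// Mirrors the documented rule: patterns that fail to parse are simply skipped.
fn matches_watched_branch(watched: &[&str], branch: &str) -> bool {
    watched.iter().any(|pattern| {
        Pattern::new(pattern)
            .map(|p| p.matches(branch))
            .unwrap_or(false)
    })
}

fn main() {
    let watched = ["main", "release*"];
    for branch in ["main", "release1", "release2", "release-3", "feature/x"] {
        println!("{branch}: {}", matches_watched_branch(&watched, branch));
    }
    // Prints true for main, release1, release2, and release-3; false for feature/x.
}
```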
@@ -128,6 +137,18 @@ pub struct Integration { #[getset(get = "pub")] #[builder(setter(into))] protocol: Protocol, + + /// Specifies if we want to scan specific branches + #[getset(get = "pub")] + import_branches: BranchImportStrategy, + + /// Specifies if we want to scan tags + #[getset(get = "pub")] + import_tags: TagImportStrategy, + + /// The name of the branches we want to scan + #[getset(get = "pub")] + watched_branches: Vec, } impl Display for Integration { @@ -151,6 +172,28 @@ impl Integration { pub fn endpoint(&self) -> &Remote { self.protocol().endpoint() } + + /// Checks if the reference branch should be scanned by comparing it to our watched branches + pub fn should_scan_reference(&self, reference: &str) -> bool { + let branches = self.watched_branches(); + for branch in branches { + match Pattern::new(branch.name()) { + Ok(p) => { + if p.matches(reference) { + return true; + } + } + // In the case of error continue on and have the function return false if there are no matches + Err(_e) => continue, + } + } + false + } + + /// Mutable reference for watched branches + pub fn add_watched_branch(&mut self, watched_branch: WatchedBranch) { + self.watched_branches.push(watched_branch) + } } /// Code is stored in many kinds of locations, from git repos to @@ -214,6 +257,93 @@ impl TryFrom for PollInterval { } } +/// Specificies if we want to scan branches +#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, Deserialize, Serialize, new)] +pub enum BranchImportStrategy { + /// Scanning branches is not allowed + Disabled, + /// Scanning branches is allowed + Enabled, +} + +impl From> for BranchImportStrategy { + fn from(val: Option) -> BranchImportStrategy { + match val { + Some(false) => BranchImportStrategy::Disabled, + // True case maps to enabled and if it is None we default to enabled + _ => BranchImportStrategy::Enabled, + } + } +} + +impl BranchImportStrategy { + /// Validates branch import configurations + pub fn is_valid(&self, watched_branches: &[WatchedBranch]) -> bool { + match self { + BranchImportStrategy::Disabled => watched_branches.is_empty(), + // On the Enabled variant, we can't check if watched branches is not empty here because Broker will try to infer watched branches + // Therefore just have the Enabled case map to true, and the validation on the enabled case will be checked later + BranchImportStrategy::Enabled => true, + } + } + + /// Checks if we need to infer watched branches based on configuration + pub fn infer_watched_branches(&self, watched_branches: &[WatchedBranch]) -> bool { + match self { + BranchImportStrategy::Disabled => false, + BranchImportStrategy::Enabled => watched_branches.is_empty(), + } + } + + /// Checks if scanning on branches should be skipped based on the enum variant + pub fn should_skip_branches(&self) -> bool { + match self { + BranchImportStrategy::Disabled => true, + BranchImportStrategy::Enabled => false, + } + } +} + +/// Specifies if the we want to scan tags +#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, Deserialize, Serialize, new)] +pub enum TagImportStrategy { + /// Scanning tags is not allowed + Disabled, + /// Scanning tags is allowed + Enabled, +} + +impl From> for TagImportStrategy { + fn from(val: Option) -> TagImportStrategy { + match val { + Some(true) => TagImportStrategy::Enabled, + // False case maps to disabled, and if it is None we default to disabled + _ => TagImportStrategy::Disabled, + } + } +} + +impl TagImportStrategy { + /// Checks if scanning tags should be skipped based on the enum variant + pub fn 
should_skip_tags(&self) -> bool { + match self { + TagImportStrategy::Disabled => true, + TagImportStrategy::Enabled => false, + } + } +} + +/// The integration's branch that you intend to scan +#[derive(Debug, Clone, PartialEq, Eq, AsRef, Display, Deserialize, Serialize, new)] +pub struct WatchedBranch(String); + +impl WatchedBranch { + /// The name of the watched branch + pub fn name(&self) -> &str { + &self.0 + } +} + /// Errors encountered while working with remotes #[derive(Debug, thiserror::Error)] pub enum RemoteProviderError { diff --git a/src/api/remote/git.rs b/src/api/remote/git.rs index b34ebb6a..cae25457 100644 --- a/src/api/remote/git.rs +++ b/src/api/remote/git.rs @@ -5,6 +5,11 @@ use std::fmt::Display; use derive_new::new; use serde::{Deserialize, Serialize}; +/// Used to filter for main branch in an integration +pub const MAIN_BRANCH: &str = "main"; +/// Used to filter for master branch in an integration +pub const MASTER_BRANCH: &str = "master"; + /// A git reference's type (branch or tag) #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize, new)] pub enum Reference { diff --git a/src/cmd/fix.rs b/src/cmd/fix.rs index 0461c7fc..21f1f5d1 100644 --- a/src/cmd/fix.rs +++ b/src/cmd/fix.rs @@ -1,7 +1,10 @@ //! Implementation for the fix command use crate::{ - api::remote::{Reference, RemoteProvider, RemoteProviderError}, + api::remote::{ + git::{MAIN_BRANCH, MASTER_BRANCH}, + Reference, RemoteProvider, RemoteProviderError, + }, debug::{self, bundler, Bundle, BundleExport}, ext::secrecy::REDACTION_LITERAL, fossa_cli::{self, DesiredVersion}, @@ -31,9 +34,6 @@ use crate::{ ext::result::WrapErr, }; -const MAIN_BRANCH: &str = "main"; -const MASTER_BRANCH: &str = "master"; - /// Errors encountered when running the fix command. #[derive(Debug, thiserror::Error)] pub enum Error { diff --git a/src/cmd/run.rs b/src/cmd/run.rs index 706e2390..eb8248fd 100644 --- a/src/cmd/run.rs +++ b/src/cmd/run.rs @@ -18,7 +18,7 @@ use tracing::{debug, info}; use uuid::Uuid; use crate::api::fossa::{self, CliMetadata, ProjectMetadata}; -use crate::api::remote::Reference; +use crate::api::remote::{git, BranchImportStrategy, Reference, TagImportStrategy}; use crate::ext::tracing::span_record; use crate::fossa_cli::{self, DesiredVersion, Location, SourceUnits}; use crate::queue::Queue; @@ -80,6 +80,10 @@ pub enum Error { /// If we fail to run FOSSA CLI, this error is raised. #[error("run FOSSA CLI")] RunFossaCli, + + /// If we fail to delete tasks' state in the sqlite DB, this error is raised + #[error("delete tasks' state")] + TaskDeleteState, } /// Similar to [`AppContext`], but scoped for this subcommand. @@ -104,11 +108,40 @@ pub async fn main(ctx: &AppContext, config: Config, db: D) -> Resul db, }; + for integration in ctx.config.integrations().iter() { + if let Err(err) = remove_repository_scan_targets(&ctx.db, integration).await { + warn!("Unable to remove scan targets for '{integration}': {err:#?}. 
Contact Support for further guidance."); + } + } + let healthcheck_worker = healthcheck(&ctx.db); let integration_worker = integrations(&ctx); try_join!(healthcheck_worker, integration_worker).discard_ok() } +#[tracing::instrument(skip_all)] +async fn remove_repository_scan_targets( + db: &D, + integration: &Integration, +) -> Result<(), Error> { + let repository = integration.remote().for_coordinate(); + let import_branches = integration.import_branches(); + let import_tags = integration.import_tags(); + + if let BranchImportStrategy::Disabled = import_branches { + db.delete_states(&repository, true) + .await + .change_context(Error::TaskDeleteState)?; + } + if let TagImportStrategy::Disabled = import_tags { + db.delete_states(&repository, false) + .await + .change_context(Error::TaskDeleteState)? + } + + Ok(()) +} + /// Conduct internal diagnostics to ensure Broker is still in a good state. #[tracing::instrument(skip_all)] async fn healthcheck(db: &D) -> Result<(), Error> { @@ -271,6 +304,23 @@ async fn execute_poll_integration( // Using `filter_map` instead of `filter` so that this closure gets ownership of `reference`, // which makes binding it across an await point easier (no lifetimes to mess with). .filter_map(|reference| async { + match &reference { + Reference::Git(git_reference) => match git_reference { + git::Reference::Branch {..} => { + // Skipping because integration is not configured to scan branches or branch was not in the integration's watched branches + if integration.import_branches().should_skip_branches() || !integration.should_scan_reference(reference.name()){ + return None + } + }, + git::Reference::Tag{..} => { + // Skipping because integration was not configured to scan tags + if integration.import_tags().should_skip_tags() { + return None + } + }, + } + } + let coordinate = reference.as_coordinate(&remote); match db.state(&coordinate).await { // No previous state; this must be a new reference. @@ -445,10 +495,16 @@ async fn execute_upload_scans( let remote = job.integration.remote().to_owned(); let coordinate = job.reference.as_coordinate(&remote); let state = job.reference.as_state(); + let is_branch = match &job.reference { + Reference::Git(git_reference) => match git_reference { + git::Reference::Branch { .. } => true, + git::Reference::Tag { .. } => false, + }, + }; // Mark this reference as scanned in the local DB. ctx.db - .set_state(&coordinate, state) + .set_state(&coordinate, state, &is_branch) .await .change_context(Error::TaskSetState) } diff --git a/src/config/file.rs b/src/config/file.rs index a2314870..e6f7a0c1 100644 --- a/src/config/file.rs +++ b/src/config/file.rs @@ -41,7 +41,8 @@ use getset::Getters; use serde::Deserialize; use crate::{ - api, debug, + api::{self}, + debug, ext::{ error_stack::{DescribeContext, ErrorHelper, IntoContext}, result::WrapErr, @@ -101,7 +102,7 @@ impl Config { .describe("prior to parsing the config file, Broker checks just the 'version' field to select the correct parser")?; match version { - 1 => v1::load(content).change_context(Error::ParseV1), + 1 => v1::load(content).await.change_context(Error::ParseV1), 0 => fail(Error::Incompatible, 0).help("update the config file to a newer format"), v => fail(Error::Unsupported, v).help("ensure that Broker is at the latest version"), } diff --git a/src/config/file/v1.rs b/src/config/file/v1.rs index d0d4662b..1ffac45e 100644 --- a/src/config/file/v1.rs +++ b/src/config/file/v1.rs @@ -1,19 +1,25 @@ //! Types and functions for parsing v1 config files. 
-use std::path::PathBuf; - use error_stack::{report, Report, ResultExt}; +use futures::future::join_all; use serde::Deserialize; +use std::path::PathBuf; +use tap::Pipe; +use tracing::warn; use crate::{ api::{ fossa, http, - remote::{self, git}, + remote::{ + self, + git::{self, MAIN_BRANCH, MASTER_BRANCH}, + RemoteProvider, + }, ssh, }, - debug, + debug, doc, ext::{ - error_stack::{DescribeContext, ErrorHelper, IntoContext}, + error_stack::{DescribeContext, ErrorDocReference, ErrorHelper, IntoContext}, result::{WrapErr, WrapOk}, secrecy::ComparableSecretString, }, @@ -30,8 +36,9 @@ pub enum Error { } /// Load the config at v1 for the application. -pub fn load(content: String) -> Result> { - RawConfigV1::parse(content).and_then(validate) +pub async fn load(content: String) -> Result> { + let parsed = RawConfigV1::parse(content)?; + validate(parsed).await } /// Config values as parsed from disk. @@ -64,7 +71,7 @@ impl RawConfigV1 { } } -fn validate(config: RawConfigV1) -> Result> { +async fn validate(config: RawConfigV1) -> Result> { let endpoint = fossa::Endpoint::try_from(config.endpoint).change_context(Error::Validate)?; let key = fossa::Key::try_from(config.integration_key).change_context(Error::Validate)?; let api = fossa::Config::new(endpoint, key); @@ -72,7 +79,10 @@ fn validate(config: RawConfigV1) -> Result> { let integrations = config .integrations .into_iter() - .map(remote::Integration::try_from) + .map(|integration| async { remote::Integration::validate(integration).await }) + .pipe(join_all) + .await + .into_iter() .collect::, Report>>() .change_context(Error::Validate) .map(remote::Integrations::new)?; @@ -135,23 +145,45 @@ pub(super) enum Integration { title: Option, remote: String, auth: Auth, + import_branches: Option, + import_tags: Option, + // Option> is generally not meaningful, because None is generally equivalent to an empty vector. + // However, this needs to be an option due to serde deny_unknown_fields. 
+ // An empty vector will throw errors, which is not the intended action for users on these new changes + watched_branches: Option>, }, } -impl TryFrom for remote::Integration { - type Error = Report; - - fn try_from(value: Integration) -> Result { - match value { +impl remote::Integration { + async fn validate(value: Integration) -> Result> { + let mut integration = match value { Integration::Git { poll_interval, remote, team, title, auth, + import_branches, + import_tags, + watched_branches, } => { let poll_interval = remote::PollInterval::try_from(poll_interval)?; let endpoint = remote::Remote::try_from(remote)?; + let import_branches = remote::BranchImportStrategy::from(import_branches); + let import_tags = remote::TagImportStrategy::from(import_tags); + let watched_branches = watched_branches + .unwrap_or_default() + .into_iter() + .map(remote::WatchedBranch::new) + .collect::>(); + + if !import_branches.is_valid(&watched_branches) { + return report!(remote::ValidationError::ImportBranches) + .wrap_err() + .help("import branches must be 'true' if watched branches are provided") + .describe_lazy(|| "import branches: 'false'".to_string()); + } + let protocol = match auth { Auth::SshKeyFile { path } => { let auth = ssh::Auth::KeyFile(path); @@ -190,10 +222,41 @@ impl TryFrom for remote::Integration { .team(team) .title(title) .protocol(protocol) + .import_branches(import_branches) + .import_tags(import_tags) + .watched_branches(watched_branches) .build() - .wrap_ok() + } + }; + + if integration + .import_branches() + .infer_watched_branches(integration.watched_branches()) + { + let references = integration.references().await.unwrap_or_default(); + let primary_branch = references + .iter() + .find(|r| r.name() == MAIN_BRANCH || r.name() == MASTER_BRANCH) + .cloned(); + + match primary_branch { + None => { + // Watched branches was not set and failed to infer a primary branch + return report!(remote::ValidationError::PrimaryBranch) + .wrap_err() + .help("Consider providing explicit values for watched branches in the integration config") + .describe_lazy(|| "infer watched branches") + .documentation_lazy(doc::link::config_file_reference)?; + } + Some(branch) => { + let primary_branch_name = branch.name(); + warn!("Inferred '{primary_branch_name}' as the primary branch for '{integration}'. Broker imports only the primary branch when inferred; if desired this can be customized with the integration configuration."); + let watched_branch = remote::WatchedBranch::new(branch.name().to_string()); + integration.add_watched_branch(watched_branch); + } } } + integration.wrap_ok() } } diff --git a/src/db.rs b/src/db.rs index 1af4e1cb..ee9e8407 100644 --- a/src/db.rs +++ b/src/db.rs @@ -89,7 +89,15 @@ pub trait Database: Debug + Clone + Send + Sync { async fn state(&self, coordinate: &Coordinate) -> Result>, Error>; /// Set the state of a given [`Coordinate`]. - async fn set_state(&self, coordinate: &Coordinate, state: &[u8]) -> Result<(), Error>; + async fn set_state( + &self, + coordinate: &Coordinate, + state: &[u8], + is_branch: &bool, + ) -> Result<(), Error>; + + /// Deletes all states with the given repository and is_branch values + async fn delete_states(&self, repository: &str, is_branch: bool) -> Result<(), Error>; } /// Connect to the sqlite database implementation. 
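Before the sqlite changes that follow, here is a compact, self-contained Rust sketch of the defaulting and validation rules encoded by the `From` conversions over the optional config fields and the `is_valid` check above: an unset `import_branches` defaults to enabled, an unset `import_tags` defaults to disabled, and disabled branch imports cannot be combined with a non-empty `watched_branches` list. The enum and function names are simplified stand-ins, not Broker's actual `BranchImportStrategy`/`TagImportStrategy` types.

```
#[derive(Debug, PartialEq, Clone, Copy)]
enum BranchImports {
    Enabled,
    Disabled,
}

#[derive(Debug, PartialEq, Clone, Copy)]
enum TagImports {
    Enabled,
    Disabled,
}

fn branch_imports(field: Option<bool>) -> BranchImports {
    match field {
        Some(false) => BranchImports::Disabled,
        _ => BranchImports::Enabled, // `true` or unset both enable branch imports
    }
}

fn tag_imports(field: Option<bool>) -> TagImports {
    match field {
        Some(true) => TagImports::Enabled,
        _ => TagImports::Disabled, // `false` or unset both disable tag imports
    }
}

fn config_is_valid(branches: BranchImports, watched_branches: &[&str]) -> bool {
    // Watched branches only make sense when branch imports are enabled.
    branches == BranchImports::Enabled || watched_branches.is_empty()
}

fn main() {
    assert_eq!(branch_imports(None), BranchImports::Enabled);
    assert_eq!(tag_imports(None), TagImports::Disabled);
    assert!(config_is_valid(BranchImports::Enabled, &["main", "release*"]));
    assert!(!config_is_valid(BranchImports::Disabled, &["main"]));
    println!("defaulting and validation rules hold");
}
```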
diff --git a/src/db/sqlite.rs b/src/db/sqlite.rs index 6c0c7376..3ad8eefd 100644 --- a/src/db/sqlite.rs +++ b/src/db/sqlite.rs @@ -244,17 +244,37 @@ impl super::Database for Database { } #[tracing::instrument(fields(result))] - async fn set_state(&self, coordinate: &Coordinate, state: &[u8]) -> Result<(), super::Error> { + async fn set_state( + &self, + coordinate: &Coordinate, + state: &[u8], + is_branch: &bool, + ) -> Result<(), super::Error> { let integration = coordinate.namespace.to_string(); query!( r#" - insert into repo_state values (?, ?, ?, ?) + insert into repo_state values (?, ?, ?, ?, ?) on conflict do update set repo_state = excluded.repo_state "#, integration, coordinate.remote, coordinate.reference, - state + state, + is_branch, + ) + .execute(&self.internal) + .await + .map(|result| span_record!(result, debug result)) + .context(Error::Communication) + .change_context(super::Error::Interact) + } + + #[tracing::instrument(fields(result))] + async fn delete_states(&self, repository: &str, is_branch: bool) -> Result<(), super::Error> { + query!( + "delete from repo_state where repository = ? and is_branch = ? ", + repository, + is_branch, ) .execute(&self.internal) .await diff --git a/testdata/config/basic-http-basic-bad-repo-name.yml b/testdata/config/basic-http-basic-bad-repo-name.yml index 14c0fbbe..69766853 100644 --- a/testdata/config/basic-http-basic-bad-repo-name.yml +++ b/testdata/config/basic-http-basic-bad-repo-name.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/doesnotexist.git + import_branches: true + watched_branches: + - main auth: type: http_basic username: jssblck diff --git a/testdata/config/basic-http-basic-malformed-auth.yml b/testdata/config/basic-http-basic-malformed-auth.yml index 4245a2ed..50d8d9c6 100644 --- a/testdata/config/basic-http-basic-malformed-auth.yml +++ b/testdata/config/basic-http-basic-malformed-auth.yml @@ -9,6 +9,9 @@ debugging: integrations: - type: git + import_branches: true + watched_branches: + - main poll_interval: 1h remote: https://github.com/fossas/broker.git auth: diff --git a/testdata/config/basic-http-basic-malformed-debugging.yml b/testdata/config/basic-http-basic-malformed-debugging.yml index d52b55c3..783c8b1d 100644 --- a/testdata/config/basic-http-basic-malformed-debugging.yml +++ b/testdata/config/basic-http-basic-malformed-debugging.yml @@ -12,6 +12,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: http_basic username: jssblck diff --git a/testdata/config/basic-http-basic.yml b/testdata/config/basic-http-basic.yml index ab946d49..5b4c6302 100644 --- a/testdata/config/basic-http-basic.yml +++ b/testdata/config/basic-http-basic.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: http_basic username: jssblck diff --git a/testdata/config/basic-http-header.yml b/testdata/config/basic-http-header.yml index a861a44a..b95e7ec5 100644 --- a/testdata/config/basic-http-header.yml +++ b/testdata/config/basic-http-header.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: http_header header: "Bearer: efgh5678" diff --git a/testdata/config/basic-http-no-auth-empty-repo.yml b/testdata/config/basic-http-no-auth-empty-repo.yml 
index 47d63d21..179a75a9 100644 --- a/testdata/config/basic-http-no-auth-empty-repo.yml +++ b/testdata/config/basic-http-no-auth-empty-repo.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/empty.git + import_branches: true + watched_branches: + - main auth: type: none transport: http diff --git a/testdata/config/basic-http-no-auth.yml b/testdata/config/basic-http-no-auth.yml index 3d2584c0..eb68261c 100644 --- a/testdata/config/basic-http-no-auth.yml +++ b/testdata/config/basic-http-no-auth.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/broker-test-example.git + import_branches: true + watched_branches: + - main auth: type: none transport: http diff --git a/testdata/config/basic-ssh-key.yml b/testdata/config/basic-ssh-key.yml index fb8cbd28..e2132ff8 100644 --- a/testdata/config/basic-ssh-key.yml +++ b/testdata/config/basic-ssh-key.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: git@github.com:fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: ssh_key key: efgh5678 diff --git a/testdata/config/basic.yml b/testdata/config/basic.yml index 05a8ca37..9a76a16e 100644 --- a/testdata/config/basic.yml +++ b/testdata/config/basic.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: git@github.com:fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: ssh_key_file path: /home/me/.ssh/id_rsa diff --git a/testdata/config/fossa-one-http-no-auth.yml b/testdata/config/fossa-one-http-no-auth.yml index c81b42e0..eb99b09b 100644 --- a/testdata/config/fossa-one-http-no-auth.yml +++ b/testdata/config/fossa-one-http-no-auth.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/one.git + import_branches: true + watched_branches: + - main auth: type: none transport: http diff --git a/testdata/config/nonexistent-repo-bad-http-basic.yml b/testdata/config/nonexistent-repo-bad-http-basic.yml index b05e7773..75747557 100644 --- a/testdata/config/nonexistent-repo-bad-http-basic.yml +++ b/testdata/config/nonexistent-repo-bad-http-basic.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/does-not-exist.git + import_branches: true + watched_branches: + - main auth: type: http_basic username: some_user diff --git a/testdata/config/nonexistent-repo-bad-http-header.yml b/testdata/config/nonexistent-repo-bad-http-header.yml index a41739c0..ad6a53c8 100644 --- a/testdata/config/nonexistent-repo-bad-http-header.yml +++ b/testdata/config/nonexistent-repo-bad-http-header.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: https://github.com/fossas/does-not-exist.git + import_branches: true + watched_branches: + - main auth: type: http_header header: "Bearer: some_user:some_password" diff --git a/testdata/config/private-repo-http-no-auth.yml b/testdata/config/private-repo-http-no-auth.yml index 3415af8f..a0c3fe4e 100644 --- a/testdata/config/private-repo-http-no-auth.yml +++ b/testdata/config/private-repo-http-no-auth.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1h remote: http://github.com/github/doesnotexist.git + import_branches: true + watched_branches: + - main auth: type: none transport: http diff --git a/testdata/config/short-poll-interval.yml b/testdata/config/short-poll-interval.yml index 616cc4c1..6885ed77 100644 --- a/testdata/config/short-poll-interval.yml +++ 
b/testdata/config/short-poll-interval.yml @@ -11,6 +11,9 @@ integrations: - type: git poll_interval: 1m remote: git@github.com:fossas/broker.git + import_branches: true + watched_branches: + - main auth: type: ssh_key_file path: /home/me/.ssh/id_rsa diff --git a/testdata/config/smart_imports_malformed.yml b/testdata/config/smart_imports_malformed.yml new file mode 100644 index 00000000..47d81976 --- /dev/null +++ b/testdata/config/smart_imports_malformed.yml @@ -0,0 +1,20 @@ +fossa_endpoint: https://app.fossa.com +fossa_integration_key: abcd1234 +version: 1 + +debugging: + location: /home/me/.config/fossa/broker/debugging/ + retention: + days: 3 + +integrations: + - type: git + poll_interval: 1h + remote: https://github.com/fossas/broker.git + auth: + type: http_basic + username: jssblck + password: efgh5678 + import_branches: false + watched_branches: + - main \ No newline at end of file diff --git a/tests/it/config.rs b/tests/it/config.rs index 4078655f..608cf1ee 100644 --- a/tests/it/config.rs +++ b/tests/it/config.rs @@ -233,3 +233,34 @@ async fn test_integration_short_poll_interval() { .await; assert_error_stack_snapshot!(&config_file_path, err); } + +#[tokio::test] +async fn test_integration_smart_imports_default() { + let (_, conf) = load_config!( + "testdata/config/basic.yml", + "testdata/database/empty.sqlite" + ) + .await; + + let Some(integration) = conf.integrations().as_ref().iter().next() else { + panic!("must have parsed at least one integration") + }; + assert_eq!( + integration.import_branches(), + &remote::BranchImportStrategy::Enabled + ); + assert_eq!( + integration.import_tags(), + &remote::TagImportStrategy::Disabled + ); +} + +#[tokio::test] +async fn test_integration_smart_imports_malformed() { + let (config_file_path, err) = load_config_err!( + "testdata/config/smart_imports_malformed.yml", + "testdata/database/empty.sqlite" + ) + .await; + assert_error_stack_snapshot!(&config_file_path, err); +} diff --git a/tests/it/db/sqlite.rs b/tests/it/db/sqlite.rs index c7346096..15fb4d42 100644 --- a/tests/it/db/sqlite.rs +++ b/tests/it/db/sqlite.rs @@ -137,10 +137,11 @@ async fn roundtrip_state() { ); let state = db.state(&coordinate).await.expect("must get state"); + let is_branch = true; assert!(state.is_none(), "db state was unset, so must be none"); let state = b"some state"; - db.set_state(&coordinate, state) + db.set_state(&coordinate, state, &is_branch) .await .expect("must set state"); @@ -151,3 +152,123 @@ async fn roundtrip_state() { .expect("state must have been set"); assert_eq!(new_state, state); } + +#[tokio::test] +async fn remove_all_branch_states() { + let (_tmp, db, _path) = temp_db!(); + + let coordinate = Coordinate::new( + broker::db::Namespace::Git, + String::from("some repo"), + String::from("some reference"), + ); + let coordinate2 = Coordinate::new( + broker::db::Namespace::Git, + String::from("some repo 2"), + String::from("some reference 2"), + ); + + let state = db.state(&coordinate).await.expect("must get state"); + let state2 = db.state(&coordinate2).await.expect("must get state"); + + let is_branch = true; + assert!(state.is_none(), "db state was unset, so must be none"); + assert!(state2.is_none(), "db state was unset, so must be none"); + + let state = b"some state"; + let state2 = b"some state 2"; + db.set_state(&coordinate, state, &is_branch) + .await + .expect("must set state"); + db.set_state(&coordinate2, state2, &is_branch) + .await + .expect("must set state"); + + let new_state = db + .state(&coordinate) + .await + .expect("must get 
state") + .expect("state must have been set"); + let new_state2 = db + .state(&coordinate2) + .await + .expect("must get state") + .expect("state must have been set"); + assert_eq!(new_state, state); + assert_eq!(new_state2, state2); + + let remote = "some repo"; + let remote2 = "some repo 2"; + db.delete_states(remote, is_branch) + .await + .expect("state must be deleted"); + db.delete_states(remote2, is_branch) + .await + .expect("state must be deleted"); + + let state_after_delete = db.state(&coordinate).await.expect("must get state"); + let state2_after_delete = db.state(&coordinate2).await.expect("must get state"); + assert!(state_after_delete.is_none(), "db state was removed"); + assert!(state2_after_delete.is_none(), "db state2 was removed"); +} + +#[tokio::test] +async fn remove_all_tag_states() { + let (_tmp, db, _path) = temp_db!(); + + let coordinate = Coordinate::new( + broker::db::Namespace::Git, + String::from("some repo"), + String::from("some reference"), + ); + let coordinate2 = Coordinate::new( + broker::db::Namespace::Git, + String::from("some repo 2"), + String::from("some reference 2"), + ); + + let state = db.state(&coordinate).await.expect("must get state"); + let state2 = db.state(&coordinate2).await.expect("must get state"); + + let is_branch = true; + let tag_identifier = false; + assert!(state.is_none(), "db state was unset, so must be none"); + assert!(state2.is_none(), "db state was unset, so must be none"); + + let state = b"some state"; + let state2 = b"some state 2"; + db.set_state(&coordinate, state, &is_branch) + .await + .expect("must set state"); + db.set_state(&coordinate2, state2, &tag_identifier) + .await + .expect("must set state"); + + let new_state = db + .state(&coordinate) + .await + .expect("must get state") + .expect("state must have been set"); + let new_state2 = db + .state(&coordinate2) + .await + .expect("must get state") + .expect("state must have been set"); + assert_eq!(new_state, state); + assert_eq!(new_state2, state2); + + let remote2 = "some repo 2"; + db.delete_states(remote2, tag_identifier) + .await + .expect("state must be deleted"); + + let state_after_delete = db + .state(&coordinate) + .await + .expect("must get state") + .expect("state must have been set"); + let state2_after_delete = db.state(&coordinate2).await.expect("must get state"); + + assert_eq!(state_after_delete, state); + assert!(state2_after_delete.is_none(), "db state2 was removed"); +} diff --git a/tests/it/snapshots/it__config__integration_smart_imports_malformed.snap b/tests/it/snapshots/it__config__integration_smart_imports_malformed.snap new file mode 100644 index 00000000..e7ec4b76 --- /dev/null +++ b/tests/it/snapshots/it__config__integration_smart_imports_malformed.snap @@ -0,0 +1,18 @@ +--- +source: tests/it/config.rs +expression: err +info: testdata/config/smart_imports_malformed.yml +--- +load config file +├╴at {source location} +│ +├─▶ parse config file in v1 format +│ ╰╴at {source location} +│ +├─▶ validate parsed config file values +│ ╰╴at {source location} +│ +╰─▶ validate import branches and watched branches + ├╴at {source location} + ├╴help: import branches must be 'true' if watched branches are provided + ╰╴context: import branches: 'false'