diff --git a/Cargo.lock b/Cargo.lock
index 7c3d14b7..98bb9275 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -263,7 +263,7 @@ dependencies = [
[[package]]
name = "broker"
-version = "0.2.4-pre"
+version = "0.3.0-pre"
dependencies = [
"aho-corasick 0.7.20",
"async-trait",
@@ -283,6 +283,7 @@ dependencies = [
"error-stack",
"futures",
"getset",
+ "glob",
"governor",
"humantime",
"indoc",
@@ -1153,6 +1154,12 @@ version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
+[[package]]
+name = "glob"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
+
[[package]]
name = "governor"
version = "0.6.0"
diff --git a/Cargo.toml b/Cargo.toml
index 7d757d2e..9758fa6b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "broker"
-version = "0.2.4-pre"
+version = "0.3.0-pre"
edition = "2021"
description = "The bridge between FOSSA and internal DevOps services"
readme = "README.md"
@@ -70,6 +70,7 @@ tikv-jemallocator = { version = "0.5.4", optional = true }
deadqueue = "0.2.4"
governor = "0.6.0"
nonzero_ext = "0.3.0"
+glob = "0.3.1"
[dev-dependencies]
insta = { version = "1.31.0", features = ["filters", "json", "yaml"] }
diff --git a/db/canonical.db b/db/canonical.db
index 6c057191..53db70fb 100644
Binary files a/db/canonical.db and b/db/canonical.db differ
diff --git a/db/migrations/20230912062551_repo_state_2.down.sql b/db/migrations/20230912062551_repo_state_2.down.sql
new file mode 100644
index 00000000..18873c64
--- /dev/null
+++ b/db/migrations/20230912062551_repo_state_2.down.sql
@@ -0,0 +1,2 @@
+-- Add down migration script here
+alter table repo_state drop column is_branch;
\ No newline at end of file
diff --git a/db/migrations/20230912062551_repo_state_2.up.sql b/db/migrations/20230912062551_repo_state_2.up.sql
new file mode 100644
index 00000000..9b1e6e93
--- /dev/null
+++ b/db/migrations/20230912062551_repo_state_2.up.sql
@@ -0,0 +1,2 @@
+-- Add up migration script here
+alter table repo_state add column is_branch integer;
\ No newline at end of file
diff --git a/docs/dev/README.md b/docs/dev/README.md
index 429e244a..f24f45be 100644
--- a/docs/dev/README.md
+++ b/docs/dev/README.md
@@ -165,3 +165,41 @@ git pull # Ensure you're tagging the latest commit
git tag v0.2.0 # Validate this is correct, and don't forget the `v`
git push --tags # Push the new tag to the remote.
```
+
+## smart imports
+
+Broker provides configurable branch/tag scanning for every integration. You can customize your scans
+through these fields listed in the integrations section of your config.yml:
+
+```
+integrations:
+ - type: git
+ import_branches: true # Defaults to true
+ watched_branches: # If unspecified, Broker will try to set to main or master if present
+ - main
+ - release*
+ import_tags: false # Defaults to false
+```
+
+### default values
+
+If these fields are not set, `import_branches` will be set to `true`, `import_tags` will be set to `false`, and Broker
+will make a best-effort attempt to set `watched_branches` to `main` or `master` if either is present in the remote.
+
+### branch scanning
+
+In order to scan specific branches, `import_branches` must be set to `true` and the list of branches you intend to scan should be provided under `watched_branches`. Having `watched_branches` set while having `import_branches` set to `false` is an invalid
+combination and will cause Broker to throw errors.
+
+[Glob matching](https://en.wikipedia.org/wiki/Glob_(programming)) is also supported for your branches. If one of your `watched_branches` is `release*` and your remote contains branches `release1`, `release2`, and `release-3`, then all three
+of those branches will be scanned due to glob matching.
+
+### tag scanning
+
+In order to allow Broker to scan tags in your remote, `import_tags` must be set to `true`.
+
+### toggling fields
+
+Toggling `import_branches` from `true` to `false` will remove all existing uploaded scans for ALL branches of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning the listed `watched_branches` for the first time. On subsequent poll cycles, Broker will import the latest changes from your configured branches since the last revision (skipping any intermediate commits).
+
+Toggling `import_tags` from `true` to `false` will remove all existing uploaded scans for ALL tags of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning all the remote's tags for the first time. This would mean that all tags for that remote would be scanned. On subsequent poll cycles, Broker will import all created or changed tags since the last poll cycle.
diff --git a/docs/reference/config.md b/docs/reference/config.md
index e69bae7a..1661036a 100644
--- a/docs/reference/config.md
+++ b/docs/reference/config.md
@@ -69,11 +69,14 @@ This block specifies how to configure Broker to communicate with a git server fo
| Value | Required? | Description | Suggested default | Minimum value |
|-----------------|-----------|-----------------------------------------------------------------------------------------------|-------------------|---------------|
-| `poll_interval` | Required | How often Broker checks with the remote repository to see whether it has changed.1 | `1 hour` | `1 hour` |
-| `remote` | Required | The remote git repository address. | N/A | N/A |
-| `auth` | Required | Required authentication to clone this repository. | N/A | N/A |
-| `team` | Optional | The team in FOSSA to which this project should be assigned.2 | N/A | N/A |
-| `title` | Optional | Specify a custom title for the project instead of using the default.3 | N/A | N/A |
+| `poll_interval` | Required | How often Broker checks with the remote repository to see whether it has changed.1 | `1 hour` | `1 hour` |
+| `remote` | Required | The remote git repository address. | N/A | N/A |
+| `auth` | Required | Required authentication to clone this repository. | N/A | N/A |
+| `team` | Optional | The team in FOSSA to which this project should be assigned.2 | N/A | N/A |
+| `title` | Optional | Specify a custom title for the project instead of using the default.3 | N/A | N/A |
+| `import_branches` | Optional | Whether Broker scans branches of the remote repository (defaults to `true`) | N/A | N/A |
+| `import_tags` | Optional | Whether Broker scans tags of the remote repository (defaults to `false`) | N/A | N/A |
+| `watched_branches`| Optional | The names (or glob patterns) of the branches that you intend to scan | N/A | N/A |
**[1]**: The poll interval defines the interval at which Broker _checks for updates_, not the interval at which Broker actually analyzes the repository.
For more details on authentication, see [integration authentication](#integration-authentication).
@@ -118,6 +121,44 @@ Examples for valid durations:
| `300ms 20s 5day` | 5 days, 20 seconds, and 300 milliseconds |
| `5day 4hours 10days` | 15 days and 4 hours |
+## Smart Imports
+
+Broker provides configurable branch/tag scanning for every integration. You can customize your scans
+through these fields listed in the integrations section of your config.yml:
+
+```
+integrations:
+ - type: git
+ import_branches: true # Defaults to true
+ watched_branches: # If unspecified, Broker will try to set to main or master if present
+ - main
+ - release*
+ import_tags: false # Defaults to false
+```
+
+### default values
+
+If these fields are not set, `import_branches` will be set to `true`, `import_tags` will be set to `false`, and Broker
+will make a best-effort attempt to set `watched_branches` to `main` or `master` if either is present in the remote.
+
+### branch scanning
+
+In order to scan specific branches, `import_branches` must be set to `true` and the list of branches you intend to scan should be provided under `watched_branches`. Having `watched_branches` set while having `import_branches` set to `false` is an invalid
+combination and will cause Broker to throw errors.
+
+[Glob matching](https://en.wikipedia.org/wiki/Glob_(programming)) is also supported for your branches. If one of your `watched_branches` is `release*` and your remote contains branches `release1`, `release2`, and `release-3`, then all three
+of those branches will be scanned due to glob matching.
+
+### tag scanning
+
+In order to allow Broker to scan tags in your remote, `import_tags` must be set to `true`.
+
+### toggling fields
+
+Toggling `import_branches` from `true` to `false` will remove all existing uploaded scans for ALL branches of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning the listed `watched_branches` for the first time. On subsequent poll cycles, Broker will import the latest changes from your configured branches since the last revision (skipping any intermediate commits).
+
+Toggling `import_tags` from `true` to `false` will remove all existing uploaded scans for ALL tags of that particular remote in your local database (this does NOT delete your scans in the FOSSA UI). If toggled from `false` to `true`, Broker will perform as if it is scanning all the remote's tags for the first time. This would mean that all tags for that remote would be scanned. On subsequent poll cycles, Broker will import all created or changed tags since the last poll cycle.
+
## Integration authentication
Integrations support several possible authentication schemes, specified by `type`.
diff --git a/src/api/remote.rs b/src/api/remote.rs
index a3af1fa0..93b00820 100644
--- a/src/api/remote.rs
+++ b/src/api/remote.rs
@@ -17,6 +17,7 @@ use derive_more::{AsRef, Display, From};
use derive_new::new;
use error_stack::{ensure, report, Report, ResultExt};
use getset::{CopyGetters, Getters};
+use glob::Pattern;
use humantime::parse_duration;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
@@ -51,6 +52,14 @@ pub enum ValidationError {
/// The provided value is empty.
#[error("value is empty")]
ValueEmpty,
+
+ /// Invalid combination of import branches and watched branches
+ #[error("validate import branches and watched branches")]
+ ImportBranches,
+
+ /// Unable to infer primary branch
+ #[error("primary branch could not be inferred")]
+ PrimaryBranch,
}
/// Validated config values for external code host integrations.
@@ -128,6 +137,18 @@ pub struct Integration {
#[getset(get = "pub")]
#[builder(setter(into))]
protocol: Protocol,
+
+ /// Specifies if we want to scan specific branches
+ #[getset(get = "pub")]
+ import_branches: BranchImportStrategy,
+
+ /// Specifies if we want to scan tags
+ #[getset(get = "pub")]
+ import_tags: TagImportStrategy,
+
+ /// The name of the branches we want to scan
+ #[getset(get = "pub")]
+ watched_branches: Vec,
}
impl Display for Integration {
@@ -151,6 +172,28 @@ impl Integration {
pub fn endpoint(&self) -> &Remote {
self.protocol().endpoint()
}
+
+ /// Returns true if `reference` matches any watched branch, treating each watched branch name as a glob pattern
+ pub fn should_scan_reference(&self, reference: &str) -> bool {
+ let branches = self.watched_branches();
+ for branch in branches {
+ match Pattern::new(branch.name()) {
+ Ok(p) => {
+ if p.matches(reference) {
+ return true;
+ }
+ }
+ // A branch name that is not a valid glob pattern is skipped; if nothing matches, the function returns false
+ Err(_e) => continue,
+ }
+ }
+ false
+ }
+
+ /// Appends `watched_branch` to this integration's list of watched branches
+ pub fn add_watched_branch(&mut self, watched_branch: WatchedBranch) {
+ self.watched_branches.push(watched_branch)
+ }
}
/// Code is stored in many kinds of locations, from git repos to
@@ -214,6 +257,93 @@ impl TryFrom for PollInterval {
}
}
+/// Specifies whether we want to scan branches
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, Deserialize, Serialize, new)]
+pub enum BranchImportStrategy {
+ /// Scanning branches is not allowed
+ Disabled,
+ /// Scanning branches is allowed
+ Enabled,
+}
+
+impl From