From e3c554482ee15e1b422c4b31536c92ef7066b9ae Mon Sep 17 00:00:00 2001 From: Jessica Black Date: Thu, 12 Oct 2023 14:32:50 -0700 Subject: [PATCH] Integrate snippet scanning into FOSSA CLI (#1298) --- Cargo.lock | 2 +- Changelog.md | 5 + docs/README.md | 1 + docs/references/subcommands/snippets.md | 106 +++++++ .../subcommands/snippets/analyze.md | 110 +++++++ .../references/subcommands/snippets/commit.md | 44 +++ extlib/millhone/Cargo.toml | 2 +- extlib/millhone/docs/subcommands/analyze.md | 45 +-- extlib/millhone/src/main.rs | 2 + spectrometer.cabal | 4 + src/App/Fossa/Config/Snippets.hs | 281 ++++++++++++++++++ src/App/Fossa/EmbeddedBinary.hs | 20 ++ src/App/Fossa/Main.hs | 2 + src/App/Fossa/Snippets.hs | 46 +++ src/App/Fossa/Snippets/Analyze.hs | 45 +++ src/App/Fossa/Snippets/Commit.hs | 48 +++ src/Effect/Exec.hs | 41 ++- src/Strategy/Go/Gomod.hs | 9 +- 18 files changed, 775 insertions(+), 38 deletions(-) create mode 100644 docs/references/subcommands/snippets.md create mode 100644 docs/references/subcommands/snippets/analyze.md create mode 100644 docs/references/subcommands/snippets/commit.md create mode 100644 src/App/Fossa/Config/Snippets.hs create mode 100644 src/App/Fossa/Snippets.hs create mode 100644 src/App/Fossa/Snippets/Analyze.hs create mode 100644 src/App/Fossa/Snippets/Commit.hs diff --git a/Cargo.lock b/Cargo.lock index c277d995d5..570a299be1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -871,7 +871,7 @@ dependencies = [ [[package]] name = "millhone" -version = "0.3.1" +version = "0.3.2" dependencies = [ "atty", "base64 0.21.4", diff --git a/Changelog.md b/Changelog.md index fb62d1d6d9..538f5f532f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,10 @@ # FOSSA CLI Changelog +## v3.8.17 + +Integrates FOSSA snippet scanning into the main application. +For more details and a quick start guide, see [the subcommand reference](./docs/references/subcommands/snippets.md). 
+ ## v3.8.16 Delivers another update to the `millhone` early preview of FOSSA snippet scanning: diff --git a/docs/README.md b/docs/README.md index 0ec7ad9dfb..053bcaa9cd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -61,6 +61,7 @@ Reference guides provide an exhaustive listing of all CLI functionality. If you - [`fossa test`](./references/subcommands/test.md) - [`fossa report`](./references/subcommands/report.md) - [`fossa list-targets`](./references/subcommands/list-targets.md) + - [`fossa snippets`](./references/subcommands/snippets.md) - CLI configuration files diff --git a/docs/references/subcommands/snippets.md b/docs/references/subcommands/snippets.md new file mode 100644 index 0000000000..7665c2dd67 --- /dev/null +++ b/docs/references/subcommands/snippets.md @@ -0,0 +1,106 @@ +## `fossa snippets` + +This subcommand is the home for FOSSA's snippet scanning feature. + +It is made up of two subcommands: + +- [`fossa snippets analyze`](./snippets/analyze.md) +- [`fossa snippets commit`](./snippets/commit.md) + +See the pages linked above for more details. + +## Quickstart + +```shell +# Set your API key. Get this from the FOSSA web application. +# On Windows, use this instead: $env:FOSSA_API_KEY=XXXX +export FOSSA_API_KEY=XXXX + +# Navigate to your project directory. +cd $MY_PROJECT_DIR + +# Analyze the project for local snippet matches. +# Match data is output to the directory specified to the `-o` or `--output` argument. +# If desired, you can manually review the matches output to the directory. +fossa snippets analyze -o snippets + +# Commit matched snippets to a `fossa-deps` file. +# Provide it the same directory provided to `fossa snippets analyze`. +# This creates a `fossa-deps` file in your project. +# +# Note that you can control what kinds of snippets are committed; +# see subcommand documentation for more details. 
+fossa snippets commit --analyze-output snippets + +# Run a standard FOSSA analysis, which will also upload snippet scanned dependencies, +# since they were stored in your `fossa-deps` file. +fossa analyze +``` + +## FAQ + +### Is my source code sent to FOSSA's servers? + +**Short version: No.** More detail explaining this is below. + +FOSSA CLI fingerprints your first party source code but does not send it to the server. +The fingerprint is a SHA-256 hashed representation of the content that made up the snippet. + +FOSSA CLI does send the fingerprint to the server, but since SHA-256 hashes are +[cryptographically secure](https://en.wikipedia.org/wiki/SHA-2), it is effectively not possible +for FOSSA to reproduce the original code that went into the snippet. + +Of course, if the fingerprint matches FOSSA could then infer that the project contains that snippet of code, +but since FOSSA CLI does not send any additional context in the file there's no way for FOSSA or anyone else +to make use of this information. + +The code to perform this is open source in this CLI; +users can also utilize tooling such as [echotraffic](https://github.com/fossas/echotraffic) +to report the information being uploaded. + +### How does FOSSA snippet scanning work? + +FOSSA snippet scanning operates over a matrix of options: + +``` +Targets × Kinds × Methods +``` + +Valid options for `Targets` are: + +Target | Description +-----------|----------------------------------------------------------------------- +`Function` | Considers function declarations in the source code as snippet targets. + +Valid options for `Kinds` are: + +Kind | Description +------------|---------------------------------------------- +`Full` | The full expression that makes up the target. +`Signature` | The function signature of `Function` targets. +`Body` | The function body of `Function` targets. 
+ +Valid options for `Methods` are: + +Method | Description +--------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------- +`Raw` | The expression that makes up the target as written in the source code file. +`NormalizedSpace` | The expression with any character in the Unicode [whitespace character class][] replaced with a space, and any contiguous spaces collapsed to a single space. +`NormalizedComment` | The expression with comments removed, as defined by the source code language. +`NormalizedCode` | Equivalent to `NormalizedComment` followed by `NormalizedSpace`. + +Given these options, the fully defined matrix of options is as follows: + +``` +{Function} × {Full, Signature, Body} × {Raw, NormalizedSpace, NormalizedComment, NormalizedCode} +``` + +FOSSA then scans open source projects for these snippets and records them along with their metadata, +such as where in the file the snippet originated and from what project. + +Finally, when users scan their first-party projects, FOSSA extracts snippets in the same manner +and compares the fingerprints of the content of those snippets against the database. +If a match is found, FOSSA reports all open source projects in which the snippet was found, +along with recorded metadata about that snippet. + +[whitespace character class]: https://en.wikipedia.org/wiki/Unicode_character_property#Whitespace diff --git a/docs/references/subcommands/snippets/analyze.md b/docs/references/subcommands/snippets/analyze.md new file mode 100644 index 0000000000..472b05554f --- /dev/null +++ b/docs/references/subcommands/snippets/analyze.md @@ -0,0 +1,110 @@ +## `fossa snippets analyze` + +This subcommand extracts snippets from a user project and compares them to the FOSSA database of snippets. +Any matches are then written to the directory provided. 
+ +## Options + +Argument | Required | Default | Description +---------------------|----------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------- +`-o` / `--output` | Yes | None | The directory to which matches are output. +`--debug` | No | No | Enables debug mode. Note that debug bundles are not currently exported with `fossa snippets`, but this output is similarly useful. +`--overwrite-output` | No | No | If specified, overwrites the directory indicated by `--output`. +`--target` | No | `function` | If specified, extracts and matches only the specified targets. Specify multiple options by providing this argument multiple times. +`--kind` | No | `full, signature, body` | If specified, extracts and matches only the specified kinds. Specify multiple options by providing this argument multiple times. +`--transform` | No | `space, comment, code` | If specified, extracts and matches only the specified transforms. Specify multiple options by providing this argument multiple times. + +> [!NOTE] +> `--transform` corresponds to the `Normalized` methods [listed here](../snippets.md#how-does-fossa-snippet-scanning-work). +> The `Raw` method is always enabled and cannot be disabled. + +## Output + +Matches are written to the location specified by the `--output` (or `-o`) argument. + +The output directory consists of a set of flat files, each representing a file in the scan directory +that had at least one matching snippet. These files are named with the path of the file relative to +the scan directory, with any path separators replaced by underscores, and a `.json` extension appended. 
+ +For example, the following project: +``` +example-project/ + lib/ + lib.c + vendor/ + openssh/ + openssh.c + main.c +``` + +When scanned like `fossa snippets analyze -o snippets`, +would be presented like the below if all files contained a snippet match: +``` +snippets/ + lib_lib.c.json + lib_vendor_openssh_openssh.c.json + main.c.json +``` + +The content of each of these files is a JSON encoded array of matches, +where each object in the array consists of the following keys: + +Key | Description +--------------------|------------------------------------------------------------------------------- +`found_in` | The relative path of the local file in which the snippet match was found. +`local_text` | The text that matched the snippet in the local file. +`local_snippet` | Information about the snippet extracted from the local file. +`matching_snippets` | A collection of snippets from the FOSSA knowledgebase that match this snippet. + +The `local_snippet` object has the following keys: + +Key | Description +--------------|--------------------------------------------------------------------------- +`fingerprint` | The base64 representation of the snippet fingerprint. +`target` | The kind of source code item that matched for this snippet. +`kind` | The kind of snippet that was matched. +`method` | The normalization method used on the matching snippet. +`file_path` | The path of the file containing the snippet, relative to the project root. +`byte_start` | The byte index in the file at which the snippet begins. +`byte_end` | The byte index in the file at which the snippet ends. +`line_start` | The line number in the file at which the snippet begins. +`line_end` | The line number in the file at which the snippet ends. +`col_start` | The column number on the `line_start` at which the snippet begins. +`col_end` | The column number on the `line_end` at which the snippet ends. +`language` | The language of the identified snippet. 
+ +Each entry in the `matching_snippets` collection has the following keys: + +Key | Description +--------------|--------------------------------------------------------------------------- +`locator` | The FOSSA identifier for the project to which this snippet belongs. +`fingerprint` | The base64 representation of the snippet fingerprint. +`target` | The kind of source code item that matched for this snippet. +`kind` | The kind of snippet that was matched. +`method` | The normalization method used on the matching snippet. +`file_path` | The path of the file containing the snippet, relative to the project root. +`byte_start` | The byte index in the file at which the snippet begins. +`byte_end` | The byte index in the file at which the snippet ends. +`line_start` | The line number in the file at which the snippet begins. +`line_end` | The line number in the file at which the snippet ends. +`col_start` | The column number on the `line_start` at which the snippet begins. +`col_end` | The column number on the `line_end` at which the snippet ends. +`language` | The language of the identified snippet. +`ingest_id` | The ingestion run that discovered this snippet (not meaningful to users). + +## Correcting Matches + +In order to correct matches, users may manually edit the contents of this directory +or files within the directory to alter or remove matches. + +For example, if a certain snippet is found in the local code that matches +a snippet in the FOSSA knowledgebase, but it's known to be a false positive, +users can script the removal of that snippet match from this directory prior to +committing these results in a FOSSA scan. + +## Next Steps + +After running `fossa snippets analyze`, the next step is to run `fossa snippets commit`. + +These are separate steps to give users the ability to edit or review the matched data +prior to submitting the results to FOSSA. 
diff --git a/docs/references/subcommands/snippets/commit.md b/docs/references/subcommands/snippets/commit.md new file mode 100644 index 0000000000..9b038254c3 --- /dev/null +++ b/docs/references/subcommands/snippets/commit.md @@ -0,0 +1,44 @@ +## `fossa snippets commit` + +This subcommand commits the analysis performed in the `analyze` subcommand into a `fossa-deps` file ([reference](../../files/fossa-deps.md)). +For more information on possible options, run `fossa snippets commit --help`. + +## Options + +Argument | Required | Default | Description +-------------------------|----------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------- +`--analyze-output` | Yes | None | The directory to which `fossa snippets analyze` output its matches. +`--debug` | No | No | Enables debug mode. Note that debug bundles are not currently exported with `fossa snippets`, but this output is similarly useful. +`--overwrite-fossa-deps` | No | No | If specified, overwrites the `fossa-deps` file if present. +`--target` | No | `function` | If specified, commits matches consisting of only the specified targets. Specify multiple options by providing this argument multiple times. +`--kind` | No | `full, signature, body` | If specified, commits matches consisting of only the specified kinds. Specify multiple options by providing this argument multiple times. +`--transform` | No | `space, comment, code` | If specified, commits matches consisting of only the specified transforms. Specify multiple options by providing this argument multiple times. +`--format` | No | `yml` | Allows configuring the format of the generated `fossa-deps` file. + +> [!NOTE] +> `--transform` corresponds to the `Normalized` methods [listed here](../snippets.md#how-does-fossa-snippet-scanning-work). +> The `Raw` method is always enabled and cannot be disabled. 
+ +## Input + +The primary thing this subcommand requires is the path to the directory in which the output of `analyze` +was written. Users can also alter which kinds of matches to commit, and customize the output format +of the created `fossa-deps` file. + +## Output + +The result of this subcommand is a `fossa-deps` file written to the root of the project directory. + +> [!NOTE] +> This subcommand will not overwrite an existing `fossa-deps` file by default, +> and currently does not merge its output into an existing `fossa-deps` file. +> +> However, users can customize the output format (via `--format`) and then +> perform scripted merges themselves. + +## Next Steps + +After running `fossa snippets commit`, the next step is to run `fossa analyze` on the project. + +FOSSA CLI will then pick up the dependencies reported in that `fossa-deps` file and report them +as dependencies of the project. diff --git a/extlib/millhone/Cargo.toml b/extlib/millhone/Cargo.toml index d8a4596aad..2f3a8c1205 100644 --- a/extlib/millhone/Cargo.toml +++ b/extlib/millhone/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "millhone" -version = "0.3.1" +version = "0.3.2" edition = "2021" [features] diff --git a/extlib/millhone/docs/subcommands/analyze.md b/extlib/millhone/docs/subcommands/analyze.md index a01a91285d..e8662a6888 100644 --- a/extlib/millhone/docs/subcommands/analyze.md +++ b/extlib/millhone/docs/subcommands/analyze.md @@ -3,20 +3,9 @@ This subcommand analyzes a local project for snippets that match snippets in the FOSSA knowledgebase. For more information on possible options, run `millhone analyze --help`. -# Output +## Output Matches are written to the location specified by the `--output` (or `-o`) argument. -If this argument is not specified, `millhone` creates a temporary directory prefixed by "millhone_". - -> [!NOTE] -> Millhone by default creates this directory in the system temporary directory. 
-> If desired, this can be customized: -> - On Linux and macOS: set the `TMPDIR` environment variable. -> - On Windows, this uses the `GetTempPath` system call, which uses the first valid option of: -> - The path specified by the `TMP` environment variable. -> - The path specified by the `TEMP` environment variable. -> - The path specified by the `USERPROFILE` environment variable. -> - The Windows directory. The output directory consists of a set of flat files, each representing a file in the scan directory that had at least one matching snippet. These files are named with the path of the file relative to @@ -24,29 +13,25 @@ the scan directory, with any path separators replaced by underscores, and a `.js For example, the following project: ``` -/Users/ - me/ - projects/ - example-project/ - lib/ - lib.c - vendor/ - openssh/ - openssh.c - main.c +example-project/ + lib/ + lib.c + vendor/ + openssh/ + openssh.c + main.c ``` -When scanned like `millhone analyze /Users/me/projects/example-project`, +When scanned like `fossa snippets analyze -o snippets`, would be presented like the below if all files contained a snippet match: ``` -/tmp/ - millhone_abcd1234/ - lib_lib.c.json - lib_vendor_openssh_openssh.c.json - main.c.json +snippets/ + lib_lib.c.json + lib_vendor_openssh_openssh.c.json + main.c.json ``` -The contents of each of these files are a JSON encoded array of matches, +The content of each of these files is a JSON encoded array of matches, where each object in the array consists of the following keys: Key | Description @@ -104,7 +89,7 @@ committing these results in a FOSSA scan. # Next Steps -After running `millhone analyze`, the next step is to run `millhone commit`. +After running `fossa snippets analyze`, the next step is to run `fossa snippets commit`. These are separate steps to give users the ability to edit or review the matched data prior to submitting the results to FOSSA. 
diff --git a/extlib/millhone/src/main.rs b/extlib/millhone/src/main.rs index fab43fcbd2..4a24531638 100644 --- a/extlib/millhone/src/main.rs +++ b/extlib/millhone/src/main.rs @@ -115,6 +115,8 @@ fn main() -> stable_eyre::Result<()> { .with_writer(std::io::stdout) .with_file(false) .with_line_number(false) + .without_time() + .with_target(false) .with_span_events(tracing_subscriber::fmt::format::FmtSpan::NONE) .with_filter(app.level_filter()) .with_filter(self_sourced_events(app.log_level)), diff --git a/spectrometer.cabal b/spectrometer.cabal index 08e654f2b2..bf9f9afa6a 100644 --- a/spectrometer.cabal +++ b/spectrometer.cabal @@ -185,6 +185,7 @@ library App.Fossa.Config.LinkUserBinaries App.Fossa.Config.ListTargets App.Fossa.Config.Report + App.Fossa.Config.Snippets App.Fossa.Config.Test App.Fossa.Container App.Fossa.Container.AnalyzeNative @@ -210,6 +211,9 @@ library App.Fossa.Report App.Fossa.Report.Attribution App.Fossa.RunThemis + App.Fossa.Snippets + App.Fossa.Snippets.Analyze + App.Fossa.Snippets.Commit App.Fossa.Subcommand App.Fossa.Test App.Fossa.VendoredDependency diff --git a/src/App/Fossa/Config/Snippets.hs b/src/App/Fossa/Config/Snippets.hs new file mode 100644 index 0000000000..d1e641756e --- /dev/null +++ b/src/App/Fossa/Config/Snippets.hs @@ -0,0 +1,281 @@ +module App.Fossa.Config.Snippets ( + mkSubCommand, + SnippetsConfig (..), + SnippetsCommand, + CommitOutputFormat (..), + SnippetKind (..), + SnippetTarget (..), + SnippetTransform (..), + AnalyzeConfig (..), + CommitConfig (..), + labelForKind, + labelForTarget, + labelForTransform, +) where + +import App.Fossa.Config.Common (baseDirArg, collectBaseDir, fossaApiKeyCmdText) +import App.Fossa.Subcommand (EffStack, GetCommonOpts, GetSeverity (..), SubCommand (..)) +import App.OptionExtensions (uriOption) +import App.Types (BaseDir) +import Control.Carrier.Lift (sendIO) +import Control.Effect.Diagnostics (Diagnostics, Has) +import Control.Effect.Lift (Lift) +import Data.Aeson (ToJSON, 
defaultOptions, genericToEncoding, toEncoding) +import Data.List qualified as List +import Data.String.Conversion (ToString, ToText, toString, toText) +import Data.Text (Text) +import Effect.Logger (Severity (..)) +import Effect.ReadFS (ReadFS) +import GHC.Generics (Generic) +import Options.Applicative (InfoMod, Parser, command, eitherReader, help, hsubparser, info, long, many, metavar, option, optional, progDesc, short, strOption, switch, (<|>)) +import Path (Abs, Dir, Path) +import Path.IO qualified as Path +import Text.URI (URI) + +data SnippetsCommand + = CommandAnalyze + FilePath -- Scan root + Bool -- Debug + (Maybe URI) -- The FOSSA endpoint. Not currently used, but accepted for backwards compatibility. + (Maybe Text) -- The FOSSA API key. Not currently used, but accepted for backwards compatibility. + FilePath -- Output directory + Bool -- Whether to overwrite output directory + [SnippetTarget] + [SnippetKind] + [SnippetTransform] + | CommandCommit + FilePath -- Scan root + Bool -- Debug + (Maybe URI) -- The FOSSA endpoint. Not currently used, but accepted for backwards compatibility. + (Maybe Text) -- The FOSSA API key. Not currently used, but accepted for backwards compatibility. 
+ FilePath -- Analyze's output directory + Bool -- Whether to overwrite output file + (Maybe CommitOutputFormat) + [SnippetTarget] + [SnippetKind] + [SnippetTransform] + +snippetsInfo :: InfoMod a +snippetsInfo = progDesc "FOSSA snippet scanning" + +snippetsAnalyzeInfo :: InfoMod a +snippetsAnalyzeInfo = progDesc "Analyze a local project for snippet matches" + +snippetsCommitInfo :: InfoMod a +snippetsCommitInfo = progDesc "Commit matches discovered during analyze into a fossa-deps file" + +instance GetSeverity SnippetsCommand where + getSeverity :: SnippetsCommand -> Severity + getSeverity (CommandAnalyze _ analyzeDebug _ _ _ _ _ _ _) = if analyzeDebug then SevDebug else SevInfo + getSeverity (CommandCommit _ commitDebug _ _ _ _ _ _ _ _) = if commitDebug then SevDebug else SevInfo + +instance GetCommonOpts SnippetsCommand + +mkSubCommand :: (SnippetsConfig -> EffStack ()) -> SubCommand SnippetsCommand SnippetsConfig +mkSubCommand = SubCommand "snippets" snippetsInfo cliParser noLoadConfig mergeOpts + where + noLoadConfig = const $ pure Nothing + +cliParser :: Parser SnippetsCommand +cliParser = analyze <|> commit + where + analyze = hsubparser . 
command "analyze" $ info analyzeOpts snippetsAnalyzeInfo + analyzeOpts = + CommandAnalyze + <$> baseDirArg + <*> switch (long "debug" <> help "Enable debug logging") + <*> optional (uriOption (long "endpoint" <> short 'e' <> metavar "URL" <> help "The FOSSA API server base URL (default: https://app.fossa.com)")) + <*> optional (strOption (long fossaApiKeyCmdText <> help "the FOSSA API server authentication key (default: FOSSA_API_KEY from env)")) + <*> strOption (long "output" <> short 'o' <> help "The directory to which matches are output") + <*> switch (long "overwrite-output" <> help "If specified, overwrites the output directory if it exists") + <*> many (option (eitherReader parseTarget) (long "target" <> help ("Analyze this combination of targets") <> metavar "TARGET")) + <*> many (option (eitherReader parseKind) (long "kind" <> help ("Analyze this combination of kinds") <> metavar "KIND")) + <*> many (option (eitherReader parseTransform) (long "transform" <> help ("Analyze this combination of transforms") <> metavar "TRANSFORM")) + commit = hsubparser . 
command "commit" $ info commitOpts snippetsCommitInfo + commitOpts = + CommandCommit + <$> baseDirArg + <*> switch (long "debug" <> help "Enable debug logging") + <*> optional (uriOption (long "endpoint" <> short 'e' <> metavar "URL" <> help "The FOSSA API server base URL (default: https://app.fossa.com)")) + <*> optional (strOption (long fossaApiKeyCmdText <> help "the FOSSA API server authentication key (default: FOSSA_API_KEY from env)")) + <*> strOption (long "analyze-output" <> help "The directory to which 'analyze' matches were saved") + <*> switch (long "overwrite-fossa-deps" <> help "If specified, overwrites the 'fossa-deps' file if it exists") + <*> optional (option (eitherReader parseCommitOutputFormat) (long "format" <> help ("The output format for the generated `fossa-deps` file") <> metavar "FORMAT")) + <*> many (option (eitherReader parseTarget) (long "target" <> help ("Commit this combination of targets") <> metavar "TARGET")) + <*> many (option (eitherReader parseKind) (long "kind" <> help ("Commit this combination of kinds") <> metavar "KIND")) + <*> many (option (eitherReader parseTransform) (long "transform" <> help ("Commit this combination of transforms") <> metavar "TRANSFORM")) + +mergeOpts :: + ( Has Diagnostics sig m + , Has (Lift IO) sig m + , Has ReadFS sig m + ) => + a -> + b -> + SnippetsCommand -> + m SnippetsConfig +mergeOpts _ _ (CommandAnalyze path debug _ _ output overwrite targets kinds transforms) = do + root <- collectBaseDir path + output' <- sendIO $ Path.resolveDir' output + pure . Analyze $ AnalyzeConfig root debug output' overwrite targets kinds transforms +mergeOpts _ _ (CommandCommit path debug _ _ analyzeOutput overwrite format targets kinds transforms) = do + root <- collectBaseDir path + analyzeOutput' <- sendIO $ Path.resolveDir' analyzeOutput + pure . 
Commit $ CommitConfig root debug analyzeOutput' overwrite format targets kinds transforms + +data SnippetsConfig + = Analyze AnalyzeConfig + | Commit CommitConfig + deriving (Show, Generic) + +instance ToJSON SnippetsConfig where + toEncoding = genericToEncoding defaultOptions + +data AnalyzeConfig = AnalyzeConfig + { analyzeScanDir :: BaseDir + , analyzeDebug :: Bool + , analyzeOutput :: Path Abs Dir + , analyzeOverwriteOutput :: Bool + , analyzeTargets :: [SnippetTarget] + , analyzeKinds :: [SnippetKind] + , analyzeTransforms :: [SnippetTransform] + } + deriving (Show, Generic) + +instance ToJSON AnalyzeConfig where + toEncoding = genericToEncoding defaultOptions + +data CommitConfig = CommitConfig + { commitScanDir :: BaseDir + , commitDebug :: Bool + , commitAnalyzeOutput :: Path Abs Dir + , commitOverwriteFossaDeps :: Bool + , commitOutputFormat :: Maybe CommitOutputFormat + , commitTargets :: [SnippetTarget] + , commitKinds :: [SnippetKind] + , commitTransforms :: [SnippetTransform] + } + deriving (Show, Generic) + +instance ToJSON CommitConfig where + toEncoding = genericToEncoding defaultOptions + +-- | The targets of snippets to extract. +-- Reference: @millhone::extract::Target@. +data SnippetTarget + = SnippetTargetFunction + deriving (Eq, Enum, Bounded, Show, Generic) + +instance ToJSON SnippetTarget where + toEncoding = genericToEncoding defaultOptions + +parseTarget :: String -> Either String SnippetTarget +parseTarget input = case List.find (\t -> toString t == input) optionsTarget of + Just found -> Right found + Nothing -> Left $ generateParseError input (toString <$> optionsTarget) + +optionsTarget :: [SnippetTarget] +optionsTarget = enumFromTo minBound maxBound + +instance ToText SnippetTarget where + toText :: SnippetTarget -> Text + toText SnippetTargetFunction = "function" + +instance ToString SnippetTarget where + toString :: SnippetTarget -> String + toString = toString . 
toText + +labelForTarget :: Text +labelForTarget = "--target" + +-- | The kind of item this snippet represents. +-- Reference: @millhone::extract::Kind@. +data SnippetKind + = SnippetKindSignature + | SnippetKindBody + | SnippetKindFull + deriving (Eq, Enum, Bounded, Show, Generic) + +instance ToJSON SnippetKind where + toEncoding = genericToEncoding defaultOptions + +instance ToText SnippetKind where + toText :: SnippetKind -> Text + toText SnippetKindSignature = "signature" + toText SnippetKindBody = "body" + toText SnippetKindFull = "full" + +instance ToString SnippetKind where + toString :: SnippetKind -> String + toString = toString . toText + +parseKind :: String -> Either String SnippetKind +parseKind input = case List.find (\t -> toString t == input) optionsKind of + Just found -> Right found + Nothing -> Left $ generateParseError input (toString <$> optionsKind) + +optionsKind :: [SnippetKind] +optionsKind = enumFromTo minBound maxBound + +labelForKind :: Text +labelForKind = "--kind" + +-- | The normalization used to extract this snippet. +-- Reference: @millhone::extract::Transform@. +data SnippetTransform + = SnippetTransformCode + | SnippetTransformComment + | SnippetTransformSpace + deriving (Eq, Enum, Bounded, Show, Generic) + +instance ToJSON SnippetTransform where + toEncoding = genericToEncoding defaultOptions + +instance ToText SnippetTransform where + toText :: SnippetTransform -> Text + toText SnippetTransformCode = "code" + toText SnippetTransformComment = "comment" + toText SnippetTransformSpace = "space" + +instance ToString SnippetTransform where + toString :: SnippetTransform -> String + toString = toString . 
toText + +parseTransform :: String -> Either String SnippetTransform +parseTransform input = case List.find (\t -> toString t == input) optionsTransform of + Just found -> Right found + Nothing -> Left $ generateParseError input (toString <$> optionsTransform) + +optionsTransform :: [SnippetTransform] +optionsTransform = enumFromTo minBound maxBound + +labelForTransform :: Text +labelForTransform = "--transform" + +data CommitOutputFormat + = Yml + | Json + deriving (Eq, Enum, Bounded, Show, Generic) + +instance ToJSON CommitOutputFormat where + toEncoding = genericToEncoding defaultOptions + +instance ToText CommitOutputFormat where + toText :: CommitOutputFormat -> Text + toText Yml = "yml" + toText Json = "json" + +instance ToString CommitOutputFormat where + toString :: CommitOutputFormat -> String + toString = toString . toText + +parseCommitOutputFormat :: String -> Either String CommitOutputFormat +parseCommitOutputFormat input = case List.find (\t -> toString t == input) optionsCommitOutputFormat of + Just found -> Right found + Nothing -> Left $ generateParseError input (toString <$> optionsCommitOutputFormat) + +optionsCommitOutputFormat :: [CommitOutputFormat] +optionsCommitOutputFormat = enumFromTo minBound maxBound + +generateParseError :: String -> [String] -> String +generateParseError input options = "'" <> input <> "' is not a valid option; expected one of: " <> List.intercalate ", " options diff --git a/src/App/Fossa/EmbeddedBinary.hs b/src/App/Fossa/EmbeddedBinary.hs index 7dfaa9afbb..be716695af 100644 --- a/src/App/Fossa/EmbeddedBinary.hs +++ b/src/App/Fossa/EmbeddedBinary.hs @@ -11,6 +11,7 @@ module App.Fossa.EmbeddedBinary ( withThemisAndIndex, withBerkeleyBinary, withLernieBinary, + withMillhoneBinary, allBins, dumpEmbeddedBinary, themisVersion, @@ -56,6 +57,7 @@ data PackagedBinary | ThemisIndex | BerkeleyDB | Lernie + | Millhone deriving (Show, Eq, Enum, Bounded) allBins :: [PackagedBinary] @@ -118,6 +120,13 @@ withLernieBinary :: m c 
withLernieBinary = withEmbeddedBinary Lernie +withMillhoneBinary :: + ( Has (Lift IO) sig m + ) => + (BinaryPaths -> m c) -> + m c +withMillhoneBinary = withEmbeddedBinary Millhone + withEmbeddedBinary :: ( Has (Lift IO) sig m ) => @@ -150,6 +159,7 @@ writeBinary dest bin = sendIO . writeExecutable dest $ case bin of ThemisIndex -> embeddedBinaryThemisIndex BerkeleyDB -> embeddedBinaryBerkeleyDB Lernie -> embeddedBinaryLernie + Millhone -> embeddedBinaryMillhone writeExecutable :: Path Abs File -> ByteString -> IO () writeExecutable path content = do @@ -163,6 +173,7 @@ extractedPath bin = case bin of ThemisIndex -> $(mkRelFile "index.gob.xz") BerkeleyDB -> $(mkRelFile "berkeleydb-plugin") Lernie -> $(mkRelFile "lernie") + Millhone -> $(mkRelFile "millhone") -- | Extract to @$TMP/fossa-vendor/ -- We used to extract everything to @$TMP/fossa-vendor@, but there's a subtle issue with that. @@ -203,6 +214,15 @@ themisVersion = $$(themisVersionQ) embeddedBinaryLernie :: ByteString embeddedBinaryLernie = $(embedFileIfExists "vendor-bins/lernie") +-- To build this, run `make build` or `cargo build --release`. +#ifdef mingw32_HOST_OS +embeddedBinaryMillhone :: ByteString +embeddedBinaryMillhone = $(embedFileIfExists "target/release/millhone.exe") +#else +embeddedBinaryMillhone :: ByteString +embeddedBinaryMillhone = $(embedFileIfExists "target/release/millhone") +#endif + -- To build this, run `make build` or `cargo build --release`. 
#ifdef mingw32_HOST_OS embeddedBinaryBerkeleyDB :: ByteString diff --git a/src/App/Fossa/Main.hs b/src/App/Fossa/Main.hs index 9c6df9609f..246bf58dfa 100644 --- a/src/App/Fossa/Main.hs +++ b/src/App/Fossa/Main.hs @@ -9,6 +9,7 @@ import App.Fossa.DumpBinaries qualified as Dump import App.Fossa.LicenseScan qualified as LicenseScan (licenseScanSubCommand) import App.Fossa.ListTargets qualified as ListTargets import App.Fossa.Report qualified as Report +import App.Fossa.Snippets qualified as Snippets import App.Fossa.Subcommand (GetCommonOpts, GetSeverity, SubCommand (..), runSubCommand) import App.Fossa.Test qualified as Test import App.Fossa.VSI.IAT.AssertUserDefinedBinaries qualified as LinkBins @@ -86,6 +87,7 @@ subcommands = public <|> private , decodeSubCommand Container.containerSubCommand , decodeSubCommand ListTargets.listSubCommand , decodeSubCommand LinkBins.linkBinsSubCommand + , decodeSubCommand Snippets.snippetsSubCommand ] initCommand :: Mod CommandFields (IO ()) diff --git a/src/App/Fossa/Snippets.hs b/src/App/Fossa/Snippets.hs new file mode 100644 index 0000000000..8d607d6806 --- /dev/null +++ b/src/App/Fossa/Snippets.hs @@ -0,0 +1,46 @@ +-- Types in this module are tightly based on the types in the Millhone CLI. +-- +-- In the documentation for this module, Millhone symbols are written +-- using Rust-style paths (e.g. @millhone::extract::Target@). +-- +-- The Millhone CLI is at @extlib/millhone@. +-- +-- Notable exceptions: +-- - Auth config is not included here. +-- The plan is to use FOSSA reverse proxying eventually, +-- and until that's done Millhone CLI hard codes authentication information. +-- - Logging config is not included here. +-- Instead FOSSA CLI automatically configures it based on +-- its configured log severity. 
-- | The @snippets@ subcommand: 'mkSubCommand' applied to 'snippetsMain'.
snippetsSubCommand :: SubCommand SnippetsCommand SnippetsConfig
snippetsSubCommand = mkSubCommand snippetsMain

-- | Entry point for snippet scanning.
--
-- Logs a start message, then hands the resolved configuration to the
-- matching Millhone-backed implementation.
snippetsMain ::
  ( Has (Lift IO) sig m
  , Has Exec sig m
  , Has Diagnostics sig m
  , Has Logger sig m
  ) =>
  SnippetsConfig ->
  m ()
snippetsMain config = logInfo "Running FOSSA snippets" >> dispatch
  where
    -- Chosen lazily, so the log line above is always emitted first.
    dispatch = case config of
      Analyze analyzeCfg -> analyzeWithMillhone analyzeCfg
      Commit commitCfg -> commitWithMillhone commitCfg
-- | Build the @millhone analyze@ invocation for the given configuration.
--
-- Arguments are emitted in this order: logging flags (only when debugging),
-- the @analyze@ subcommand, the output directory, the overwrite flag,
-- the labeled target/kind/transform options, and finally the scan root.
mkCmd :: BinaryPaths -> Path Abs Dir -> AnalyzeConfig -> Command
mkCmd bin root AnalyzeConfig{..} =
  Command
    { cmdName = binaryPath
    , cmdArgs = arguments
    , cmdAllowErr = Never
    }
  where
    binaryPath = toText $ toPath bin

    -- 'argsLabeled' yields no arguments for an empty list,
    -- so empty option sets need no special casing here.
    arguments =
      mconcat
        [ flagsWhen analyzeDebug ["--log-level", "debug", "--log-format", "json"]
        , ["analyze"]
        , ["--output", argFromPath analyzeOutput]
        , flagsWhen analyzeOverwriteOutput ["--overwrite-output"]
        , argsLabeled labelForTarget analyzeTargets
        , argsLabeled labelForKind analyzeKinds
        , argsLabeled labelForTransform analyzeTransforms
        , [argFromPath root]
        ]

    -- Include the flags only when the switch is set.
    flagsWhen enabled flags = if enabled then flags else []
-- | Build the @millhone commit@ invocation for the given configuration.
--
-- Arguments are emitted in this order: logging flags (only when debugging),
-- the @commit@ subcommand, the analyze output directory, the optional
-- output format, the overwrite flag, the labeled target/kind/transform
-- options, and finally the scan root.
mkCmd :: BinaryPaths -> Path Abs Dir -> CommitConfig -> Command
mkCmd bin root CommitConfig{..} =
  Command
    { cmdName = binaryPath
    , cmdArgs = arguments
    , cmdAllowErr = Never
    }
  where
    binaryPath = toText $ toPath bin

    -- 'argsLabeled' yields no arguments for an empty list,
    -- so empty option sets need no special casing here.
    arguments =
      mconcat
        [ flagsWhen commitDebug ["--log-level", "debug", "--log-format", "json"]
        , ["commit"]
        , ["--analyze-output-dir", argFromPath commitAnalyzeOutput]
        , maybe [] (\chosen -> ["--format", toText chosen]) commitOutputFormat
        , flagsWhen commitOverwriteFossaDeps ["--overwrite-fossa-deps"]
        , argsLabeled labelForTarget commitTargets
        , argsLabeled labelForKind commitKinds
        , argsLabeled labelForTransform commitTransforms
        , [argFromPath root]
        ]

    -- Include the flags only when the switch is set.
    flagsWhen enabled flags = if enabled then flags else []
-- | A variant of 'exec' that is run for its side effects:
-- * Fails with 'CommandFailed' when 'exec' reports a failure, like @execThrow@.
-- * Logs each line of the subcommand's stdout via @logInfo@.
--
-- Lines are split on @\\n@; a newline terminating the output does not
-- produce an extra empty log entry, and a trailing @\\r@ (CRLF output)
-- is removed from each line. Blank lines inside the output are kept.
--
-- Note: currently this buffers subcommand output; a future version may stream instead.
execEffectful ::
  ( Has Exec sig m
  , Has Diagnostics sig m
  , Has Logger sig m
  ) =>
  Path Abs Dir ->
  Command ->
  m ()
execEffectful dir cmd = context ("Running command '" <> cmdName cmd <> "'") $ do
  result <- exec dir cmd
  case result of
    Left failure -> fatal (CommandFailed failure)
    Right stdout -> traverse_ (logInfo . pretty) (stdoutLines stdout)
  where
    -- Decode first, then split: 'Text.lines' treats a final newline as a
    -- terminator rather than a separator, unlike splitting the raw bytes on 10.
    stdoutLines :: BL.ByteString -> [Text]
    stdoutLines = map (Text.dropWhileEnd (== '\r')) . Text.lines . decodeUtf8 . toStrict
-- | Render each value with 'toText' and prefix it with the given label.
--
-- See 'argsLabeledWith' for the shape of the result.
argsLabeled :: ToText a => Text -> [a] -> [Text]
argsLabeled label = argsLabeledWith toText label

-- | Expand a list of values into repeated @label value@ argument pairs.
--
-- Each element becomes two entries: the label, then the rendered element.
-- An empty input list produces an empty argument list.
argsLabeledWith :: (a -> Text) -> Text -> [a] -> [Text]
argsLabeledWith render label = concatMap labeled
  where
    labeled value = [label, render value]