Skip to content
This repository has been archived by the owner on Apr 1, 2022. It is now read-only.

Commit

Permalink
Detect binaries as dependencies (#353)
Browse files Browse the repository at this point in the history
  • Loading branch information
jssblck authored Sep 7, 2021
1 parent 967a0a6 commit ad73f44
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 28 deletions.
7 changes: 7 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Spectrometer Changelog

## v2.15.13

Adds another closed beta feature around FOSSA C/C++ support.
For now this functionality is considered publicly undocumented, and is only used with support from FOSSA engineering.

- Adds support for reporting detected binaries as unlicensed dependencies ([#353](https://github.com/fossas/spectrometer/pull/353))

## v2.15.12

- Yarn: Analyzes yarn.lock without runtime error, when yarn.lock includes directory dependency. ([#361](https://github.com/fossas/spectrometer/pull/361))
Expand Down
1 change: 1 addition & 0 deletions spectrometer.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ library
App.Fossa.API.BuildLink
App.Fossa.API.BuildWait
App.Fossa.ArchiveUploader
App.Fossa.BinaryDeps
App.Fossa.Compatibility
App.Fossa.Configuration
App.Fossa.Container
Expand Down
65 changes: 45 additions & 20 deletions src/App/Fossa/Analyze.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@ module App.Fossa.Analyze (
JsonOutput (..),
VSIAnalysisMode (..),
IATAssertionMode (..),
discoverFuncs,
BinaryDiscoveryMode (..),
RecordMode (..),
ModeOptions (..),
discoverFuncs,
) where

import App.Docs (userGuideUrl)
import App.Fossa.API.BuildLink (getFossaBuildUrl)
import App.Fossa.Analyze.GraphMangler (graphingToGraph)
import App.Fossa.Analyze.Project (ProjectResult (..), mkResult)
import App.Fossa.Analyze.Record (AnalyzeEffects (..), AnalyzeJournal (..), loadReplayLog, saveReplayLog)
import App.Fossa.BinaryDeps (analyzeBinaryDeps)
import App.Fossa.FossaAPIV1 (UploadResponse (..), getProject, projectIsMonorepo, uploadAnalysis, uploadContributors)
import App.Fossa.ManualDeps (analyzeFossaDepsFile)
import App.Fossa.ProjectInference (inferProjectDefault, inferProjectFromVCS, mergeOverride, saveRevision)
Expand Down Expand Up @@ -64,7 +67,7 @@ import Path (Abs, Dir, Path, fromAbsDir, toFilePath)
import Path.IO (makeRelative)
import Path.IO qualified as P
import Srclib.Converter qualified as Srclib
import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit, parseLocator)
import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit (..), parseLocator)
import Strategy.Bundler qualified as Bundler
import Strategy.Cargo qualified as Cargo
import Strategy.Carthage qualified as Carthage
Expand Down Expand Up @@ -119,6 +122,14 @@ data UnpackArchives = UnpackArchives

data JsonOutput = JsonOutput

-- | Collect analysis modes into a single type for ease of use.
-- These modes are intended to be different options that alter how analysis is performed or what analysis steps are followed.
data ModeOptions = ModeOptions
{ modeVSIAnalysis :: VSIAnalysisMode
-- ^ Passed to 'analyzeVSI' to decide whether the VSI analysis step runs.
, modeIATAssertion :: IATAssertionMode
-- ^ Passed to 'doAssertRevisionBinaries' after a successful upload.
, modeBinaryDiscovery :: BinaryDiscoveryMode
-- ^ Passed to 'analyzeDiscoverBinaries' to decide whether binaries are reported as dependencies.
}

-- | "VSI analysis" modes
data VSIAnalysisMode
= -- | enable the VSI analysis strategy
Expand All @@ -133,6 +144,13 @@ data IATAssertionMode
| -- | assertion not enabled
IATAssertionDisabled

-- | "Binary Discovery" modes
-- Consumed by 'analyzeDiscoverBinaries': only 'BinaryDiscoveryEnabled' triggers the binary scan,
-- any other value results in no additional source unit.
data BinaryDiscoveryMode
= -- | Binary discovery enabled
BinaryDiscoveryEnabled
| -- | Binary discovery disabled
BinaryDiscoveryDisabled

-- | "Replay logging" modes
data RecordMode
= -- | record effect invocations
Expand All @@ -142,8 +160,8 @@ data RecordMode
| -- | don't record or replay
RecordModeNone

analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> VSIAnalysisMode -> IATAssertionMode -> AllFilters -> IO ()
analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput enableVSI assertionMode filters =
analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> ModeOptions -> AllFilters -> IO ()
analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput modeOptions filters =
withDefaultLogger logSeverity
. Diag.logWithExit_
. runReadFSIO
Expand All @@ -170,7 +188,7 @@ analyzeMain workdir recordMode logSeverity destination project unpackArchives js
. runReplay @Exec (effectsExec effects)
$ doAnalyze basedir
where
doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput enableVSI assertionMode filters
doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput modeOptions filters

discoverFuncs :: (TaskEffs sig m, TaskEffs rsig run) => [Path Abs Dir -> m [DiscoveredProject run]]
discoverFuncs =
Expand Down Expand Up @@ -245,19 +263,23 @@ analyze ::
OverrideProject ->
Flag UnpackArchives ->
Flag JsonOutput ->
VSIAnalysisMode ->
IATAssertionMode ->
ModeOptions ->
AllFilters ->
m ()
analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableVSI iatAssertion filters = do
analyze (BaseDir basedir) destination override unpackArchives jsonOutput ModeOptions{..} filters = do
capabilities <- sendIO getNumCapabilities

let apiOpts = case destination of
OutputStdout -> Nothing
UploadScan opts _ -> Just opts

-- additional source units are built outside the standard strategy flow, because they either
-- require additional information (eg API credentials), or they return additional information (eg user deps).
manualSrcUnits <- analyzeFossaDepsFile basedir apiOpts
vsiResults <- analyzeVSI enableVSI apiOpts basedir filters
vsiResults <- analyzeVSI modeVSIAnalysis apiOpts basedir filters
binarySearchResults <- analyzeDiscoverBinaries modeBinaryDiscovery basedir filters
let additionalSourceUnits :: [SourceUnit]
additionalSourceUnits = catMaybes [manualSrcUnits, vsiResults, binarySearchResults]

(projectResults, ()) <-
runOutput @ProjectResult
Expand All @@ -270,14 +292,14 @@ analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableV
let filteredProjects = filterProjects (BaseDir basedir) projectResults

-- Need to check if vendored is empty as well, even if it's a boolean that vendoredDeps exist
case checkForEmptyUpload projectResults filteredProjects [manualSrcUnits, vsiResults] of
case checkForEmptyUpload projectResults filteredProjects additionalSourceUnits of
NoneDiscovered -> Diag.fatal ErrNoProjectsDiscovered
FilteredAll count -> Diag.fatal (ErrFilteredAllProjects count projectResults)
FoundSome sourceUnits -> case destination of
OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult manualSrcUnits filteredProjects
OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult additionalSourceUnits filteredProjects
UploadScan opts metadata -> do
locator <- uploadSuccessfulAnalysis (BaseDir basedir) opts metadata jsonOutput override sourceUnits
doAssertRevisionBinaries iatAssertion opts locator
doAssertRevisionBinaries modeIATAssertion opts locator

analyzeVSI :: (MonadIO m, Has Diag.Diagnostics sig m, Has Exec sig m, Has (Lift IO) sig m, Has Logger sig m) => VSIAnalysisMode -> Maybe ApiOpts -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do
Expand All @@ -286,6 +308,12 @@ analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do
pure $ Just results
analyzeVSI _ _ _ _ = pure Nothing

-- | Dispatch on the binary discovery mode.
-- When discovery is enabled, log that it is starting and hand off to 'analyzeBinaryDeps';
-- when it is disabled, produce no source unit.
analyzeDiscoverBinaries :: (MonadIO m, Has Diag.Diagnostics sig m, Has (Lift IO) sig m, Has Logger sig m, Has ReadFS sig m) => BinaryDiscoveryMode -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeDiscoverBinaries mode scanDir pathFilters = case mode of
  BinaryDiscoveryEnabled -> do
    logInfo "Discovering binary files as dependencies"
    analyzeBinaryDeps scanDir pathFilters
  BinaryDiscoveryDisabled -> pure Nothing

-- | Run the revision binary assertion only when the assertion mode carries a directory;
-- every other mode is a no-op.
doAssertRevisionBinaries :: (Has Diag.Diagnostics sig m, Has ReadFS sig m, Has (Lift IO) sig m, Has Logger sig m) => IATAssertionMode -> ApiOpts -> Locator -> m ()
doAssertRevisionBinaries assertionMode apiOpts locator = case assertionMode of
  IATAssertionEnabled binaryDir -> assertRevisionBinaries binaryDir apiOpts locator
  _ -> pure ()
Expand Down Expand Up @@ -380,9 +408,8 @@ data CountedResult
-- Takes a list of all projects analyzed, and the list after filtering. We assume
-- that the smaller list is the latter, and return that list. We also take into account
-- any additional source units built outside the standard strategy flow (for example from
-- fossa-deps.yml, VSI analysis, or binary discovery).
checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [Maybe SourceUnit] -> CountedResult
checkForEmptyUpload xs ys potentialAdditionalUnits = do
let additionalUnits = catMaybes potentialAdditionalUnits
checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [SourceUnit] -> CountedResult
checkForEmptyUpload xs ys additionalUnits = do
if null additionalUnits
then case (xlen, ylen) of
-- We didn't discover, so we also didn't filter
Expand Down Expand Up @@ -463,16 +490,14 @@ buildProjectSummary project projectLocator projectUrl = do
, "id" .= projectLocator
]

buildResult :: Maybe SourceUnit -> [ProjectResult] -> Aeson.Value
buildResult maybeSrcUnit projects =
buildResult :: [SourceUnit] -> [ProjectResult] -> Aeson.Value
buildResult srcUnits projects =
Aeson.object
[ "projects" .= map buildProject projects
, "sourceUnits" .= finalSourceUnits
]
where
finalSourceUnits = case maybeSrcUnit of
Just unit -> unit : scannedUnits
Nothing -> scannedUnits
finalSourceUnits = srcUnits ++ scannedUnits
scannedUnits = map Srclib.toSourceUnit projects

buildProject :: ProjectResult -> Aeson.Value
Expand Down
115 changes: 115 additions & 0 deletions src/App/Fossa/BinaryDeps.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{-# LANGUAGE RecordWildCards #-}

module App.Fossa.BinaryDeps (analyzeBinaryDeps) where

import App.Fossa.Analyze.Project (ProjectResult (..))
import App.Fossa.VSI.IAT.Fingerprint (fingerprintRaw)
import App.Fossa.VSI.IAT.Types (Fingerprint (..))
import Control.Algebra (Has)
import Control.Carrier.Diagnostics (Diagnostics, fromEither)
import Control.Effect.Lift (Lift)
import Data.ByteString qualified as BS
import Data.Maybe (catMaybes)
import Data.String.Conversion (toText)
import Data.Text (Text)
import Data.Text qualified as Text
import Discovery.Filters (AllFilters (..), FilterCombination (combinedPaths))
import Discovery.Walk (WalkStep (WalkContinue), walk')
import Effect.ReadFS (ReadFS, readContentsBSLimit)
import Path (Abs, Dir, File, Path, isProperPrefixOf, stripProperPrefix, toFilePath, (</>))
import Srclib.Converter qualified as Srclib
import Srclib.Types (AdditionalDepData (..), SourceUnit (..), SourceUserDefDep (..))
import Types (GraphBreadth (Complete))

-- | A file that 'fingerprintIfBinary' judged to be binary, paired with its fingerprint.
data BinaryFile = BinaryFile
{ binaryPath :: Path Abs File
-- ^ Absolute path of the detected file.
, binaryFingerprint :: Fingerprint
-- ^ Fingerprint obtained from 'fingerprintRaw' for that file.
}

-- | Scan the whole directory for binary files and report them as a dedicated source unit.
-- Binary detection does not fit the shape of a regular strategy, so it runs on its own over the scan directory.
-- The intent is to let a FOSSA user flag every vendored binary (as defined by git) in the project as a dependency,
-- after which the usual FOSSA UX flows can be used to ignore them or attach license information.
-- Yields 'Nothing' when no binaries are found.
analyzeBinaryDeps :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeBinaryDeps dir filters = summarize <$> fingerprintBinaries (toPathFilters dir filters) dir
  where
    -- An empty scan result means there is nothing to report.
    summarize [] = Nothing
    summarize found = Just (toSourceUnit (toProject dir) found)

-- | Walk the directory tree and collect a 'BinaryFile' for every binary file found in
-- directories accepted by 'shouldFingerprintDir'. The walk always continues into
-- subdirectories, whether or not the current directory was accepted.
fingerprintBinaries :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => PathFilters -> Path Abs Dir -> m [BinaryFile]
fingerprintBinaries filters = walk' $ \currentDir _ files -> do
  found <-
    if shouldFingerprintDir currentDir filters
      then catMaybes <$> traverse fingerprintIfBinary files
      else pure []
  pure (found, WalkContinue)

-- | Fingerprint a file only if 'fileIsBinary' reports it as binary; non-binary files yield 'Nothing'.
fingerprintIfBinary :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs File -> m (Maybe BinaryFile)
fingerprintIfBinary file = do
  looksBinary <- fileIsBinary file
  if not looksBinary
    then pure Nothing
    else Just . BinaryFile file <$> fingerprintRaw file

-- | PathFilters is a specialized filter mechanism that operates only on absolute directory paths.
-- Built from 'AllFilters' by 'toPathFilters' and consulted by 'shouldFingerprintDir'.
data PathFilters = PathFilters
{ include :: [Path Abs Dir]
-- ^ Directories to scan (along with everything beneath them); an empty list places no include restriction.
, exclude :: [Path Abs Dir]
-- ^ Directories to skip (along with everything beneath them); exclusion wins over inclusion.
}
deriving (Show)

-- | Convert the general-purpose filters into 'PathFilters' by resolving each combined
-- include/exclude path against the scan root.
toPathFilters :: Path Abs Dir -> AllFilters -> PathFilters
toPathFilters root filters = PathFilters (absolutize includeFilters) (absolutize excludeFilters)
  where
    -- Pick one side of the filters and anchor each of its paths at the root.
    absolutize select = [root </> rel | rel <- combinedPaths (select filters)]

-- | Decide whether files directly inside a directory should be fingerprinted.
-- A directory is rejected when it equals or lies beneath any exclude path. Otherwise it is
-- accepted when there are no include paths at all, or when it equals or lies beneath one of them.
shouldFingerprintDir :: Path Abs Dir -> PathFilters -> Bool
shouldFingerprintDir dir filters
  | any covers (exclude filters) = False
  | null (include filters) = True
  | otherwise = any covers (include filters)
  where
    -- True when the filter path is the directory itself or one of its ancestors.
    covers filterPath = filterPath == dir || isProperPrefixOf filterPath dir

-- | Build the 'ProjectResult' that stands in for binary discovery: labelled @binary-deps@,
-- pointing at the scan root, with 'Complete' graph breadth. The 'mempty' and @[]@ arguments
-- are presumably the (empty) dependency graph and manifest list -- confirm against 'ProjectResult'.
toProject :: Path Abs Dir -> ProjectResult
toProject scanRoot = ProjectResult "binary-deps" scanRoot mempty Complete []

-- | Describe a detected binary as a user-defined dependency: the name is the file path
-- rendered relative to the root, the version is the shortened fingerprint, and the license is left blank.
toDependency :: Path Abs Dir -> BinaryFile -> SourceUserDefDep
toDependency root (BinaryFile path fingerprint) =
  SourceUserDefDep
    { srcUserDepName = renderRelative root path
    , srcUserDepVersion = renderFingerprint fingerprint
    , srcUserDepLicense = ""
    , srcUserDepDescription = Just "Binary discovered in source tree"
    , srcUserDepHomepage = Nothing
    }

-- | Convert the project into its regular source unit, then attach the detected binaries
-- as user-defined dependencies in the unit's additional data.
toSourceUnit :: ProjectResult -> [BinaryFile] -> SourceUnit
toSourceUnit project binaries = baseUnit{additionalData = Just depData}
  where
    baseUnit = Srclib.toSourceUnit project
    -- Only the first slot (user-defined deps) is populated; the second is left empty.
    depData = AdditionalDepData (Just userDefinedDeps) Nothing
    userDefinedDeps = toDependency (projectResultPath project) <$> binaries

-- | Render only the leading 12 characters of the fingerprint.
-- That prefix is enough to give high confidence that a future binary with the same name
-- will not collide; the full 256 bits are not needed for that purpose.
renderFingerprint :: Fingerprint -> Text
renderFingerprint = Text.take 12 . unFingerprint

-- | Render the file path relative to the given directory.
-- If the directory is not a proper prefix of the file, fall back to rendering the absolute path.
renderRelative :: Path Abs Dir -> Path Abs File -> Text
renderRelative absDir absFile =
  either (const absoluteText) (toText . toFilePath) (stripProperPrefix absDir absFile)
  where
    absoluteText = toText (toFilePath absFile)

-- | Classify a file as binary the way git does:
-- the file is binary when a zero byte occurs within its first 8000 bytes.
-- A failed read is surfaced through 'Diagnostics' via 'fromEither'.
fileIsBinary :: (Has ReadFS sig m, Has Diagnostics sig m) => Path Abs File -> m Bool
fileIsBinary file = do
  leadingBytes <- fromEither =<< readContentsBSLimit file 8000
  pure (0 `BS.elem` leadingBytes)
12 changes: 10 additions & 2 deletions src/App/Fossa/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module App.Fossa.Main (
appMain,
) where

import App.Fossa.Analyze (IATAssertionMode (..), JsonOutput (..), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain)
import App.Fossa.Analyze (BinaryDiscoveryMode (..), IATAssertionMode (..), JsonOutput (..), ModeOptions (ModeOptions), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain)
import App.Fossa.Compatibility (Argument, argumentParser, compatibilityMain)
import App.Fossa.Configuration (
ConfigFile (
Expand Down Expand Up @@ -191,7 +191,8 @@ appMain = do

let analyzeOverride = override{overrideBranch = analyzeBranch <|> ((fileConfig >>= configRevision) >>= configBranch)}
combinedFilters = normalizedFilters fileConfig analyzeOptions
doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput analyzeVSIMode assertionMode combinedFilters
modeOptions = ModeOptions analyzeVSIMode assertionMode analyzeBinaryDiscoveryMode
doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput modeOptions combinedFilters

if analyzeOutput
then doAnalyze OutputStdout
Expand Down Expand Up @@ -414,6 +415,7 @@ analyzeOpts =
<*> many (option (eitherReader pathOpt) (long "only-path" <> help "Only scan these paths. See paths.only in the fossa.yml spec." <> metavar "PATH"))
<*> many (option (eitherReader pathOpt) (long "exclude-path" <> help "Exclude these paths from scanning. See paths.exclude in the fossa.yml spec." <> metavar "PATH"))
<*> vsiAnalyzeOpt
<*> binaryDiscoveryOpt
<*> iatAssertionOpt
<*> monorepoOpts
<*> analyzeReplayOpt
Expand All @@ -424,6 +426,11 @@ vsiAnalyzeOpt =
flag' VSIAnalysisEnabled (long "enable-vsi" <> hidden)
<|> pure VSIAnalysisDisabled

-- | Parser for the hidden @--experimental-enable-binary-discovery@ switch.
-- Binary discovery is disabled unless the flag is present.
binaryDiscoveryOpt :: Parser BinaryDiscoveryMode
binaryDiscoveryOpt = enabled <|> disabled
  where
    enabled = flag' BinaryDiscoveryEnabled (long "experimental-enable-binary-discovery" <> hidden)
    disabled = pure BinaryDiscoveryDisabled

iatAssertionOpt :: Parser AnalyzeVSIAssertionMode
iatAssertionOpt =
(AnalyzeVSIAssertionEnabled <$> strOption (long "experimental-link-project-binary" <> hidden))
Expand Down Expand Up @@ -703,6 +710,7 @@ data AnalyzeOptions = AnalyzeOptions
, analyzeOnlyPaths :: [Path Rel Dir]
, analyzeExcludePaths :: [Path Rel Dir]
, analyzeVSIMode :: VSIAnalysisMode
, analyzeBinaryDiscoveryMode :: BinaryDiscoveryMode
, analyzeAssertMode :: AnalyzeVSIAssertionMode
, monorepoAnalysisOpts :: MonorepoAnalysisOpts
, analyzeRecordMode :: RecordMode
Expand Down
1 change: 1 addition & 0 deletions src/App/Fossa/VSI/IAT/Fingerprint.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module App.Fossa.VSI.IAT.Fingerprint (
fingerprintRaw,
fingerprintContentsRaw,
) where

Expand Down
7 changes: 1 addition & 6 deletions src/App/Fossa/VSI/IAT/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@ module App.Fossa.VSI.IAT.Types (
) where

import App.Fossa.VSI.Types qualified as VSI
import Data.Aeson (
FromJSON (parseJSON),
ToJSON (toJSON),
withObject,
(.:),
)
import Data.Aeson (FromJSON (parseJSON), ToJSON (toJSON), withObject, (.:))
import Data.Text (Text)

-- | Fingerprint uniquely identifies a file, derived from its content.
Expand Down
Loading

0 comments on commit ad73f44

Please sign in to comment.