diff --git a/Changelog.md b/Changelog.md index 5e800bb45..182c6d8ac 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,12 @@ # Spectrometer Changelog +## v2.15.13 + +Adds another closed beta feature around FOSSA C/C++ support. +For now this functionality is considered publicly undocumented, and is only used with support from FOSSA engineering. + +- Adds support for reporting detected binaries as unlicensed dependencies ([#353](https://github.com/fossas/spectrometer/pull/353)) + ## v2.15.12 - Yarn: Analyzes yarn.lock without runtime error, when yarn.lock includes directory dependency. ([#361](https://github.com/fossas/spectrometer/pull/361)) diff --git a/spectrometer.cabal b/spectrometer.cabal index c98f9c372..719b5482b 100644 --- a/spectrometer.cabal +++ b/spectrometer.cabal @@ -139,6 +139,7 @@ library App.Fossa.API.BuildLink App.Fossa.API.BuildWait App.Fossa.ArchiveUploader + App.Fossa.BinaryDeps App.Fossa.Compatibility App.Fossa.Configuration App.Fossa.Container diff --git a/src/App/Fossa/Analyze.hs b/src/App/Fossa/Analyze.hs index 466717fb4..f70f241e6 100644 --- a/src/App/Fossa/Analyze.hs +++ b/src/App/Fossa/Analyze.hs @@ -7,8 +7,10 @@ module App.Fossa.Analyze ( JsonOutput (..), VSIAnalysisMode (..), IATAssertionMode (..), - discoverFuncs, + BinaryDiscoveryMode (..), RecordMode (..), + ModeOptions (..), + discoverFuncs, ) where import App.Docs (userGuideUrl) @@ -16,6 +18,7 @@ import App.Fossa.API.BuildLink (getFossaBuildUrl) import App.Fossa.Analyze.GraphMangler (graphingToGraph) import App.Fossa.Analyze.Project (ProjectResult (..), mkResult) import App.Fossa.Analyze.Record (AnalyzeEffects (..), AnalyzeJournal (..), loadReplayLog, saveReplayLog) +import App.Fossa.BinaryDeps (analyzeBinaryDeps) import App.Fossa.FossaAPIV1 (UploadResponse (..), getProject, projectIsMonorepo, uploadAnalysis, uploadContributors) import App.Fossa.ManualDeps (analyzeFossaDepsFile) import App.Fossa.ProjectInference (inferProjectDefault, inferProjectFromVCS, mergeOverride, saveRevision) @@ -64,7 +67,7 @@ import Path (Abs, Dir, Path, fromAbsDir, toFilePath) import Path.IO (makeRelative) import Path.IO qualified as P import Srclib.Converter qualified as Srclib -import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit, parseLocator) +import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit (..), parseLocator) import Strategy.Bundler qualified as Bundler import Strategy.Cargo qualified as Cargo import Strategy.Carthage qualified as Carthage @@ -119,6 +122,14 @@ data UnpackArchives = UnpackArchives data JsonOutput = JsonOutput +-- | Collect analysis modes into a single type for ease of use. +-- These modes are intended to be different options that alter how analysis is performed or what analysis steps are followed. +data ModeOptions = ModeOptions + { modeVSIAnalysis :: VSIAnalysisMode + , modeIATAssertion :: IATAssertionMode + , modeBinaryDiscovery :: BinaryDiscoveryMode + } + -- | "VSI analysis" modes data VSIAnalysisMode = -- | enable the VSI analysis strategy @@ -133,6 +144,13 @@ data IATAssertionMode | -- | assertion not enabled IATAssertionDisabled +-- | "Binary Discovery" modes +data BinaryDiscoveryMode + = -- | Binary discovery enabled + BinaryDiscoveryEnabled + | -- | Binary discovery disabled + BinaryDiscoveryDisabled + -- | "Replay logging" modes data RecordMode = -- | record effect invocations @@ -142,8 +160,8 @@ data RecordMode | -- | don't record or replay RecordModeNone -analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> VSIAnalysisMode -> IATAssertionMode -> AllFilters -> IO () -analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput enableVSI assertionMode filters = +analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> ModeOptions -> AllFilters -> IO () +analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput modeOptions filters = withDefaultLogger logSeverity . Diag.logWithExit_ . runReadFSIO @@ -170,7 +188,7 @@ analyzeMain workdir recordMode logSeverity destination project unpackArchives js . runReplay @Exec (effectsExec effects) $ doAnalyze basedir where - doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput enableVSI assertionMode filters + doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput modeOptions filters discoverFuncs :: (TaskEffs sig m, TaskEffs rsig run) => [Path Abs Dir -> m [DiscoveredProject run]] discoverFuncs = @@ -245,19 +263,23 @@ analyze :: OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> - VSIAnalysisMode -> - IATAssertionMode -> + ModeOptions -> AllFilters -> m () -analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableVSI iatAssertion filters = do +analyze (BaseDir basedir) destination override unpackArchives jsonOutput ModeOptions{..} filters = do capabilities <- sendIO getNumCapabilities let apiOpts = case destination of OutputStdout -> Nothing UploadScan opts _ -> Just opts + -- additional source units are built outside the standard strategy flow, because they either + -- require additional information (eg API credentials), or they return additional information (eg user deps). manualSrcUnits <- analyzeFossaDepsFile basedir apiOpts - vsiResults <- analyzeVSI enableVSI apiOpts basedir filters + vsiResults <- analyzeVSI modeVSIAnalysis apiOpts basedir filters + binarySearchResults <- analyzeDiscoverBinaries modeBinaryDiscovery basedir filters + let additionalSourceUnits :: [SourceUnit] + additionalSourceUnits = catMaybes [manualSrcUnits, vsiResults, binarySearchResults] (projectResults, ()) <- runOutput @ProjectResult @@ -270,14 +292,14 @@ analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableV let filteredProjects = filterProjects (BaseDir basedir) projectResults -- Need to check if vendored is empty as well, even if its a boolean that vendoredDeps exist - case checkForEmptyUpload projectResults filteredProjects [manualSrcUnits, vsiResults] of + case checkForEmptyUpload projectResults filteredProjects additionalSourceUnits of NoneDiscovered -> Diag.fatal ErrNoProjectsDiscovered FilteredAll count -> Diag.fatal (ErrFilteredAllProjects count projectResults) FoundSome sourceUnits -> case destination of - OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult manualSrcUnits filteredProjects + OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult additionalSourceUnits filteredProjects UploadScan opts metadata -> do locator <- uploadSuccessfulAnalysis (BaseDir basedir) opts metadata jsonOutput override sourceUnits - doAssertRevisionBinaries iatAssertion opts locator + doAssertRevisionBinaries modeIATAssertion opts locator analyzeVSI :: (MonadIO m, Has Diag.Diagnostics sig m, Has Exec sig m, Has (Lift IO) sig m, Has Logger sig m) => VSIAnalysisMode -> Maybe ApiOpts -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit) analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do @@ -286,6 +308,12 @@ analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do pure $ Just results analyzeVSI _ _ _ _ = pure Nothing +analyzeDiscoverBinaries :: (MonadIO m, Has Diag.Diagnostics sig m, Has (Lift IO) sig m, Has Logger sig m, Has ReadFS sig m) => BinaryDiscoveryMode -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit) +analyzeDiscoverBinaries BinaryDiscoveryEnabled dir filters = do + logInfo "Discovering binary files as dependencies" + analyzeBinaryDeps dir filters +analyzeDiscoverBinaries _ _ _ = pure Nothing + doAssertRevisionBinaries :: (Has Diag.Diagnostics sig m, Has ReadFS sig m, Has (Lift IO) sig m, Has Logger sig m) => IATAssertionMode -> ApiOpts -> Locator -> m () doAssertRevisionBinaries (IATAssertionEnabled dir) apiOpts locator = assertRevisionBinaries dir apiOpts locator doAssertRevisionBinaries _ _ _ = pure () @@ -380,9 +408,8 @@ data CountedResult -- Takes a list of all projects analyzed, and the list after filtering. We assume -- that the smaller list is the latter, and return that list. Starting with user-defined deps, -- we also include a check for an additional source unit from fossa-deps.yml. -checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [Maybe SourceUnit] -> CountedResult -checkForEmptyUpload xs ys potentialAdditionalUnits = do - let additionalUnits = catMaybes potentialAdditionalUnits +checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [SourceUnit] -> CountedResult +checkForEmptyUpload xs ys additionalUnits = do if null additionalUnits then case (xlen, ylen) of -- We didn't discover, so we also didn't filter @@ -463,16 +490,14 @@ buildProjectSummary project projectLocator projectUrl = do , "id" .= projectLocator ] -buildResult :: Maybe SourceUnit -> [ProjectResult] -> Aeson.Value -buildResult maybeSrcUnit projects = +buildResult :: [SourceUnit] -> [ProjectResult] -> Aeson.Value +buildResult srcUnits projects = Aeson.object [ "projects" .= map buildProject projects , "sourceUnits" .= finalSourceUnits ] where - finalSourceUnits = case maybeSrcUnit of - Just unit -> unit : scannedUnits - Nothing -> scannedUnits + finalSourceUnits = srcUnits ++ scannedUnits scannedUnits = map Srclib.toSourceUnit projects buildProject :: ProjectResult -> Aeson.Value diff --git a/src/App/Fossa/BinaryDeps.hs b/src/App/Fossa/BinaryDeps.hs new file mode 100644 index 000000000..3fe0e3758 --- /dev/null +++ b/src/App/Fossa/BinaryDeps.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE RecordWildCards #-} + +module App.Fossa.BinaryDeps (analyzeBinaryDeps) where + +import App.Fossa.Analyze.Project (ProjectResult (..)) +import App.Fossa.VSI.IAT.Fingerprint (fingerprintRaw) +import App.Fossa.VSI.IAT.Types (Fingerprint (..)) +import Control.Algebra (Has) +import Control.Carrier.Diagnostics (Diagnostics, fromEither) +import Control.Effect.Lift (Lift) +import Data.ByteString qualified as BS +import Data.Maybe (catMaybes) +import Data.String.Conversion (toText) +import Data.Text (Text) +import Data.Text qualified as Text +import Discovery.Filters (AllFilters (..), FilterCombination (combinedPaths)) +import Discovery.Walk (WalkStep (WalkContinue), walk') +import Effect.ReadFS (ReadFS, readContentsBSLimit) +import Path (Abs, Dir, File, Path, isProperPrefixOf, stripProperPrefix, toFilePath, ()) +import Srclib.Converter qualified as Srclib +import Srclib.Types (AdditionalDepData (..), SourceUnit (..), SourceUserDefDep (..)) +import Types (GraphBreadth (Complete)) + +data BinaryFile = BinaryFile + { binaryPath :: Path Abs File + , binaryFingerprint :: Fingerprint + } + +-- | Binary detection is sufficiently different from other analysis types that it cannot be just another strategy. +-- Instead, binary detection is run separately over the entire scan directory, outputting its own source unit. +-- The goal of this feature is to enable a FOSSA user to flag all vendored binaries (as defined by git) in the project as dependencies. +-- Users may then use standard FOSSA UX flows to ignore or add license information to the detected binaries. +analyzeBinaryDeps :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs Dir -> AllFilters -> m (Maybe SourceUnit) +analyzeBinaryDeps dir filters = do + binaries <- fingerprintBinaries (toPathFilters dir filters) dir + if null binaries + then pure Nothing + else pure . Just $ toSourceUnit (toProject dir) binaries + +fingerprintBinaries :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => PathFilters -> Path Abs Dir -> m [BinaryFile] +fingerprintBinaries filters = walk' $ \dir _ files -> do + if shouldFingerprintDir dir filters + then do + someBinaries <- traverse fingerprintIfBinary files + pure (catMaybes someBinaries, WalkContinue) + else pure ([], WalkContinue) + +fingerprintIfBinary :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs File -> m (Maybe BinaryFile) +fingerprintIfBinary file = do + isBinary <- fileIsBinary file + if isBinary + then do + fp <- fingerprintRaw file + pure . Just $ BinaryFile file fp + else pure Nothing + +-- | PathFilters is a specialized filter mechanism that operates only on absolute directory paths. +data PathFilters = PathFilters + { include :: [Path Abs Dir] + , exclude :: [Path Abs Dir] + } + deriving (Show) + +toPathFilters :: Path Abs Dir -> AllFilters -> PathFilters +toPathFilters root filters = + PathFilters + { include = map (root ) (combinedPaths $ includeFilters filters) + , exclude = map (root ) (combinedPaths $ excludeFilters filters) + } + +shouldFingerprintDir :: Path Abs Dir -> PathFilters -> Bool +shouldFingerprintDir dir filters = (not shouldExclude) && shouldInclude + where + shouldExclude = (isPrefixedOrEqual dir) `any` (exclude filters) + shouldInclude = null (include filters) || (isPrefixedOrEqual dir) `any` (include filters) + isPrefixedOrEqual a b = a == b || isProperPrefixOf b a -- swap order of isProperPrefixOf comparison because we want to know if dir is prefixed by any filter + +toProject :: Path Abs Dir -> ProjectResult +toProject dir = ProjectResult "binary-deps" dir mempty Complete [] + +toDependency :: Path Abs Dir -> BinaryFile -> SourceUserDefDep +toDependency root BinaryFile{..} = + SourceUserDefDep + { srcUserDepName = renderRelative root binaryPath + , srcUserDepVersion = renderFingerprint binaryFingerprint + , srcUserDepLicense = "" + , srcUserDepDescription = Just "Binary discovered in source tree" + , srcUserDepHomepage = Nothing + } + +toSourceUnit :: ProjectResult -> [BinaryFile] -> SourceUnit +toSourceUnit project binaries = do + let unit = Srclib.toSourceUnit project + let deps = map (toDependency $ projectResultPath project) binaries + unit{additionalData = Just $ AdditionalDepData (Just deps) Nothing} + +-- | Just render the first few characters of the fingerprint. +-- The goal is to provide a high confidence that future binaries with the same name won't collide, +-- and we don't need all 256 bits for that. +renderFingerprint :: Fingerprint -> Text +renderFingerprint fingerprint = Text.take 12 $ unFingerprint fingerprint + +renderRelative :: Path Abs Dir -> Path Abs File -> Text +renderRelative absDir absFile = + case stripProperPrefix absDir absFile of + Left _ -> toText . toFilePath $ absFile + Right relFile -> toText . toFilePath $ relFile + +-- | Determine if a file is binary using the same method as git: +-- "is there a zero byte in the first 8000 bytes of the file" +fileIsBinary :: (Has ReadFS sig m, Has Diagnostics sig m) => Path Abs File -> m Bool +fileIsBinary file = do + attemptedContent <- readContentsBSLimit file 8000 + content <- fromEither attemptedContent + pure $ BS.elem 0 content diff --git a/src/App/Fossa/Main.hs b/src/App/Fossa/Main.hs index 9cd007851..8703a1eec 100644 --- a/src/App/Fossa/Main.hs +++ b/src/App/Fossa/Main.hs @@ -4,7 +4,7 @@ module App.Fossa.Main ( appMain, ) where -import App.Fossa.Analyze (IATAssertionMode (..), JsonOutput (..), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain) +import App.Fossa.Analyze (BinaryDiscoveryMode (..), IATAssertionMode (..), JsonOutput (..), ModeOptions (ModeOptions), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain) import App.Fossa.Compatibility (Argument, argumentParser, compatibilityMain) import App.Fossa.Configuration ( ConfigFile ( @@ -191,7 +191,8 @@ appMain = do let analyzeOverride = override{overrideBranch = analyzeBranch <|> ((fileConfig >>= configRevision) >>= configBranch)} combinedFilters = normalizedFilters fileConfig analyzeOptions - doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput analyzeVSIMode assertionMode combinedFilters + modeOptions = ModeOptions analyzeVSIMode assertionMode analyzeBinaryDiscoveryMode + doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput modeOptions combinedFilters if analyzeOutput then doAnalyze OutputStdout @@ -414,6 +415,7 @@ analyzeOpts = <*> many (option (eitherReader pathOpt) (long "only-path" <> help "Only scan these paths. See paths.only in the fossa.yml spec." <> metavar "PATH")) <*> many (option (eitherReader pathOpt) (long "exclude-path" <> help "Exclude these paths from scanning. See paths.exclude in the fossa.yml spec." <> metavar "PATH")) <*> vsiAnalyzeOpt + <*> binaryDiscoveryOpt <*> iatAssertionOpt <*> monorepoOpts <*> analyzeReplayOpt @@ -424,6 +426,11 @@ vsiAnalyzeOpt = flag' VSIAnalysisEnabled (long "enable-vsi" <> hidden) <|> pure VSIAnalysisDisabled +binaryDiscoveryOpt :: Parser BinaryDiscoveryMode +binaryDiscoveryOpt = + flag' BinaryDiscoveryEnabled (long "experimental-enable-binary-discovery" <> hidden) + <|> pure BinaryDiscoveryDisabled + iatAssertionOpt :: Parser AnalyzeVSIAssertionMode iatAssertionOpt = (AnalyzeVSIAssertionEnabled <$> strOption (long "experimental-link-project-binary" <> hidden)) @@ -703,6 +710,7 @@ data AnalyzeOptions = AnalyzeOptions , analyzeOnlyPaths :: [Path Rel Dir] , analyzeExcludePaths :: [Path Rel Dir] , analyzeVSIMode :: VSIAnalysisMode + , analyzeBinaryDiscoveryMode :: BinaryDiscoveryMode , analyzeAssertMode :: AnalyzeVSIAssertionMode , monorepoAnalysisOpts :: MonorepoAnalysisOpts , analyzeRecordMode :: RecordMode diff --git a/src/App/Fossa/VSI/IAT/Fingerprint.hs b/src/App/Fossa/VSI/IAT/Fingerprint.hs index 84051c78c..ab1580cc8 100644 --- a/src/App/Fossa/VSI/IAT/Fingerprint.hs +++ b/src/App/Fossa/VSI/IAT/Fingerprint.hs @@ -1,4 +1,5 @@ module App.Fossa.VSI.IAT.Fingerprint ( + fingerprintRaw, fingerprintContentsRaw, ) where diff --git a/src/App/Fossa/VSI/IAT/Types.hs b/src/App/Fossa/VSI/IAT/Types.hs index b3409eaa8..c009eb59f 100644 --- a/src/App/Fossa/VSI/IAT/Types.hs +++ b/src/App/Fossa/VSI/IAT/Types.hs @@ -9,12 +9,7 @@ module App.Fossa.VSI.IAT.Types ( ) where import App.Fossa.VSI.Types qualified as VSI -import Data.Aeson ( - FromJSON (parseJSON), - ToJSON (toJSON), - withObject, - (.:), - ) +import Data.Aeson (FromJSON (parseJSON), ToJSON (toJSON), withObject, (.:)) import Data.Text (Text) -- | Fingerprint uniquely idenfies a file, derived from its content. diff --git a/src/Effect/ReadFS.hs b/src/Effect/ReadFS.hs index 882566542..f5636958a 100644 --- a/src/Effect/ReadFS.hs +++ b/src/Effect/ReadFS.hs @@ -11,6 +11,7 @@ module Effect.ReadFS ( -- * Reading raw file contents readContentsBS, + readContentsBSLimit, readContentsText, -- * Resolving relative filepaths @@ -59,12 +60,14 @@ import GHC.Generics (Generic) import Parse.XML (FromXML, parseXML, xmlErrorPretty) import Path import Path.IO qualified as PIO +import System.IO (IOMode (ReadMode), withFile) import Text.Megaparsec (Parsec, runParser) import Text.Megaparsec.Error (errorBundlePretty) import Toml qualified data ReadFS (m :: Type -> Type) k where ReadContentsBS' :: Path x File -> ReadFS m (Either ReadFSErr ByteString) + ReadContentsBSLimit' :: Path x File -> Int -> ReadFS m (Either ReadFSErr ByteString) ReadContentsText' :: Path x File -> ReadFS m (Either ReadFSErr Text) DoesFileExist :: Path x File -> ReadFS m Bool DoesDirExist :: Path x Dir -> ReadFS m Bool @@ -110,6 +113,10 @@ instance ToDiagnostic ReadFSErr where readContentsBS' :: Has ReadFS sig m => Path b File -> m (Either ReadFSErr ByteString) readContentsBS' path = send (ReadContentsBS' path) +-- | Read at most n bytes of file content into a strict 'ByteString' +readContentsBSLimit :: Has ReadFS sig m => Path b File -> Int -> m (Either ReadFSErr ByteString) +readContentsBSLimit path limit = send (ReadContentsBSLimit' path limit) + -- | Read file contents into a strict 'ByteString' readContentsBS :: (Has ReadFS sig m, Has Diagnostics sig m) => Path b File -> m ByteString readContentsBS = fromEither <=< readContentsBS' @@ -200,6 +207,9 @@ runReadFSIO = interpret $ \case ReadContentsBS' file -> do BS.readFile (toFilePath file) `catchingIO` FileReadError (toFilePath file) + ReadContentsBSLimit' file limit -> do + readContentsBSLimit' file limit + `catchingIO` FileReadError (toFilePath file) ReadContentsText' file -> do (decodeUtf8 <$> BS.readFile (toFilePath file)) `catchingIO` FileReadError (toFilePath file) @@ -216,5 +226,8 @@ runReadFSIO = interpret $ \case DoesFileExist file -> sendIO (PIO.doesFileExist file) DoesDirExist dir -> sendIO (PIO.doesDirExist dir) +readContentsBSLimit' :: Path x File -> Int -> IO ByteString +readContentsBSLimit' file limit = withFile (toFilePath file) ReadMode $ \handle -> BS.hGetSome handle limit + catchingIO :: Has (Lift IO) sig m => IO a -> (Text -> ReadFSErr) -> m (Either ReadFSErr a) catchingIO io mangle = sendIO $ E.catch (Right <$> io) (\(e :: E.IOException) -> pure (Left (mangle (toText (show e)))))