Skip to content
This repository has been archived by the owner on Apr 1, 2022. It is now read-only.

Commit

Permalink
Binary deps: unpack jar (#372)
Browse files Browse the repository at this point in the history
  • Loading branch information
jssblck authored Sep 20, 2021
1 parent 1c9308f commit d23abf0
Show file tree
Hide file tree
Showing 12 changed files with 280 additions and 59 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Spectrometer Changelog

## v2.15.21

- When using `--experimental-enable-binary-discovery`, prepopulates information discovered in JAR manfiests. ([#372](https://github.com/fossas/spectrometer/pull/372))

## v2.15.20

- Yarn: Fixes potential runtime errors, when yarn.lock contains deep dependency without specification at root level in yarn.lock. ([#369](https://github.com/fossas/spectrometer/pull/369))
Expand Down
3 changes: 3 additions & 0 deletions spectrometer.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ library
App.Fossa.API.BuildWait
App.Fossa.ArchiveUploader
App.Fossa.BinaryDeps
App.Fossa.BinaryDeps.Jar
App.Fossa.Compatibility
App.Fossa.Configuration
App.Fossa.Container
Expand Down Expand Up @@ -331,6 +332,7 @@ test-suite unit-tests
Android.UtilSpec
App.DocsSpec
App.Fossa.API.BuildLinkSpec
App.Fossa.BinaryDeps.JarSpec
App.Fossa.Configuration.ConfigurationSpec
App.Fossa.ManualDepsSpec
App.Fossa.Report.AttributionSpec
Expand All @@ -347,6 +349,7 @@ test-suite unit-tests
Dart.PubDepsSpec
Dart.PubSpecSpec
Dart.PubSpecLockSpec
Discovery.ArchiveSpec
Discovery.FiltersSpec
Effect.ExecSpec
Elixir.MixTreeSpec
Expand Down
85 changes: 39 additions & 46 deletions src/App/Fossa/BinaryDeps.hs
Original file line number Diff line number Diff line change
@@ -1,59 +1,47 @@
{-# LANGUAGE RecordWildCards #-}

module App.Fossa.BinaryDeps (analyzeBinaryDeps) where

import App.Fossa.Analyze.Project (ProjectResult (..))
import App.Fossa.BinaryDeps.Jar (resolveJar)
import App.Fossa.VSI.IAT.Fingerprint (fingerprintRaw)
import App.Fossa.VSI.IAT.Types (Fingerprint (..))
import Control.Algebra (Has)
import Control.Carrier.Diagnostics (Diagnostics, fromEither)
import Control.Effect.Lift (Lift)
import Control.Monad (filterM)
import Data.ByteString qualified as BS
import Data.Maybe (catMaybes)
import Data.String.Conversion (toText)
import Data.Text (Text)
import Data.Text qualified as Text
import Discovery.Filters (AllFilters (..), FilterCombination (combinedPaths))
import Discovery.Walk (WalkStep (WalkContinue), walk')
import Effect.Logger (Logger)
import Effect.ReadFS (ReadFS, readContentsBSLimit)
import Path (Abs, Dir, File, Path, isProperPrefixOf, stripProperPrefix, toFilePath, (</>))
import Path (Abs, Dir, File, Path, isProperPrefixOf, (</>))
import Path.Extra (renderRelative)
import Srclib.Converter qualified as Srclib
import Srclib.Types (AdditionalDepData (..), SourceUnit (..), SourceUserDefDep (..))
import Types (GraphBreadth (Complete))

data BinaryFile = BinaryFile
{ binaryPath :: Path Abs File
, binaryFingerprint :: Fingerprint
}

-- | Binary detection is sufficiently different from other analysis types that it cannot be just another strategy.
-- Instead, binary detection is run separately over the entire scan directory, outputting its own source unit.
-- The goal of this feature is to enable a FOSSA user to flag all vendored binaries (as defined by git) in the project as dependencies.
-- Users may then use standard FOSSA UX flows to ignore or add license information to the detected binaries.
analyzeBinaryDeps :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeBinaryDeps :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has Logger sig m, Has ReadFS sig m) => Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeBinaryDeps dir filters = do
binaries <- fingerprintBinaries (toPathFilters dir filters) dir
if null binaries
binaryPaths <- findBinaries (toPathFilters dir filters) dir
if null binaryPaths
then pure Nothing
else pure . Just $ toSourceUnit (toProject dir) binaries
else do
resolvedBinaries <- traverse (resolveBinary strategies dir) binaryPaths
pure . Just $ toSourceUnit (toProject dir) resolvedBinaries

fingerprintBinaries :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => PathFilters -> Path Abs Dir -> m [BinaryFile]
fingerprintBinaries filters = walk' $ \dir _ files -> do
findBinaries :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => PathFilters -> Path Abs Dir -> m [Path Abs File]
findBinaries filters = walk' $ \dir _ files -> do
if shouldFingerprintDir dir filters
then do
someBinaries <- traverse fingerprintIfBinary files
pure (catMaybes someBinaries, WalkContinue)
binaries <- filterM fileIsBinary files
pure (binaries, WalkContinue)
else pure ([], WalkContinue)

fingerprintIfBinary :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs File -> m (Maybe BinaryFile)
fingerprintIfBinary file = do
isBinary <- fileIsBinary file
if isBinary
then do
fp <- fingerprintRaw file
pure . Just $ BinaryFile file fp
else pure Nothing

-- | PathFilters is a specialized filter mechanism that operates only on absolute directory paths.
data PathFilters = PathFilters
{ include :: [Path Abs Dir]
Expand All @@ -78,20 +66,9 @@ shouldFingerprintDir dir filters = (not shouldExclude) && shouldInclude
toProject :: Path Abs Dir -> ProjectResult
toProject dir = ProjectResult "binary-deps" dir mempty Complete []

toDependency :: Path Abs Dir -> BinaryFile -> SourceUserDefDep
toDependency root BinaryFile{..} =
SourceUserDefDep
{ srcUserDepName = renderRelative root binaryPath
, srcUserDepVersion = renderFingerprint binaryFingerprint
, srcUserDepLicense = ""
, srcUserDepDescription = Just "Binary discovered in source tree"
, srcUserDepHomepage = Nothing
}

toSourceUnit :: ProjectResult -> [BinaryFile] -> SourceUnit
toSourceUnit project binaries = do
toSourceUnit :: ProjectResult -> [SourceUserDefDep] -> SourceUnit
toSourceUnit project deps = do
let unit = Srclib.toSourceUnit project
let deps = map (toDependency $ projectResultPath project) binaries
unit{additionalData = Just $ AdditionalDepData (Just deps) Nothing}

-- | Just render the first few characters of the fingerprint.
Expand All @@ -100,16 +77,32 @@ toSourceUnit project binaries = do
renderFingerprint :: Fingerprint -> Text
renderFingerprint fingerprint = Text.take 12 $ unFingerprint fingerprint

renderRelative :: Path Abs Dir -> Path Abs File -> Text
renderRelative absDir absFile =
case stripProperPrefix absDir absFile of
Left _ -> toText . toFilePath $ absFile
Right relFile -> toText . toFilePath $ relFile

-- | Determine if a file is binary using the same method as git:
-- "is there a zero byte in the first 8000 bytes of the file"
fileIsBinary :: (Has ReadFS sig m, Has Diagnostics sig m) => Path Abs File -> m Bool
fileIsBinary file = do
attemptedContent <- readContentsBSLimit file 8000
content <- fromEither attemptedContent
pure $ BS.elem 0 content

-- | Try the next strategy in the list. If successful, evaluate to its result; if not move down the list of strategies and try again.
-- Eventually falls back to strategyRawFingerprint if no other strategy succeeds.
resolveBinary :: (Has (Lift IO) sig m, Has ReadFS sig m, Has Diagnostics sig m) => [(Path Abs Dir -> Path Abs File -> m (Maybe SourceUserDefDep))] -> Path Abs Dir -> Path Abs File -> m SourceUserDefDep
resolveBinary (resolve : remainingStrategies) = \root file -> do
result <- resolve root file
case result of
Just r -> pure r
Nothing -> resolveBinary remainingStrategies root file
resolveBinary [] = strategyRawFingerprint

-- | Functions which may be able to resolve a binary to a dependency.
strategies :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has Logger sig m, Has ReadFS sig m) => [(Path Abs Dir -> Path Abs File -> m (Maybe SourceUserDefDep))]
strategies =
[resolveJar]

-- | Fallback strategy: resolve to a user defined dependency for the binary, where the name is the relative path and the version is the fingerprint.
-- This strategy is used if no other strategy succeeds at resolving the binary.
strategyRawFingerprint :: (Has (Lift IO) sig m, Has Diagnostics sig m) => Path Abs Dir -> Path Abs File -> m SourceUserDefDep
strategyRawFingerprint root file = do
fp <- fingerprintRaw file
pure $ SourceUserDefDep (renderRelative root file) (renderFingerprint fp) "" (Just "Binary discovered in source tree") Nothing
104 changes: 104 additions & 0 deletions src/App/Fossa/BinaryDeps/Jar.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TemplateHaskell #-}

module App.Fossa.BinaryDeps.Jar (resolveJar) where

import Control.Algebra (Has)
import Control.Carrier.Diagnostics (Diagnostics, context, fromMaybeText, recover, (<||>))
import Control.Carrier.Finally (runFinally)
import Control.Effect.Lift (Lift)
import Data.List (isSuffixOf, sortOn)
import Data.Map (Map)
import Data.Map qualified as Map
import Data.Maybe (listToMaybe, mapMaybe)
import Data.String.Conversion (ToString (toString), ToText (toText))
import Data.Text (Text)
import Data.Text qualified as Text
import Discovery.Archive (extractZip, withArchive)
import Discovery.Walk (WalkStep (WalkContinue, WalkSkipAll), findFileNamed, walk')
import Effect.Logger (Logger, logDebug, pretty)
import Effect.ReadFS (ReadFS, readContentsText, readContentsXML)
import GHC.Base ((<|>))
import Path (Abs, Dir, File, Path, filename, mkRelDir, mkRelFile, (</>))
import Path.Extra (renderRelative)
import Srclib.Types (SourceUserDefDep (..))
import Strategy.Maven.Pom.PomFile (MavenCoordinate (..), Pom (..), RawPom, pomLicenseName, validatePom)

data JarMetadata = JarMetadata
{ jarName :: Text
, jarVersion :: Text
, jarLicense :: Text
}

-- | Implement JAR resolution using a similar method to Ant analysis in CLIv1.
-- The overall idea is to:
-- 1. Extract the JAR to a temporary directory (it's a zip!)
-- 2. Search inside for a file named `pom.xml`; if there are multiple pick the one with the shortest path.
-- If a representative pom.xml was found, parse it and return metadata derived from it.
-- 3. Attempt to open `META-INF/MANIFEST.MF`, parse it, and return metadata derived from it.
resolveJar :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has Logger sig m, Has ReadFS sig m) => Path Abs Dir -> Path Abs File -> m (Maybe SourceUserDefDep)
resolveJar _ file | not $ fileHasSuffix file [".jar", ".aar"] = pure Nothing
resolveJar root file = do
let fileDescription = toText file
logDebug $ "Inferring metadata from " <> pretty fileDescription
result <- recover . context ("Infer metadata from " <> fileDescription) . runFinally $ withArchive extractZip file $ \dir -> tacticPom dir <||> tacticMetaInf dir
pure $ fmap (toUserDefDep root file) result

tacticMetaInf :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has Logger sig m, Has ReadFS sig m) => Path Abs Dir -> m JarMetadata
tacticMetaInf archive = context ("Parse " <> toText metaInfPath) $ do
content <- readContentsText metaInfPath
logDebug $ "Parsing META-INF manifest: " <> pretty (renderRelative archive metaInfPath)
metaInfManifestToMeta $ parseMetaInfManifest content
where
metaInfPath = archive </> $(mkRelDir "META-INF") </> $(mkRelFile "MANIFEST.MF")

parseMetaInfManifest :: Text -> Map Text Text
parseMetaInfManifest t = Map.fromList . map strip' . filter' $ map (Text.breakOn ":") (Text.lines t)
where
null' (a, b) = any Text.null [a, b]
strip' (a, b) = (Text.strip a, Text.strip $ Text.tail b)
filter' = filter (not . null')

metaInfManifestToMeta :: Has Diagnostics sig m => Map Text Text -> m JarMetadata
metaInfManifestToMeta manifest =
JarMetadata
<$> fromMaybeText "Missing bundle name" (Map.lookup "Bundle-SymbolicName" manifest <|> Map.lookup "Implementation-Title" manifest)
<*> fromMaybeText "Missing implementation version" (Map.lookup "Implementation-Version" manifest)
<*> pure "" -- Don't attempt to use Bundle-License; it's a URL and we don't parse it on the backend

tacticPom :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has Logger sig m, Has ReadFS sig m) => Path Abs Dir -> m JarMetadata
tacticPom archive = context ("Parse representative pom.xml in " <> toText archive) $ do
poms <- context "Find pom.xml files" $ walk' (collectFilesNamed "pom.xml") (archive </> $(mkRelDir "META-INF"))
if length poms > 1
then logDebug $ "Found multiple pom.xml files: " <> pretty (Text.intercalate "; " $ map (renderRelative archive) poms)
else pure ()
pom <- fromMaybeText "No pom.xml files found" $ choosePom poms
logDebug $ "Chose representative pom.xml: " <> pretty (renderRelative archive pom)
parsePom pom

choosePom :: [Path Abs File] -> Maybe (Path Abs File)
choosePom = listToMaybe . sortOn (length . toString)

parsePom :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs File -> m JarMetadata
parsePom file = context ("Parse pom file: " <> toText file) $ do
(result :: RawPom) <- readContentsXML file
validated <- fromMaybeText "Invalid format" $ validatePom result
pure $ pomToMeta validated

pomToMeta :: Pom -> JarMetadata
pomToMeta Pom{..} = do
let name = (coordGroup pomCoord) <> ":" <> (coordArtifact pomCoord)
let license = Text.intercalate "\n" $ mapMaybe pomLicenseName pomLicenses
JarMetadata name (coordVersion pomCoord) license

collectFilesNamed :: Applicative f => String -> Path Abs Dir -> [Path Abs Dir] -> [Path Abs File] -> f ([Path Abs File], WalkStep)
collectFilesNamed name _ _ files = case findFileNamed name files of
Just f -> pure ([f], WalkSkipAll)
Nothing -> pure ([], WalkContinue)

fileHasSuffix :: Path a File -> [String] -> Bool
fileHasSuffix file = any (\suffix -> suffix `isSuffixOf` toString (filename file))

toUserDefDep :: Path Abs Dir -> Path Abs File -> JarMetadata -> SourceUserDefDep
toUserDefDep root file JarMetadata{..} =
SourceUserDefDep (renderRelative root file) jarVersion jarLicense (Just jarName) Nothing
29 changes: 17 additions & 12 deletions src/Discovery/Archive.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
module Discovery.Archive (
discover,
withArchive,
extractRpm,
extractTar,
extractTarGz,
extractZip,
) where

import Codec.Archive.Tar qualified as Tar
Expand All @@ -23,16 +28,16 @@ import Prelude hiding (zip)
discover :: (Has (Lift IO) sig m, Has ReadFS sig m, Has Diagnostics sig m, Has Finally sig m, Has TaskPool sig m) => (Path Abs Dir -> m ()) -> Path Abs Dir -> m ()
discover go = walk $ \_ _ files -> do
let tars = filter (\file -> ".tar" `isSuffixOf` fileName file) files
traverse_ (\file -> forkTask $ withArchive tar file go) tars
traverse_ (\file -> forkTask $ withArchive extractTar file go) tars

let tarGzs = filter (\file -> ".tar.gz" `isSuffixOf` fileName file) files
traverse_ (\file -> forkTask $ withArchive tarGz file go) tarGzs
traverse_ (\file -> forkTask $ withArchive extractTarGz file go) tarGzs

let zips = filter (\file -> ".zip" `isSuffixOf` fileName file) files
traverse_ (\file -> forkTask $ withArchive zip file go) zips
traverse_ (\file -> forkTask $ withArchive extractZip file go) zips

let jars = filter (\file -> ".jar" `isSuffixOf` fileName file) files
traverse_ (\file -> forkTask $ withArchive zip file go) jars
traverse_ (\file -> forkTask $ withArchive extractZip file go) jars

let rpms = filter (\file -> ".rpm" `isSuffixOf` fileName file) files
traverse_ (\file -> forkTask $ withArchive extractRpm file go) rpms
Expand All @@ -49,8 +54,8 @@ withArchive ::
-- | Path to archive
Path Abs File ->
-- | Callback
(Path Abs Dir -> m ()) ->
m ()
(Path Abs Dir -> m c) ->
m c
withArchive extract file go = do
tmpDir <- mkTempDir (fileName file)
extract tmpDir file
Expand All @@ -66,12 +71,12 @@ mkTempDir name = do

---------- Tar files

tar :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
tar dir tarFile =
extractTar :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
extractTar dir tarFile =
sendIO $ Tar.unpack (fromAbsDir dir) . removeTarLinks . Tar.read =<< BL.readFile (fromAbsFile tarFile)

tarGz :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
tarGz dir tarGzFile =
extractTarGz :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
extractTarGz dir tarGzFile =
sendIO $ Tar.unpack (fromAbsDir dir) . removeTarLinks . Tar.read . GZip.decompress =<< BL.readFile (fromAbsFile tarGzFile)

-- The tar unpacker dies when tar files reference files outside of the archive root
Expand All @@ -86,6 +91,6 @@ removeTarLinks (Tar.Fail e) = Tar.Fail e

---------- Zip files

zip :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
zip dir zipFile =
extractZip :: Has (Lift IO) sig m => Path Abs Dir -> Path Abs File -> m ()
extractZip dir zipFile =
sendIO $ Zip.withArchive (fromAbsFile zipFile) (Zip.unpackInto (fromAbsDir dir))
12 changes: 11 additions & 1 deletion src/Path/Extra.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
module Path.Extra (
tryMakeRelative,
renderRelative,
) where

import Path
import Data.String.Conversion (toText)
import Data.Text (Text)
import Path (Abs, Dir, File, Path, SomeBase (..), stripProperPrefix, toFilePath)

-- tryMakeRelative returns the path of an absolute file (Path Abs File) relative to an absolute directory (Path Abs Dir).
-- If the file is not within the directory, then the absolute file path will be returned
Expand All @@ -11,3 +14,10 @@ tryMakeRelative absDir absFile =
case stripProperPrefix absDir absFile of
Left _ -> Abs absFile
Right relFile -> Rel relFile

-- | Render the relative path between a Path Abs Dir and a Path Abs File that is supposed to be in that dir.
-- Intended for convenience when displaying the newly relative path; to interact with it use `tryMakeRelative` instead.
renderRelative :: Path Abs Dir -> Path Abs File -> Text
renderRelative absDir absFile = case tryMakeRelative absDir absFile of
Abs p -> toText . toFilePath $ p
Rel p -> toText . toFilePath $ p
Loading

0 comments on commit d23abf0

Please sign in to comment.