Skip to content

Commit

Permalink
extract-hackage-info: allow to dump as JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
amesgen authored and mrkkrp committed May 22, 2024
1 parent b2e2a6b commit b7179b6
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 11 deletions.
2 changes: 1 addition & 1 deletion extract-hackage-info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cleanup()
mkdir "$HOOGLE_DATABASE"
curl "https://hackage.haskell.org/packages/hoogle.tar.gz" | tar -xz -C "$HOOGLE_DATABASE"

nix run .#extract-hackage-info -- "$HOOGLE_DATABASE" -o "$OUTPUT"
nix run .#extract-hackage-info -- generate "$HOOGLE_DATABASE" -o "$OUTPUT"

cp "$OUTPUT" "extract-hackage-info/hackage-info.bin"

Expand Down
6 changes: 6 additions & 0 deletions extract-hackage-info/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ newtype HackageInfo
## How to use `extract-hackage-info`

Run `extract-hackage-info.sh` from the root of the repository.

You can also dump the Hackage info database as JSON via

```console
cabal run extract-hackage-info -- dump extract-hackage-info/hackage-info.bin
```
1 change: 1 addition & 0 deletions extract-hackage-info/extract-hackage-info.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ executable extract-hackage-info
ghc-options: -O2 -Wall -rtsopts -Wunused-packages
build-depends:
Cabal-syntax >=3.12 && <3.13,
aeson >=2.2 && <3,
base >=4.12 && <5,
binary >=0.8 && <0.9,
bytestring >=0.10 && <0.13,
Expand Down
76 changes: 66 additions & 10 deletions extract-hackage-info/src/Main.hs
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-orphans #-}

module Main (main) where

import Control.Exception
import Control.Monad
import Data.Aeson qualified as A
import Data.Binary qualified as Binary
import Data.Binary.Get qualified as Binary
import Data.Binary.Put qualified as Binary
import Data.ByteString qualified as ByteString
import Data.ByteString.Lazy qualified as BL
Expand All @@ -21,7 +26,9 @@ import Data.Text.Encoding (decodeLatin1)
import Data.Text.Encoding.Error (UnicodeException)
import Data.Text.IO.Utf8 qualified as T.Utf8
import Distribution.ModuleName (ModuleName)
import Distribution.ModuleName qualified as ModuleName
import Distribution.Types.PackageName (PackageName)
import Distribution.Types.PackageName qualified as PackageName
import Formatting
import Hoogle qualified
import Options.Applicative
Expand Down Expand Up @@ -175,18 +182,54 @@ displayFixityStats packages =
declarationsPerModule = concatMap Map.elems modulesPerPackage
declarationCount = sum (Map.size <$> declarationsPerModule)

data Config = Config
{ cfgHoogleDatabasePath :: FilePath,
cfgOutputPath :: FilePath
}
-- ToJSON orphan instances

-- 'FixityInfo' is encoded with the default method implementations of
-- 'A.ToJSON'.
deriving anyclass instance A.ToJSON FixityInfo

-- 'FixityDirection' is encoded with the default method implementations of
-- 'A.ToJSON'.
deriving anyclass instance A.ToJSON FixityDirection

-- An operator name is encoded as the value obtained from 'unOpName'.
instance A.ToJSON OpName where
  toJSON opName = A.toJSON (unOpName opName)

-- Lets 'OpName' appear as the key type of a JSON-encoded map.
deriving anyclass instance A.ToJSONKey OpName

-- A module name is encoded as the string produced by
-- 'ModuleName.toFilePath'.
-- NOTE(review): 'ModuleName.toFilePath' presumably renders with path
-- separators rather than dots; confirm this is the intended JSON form.
instance A.ToJSON ModuleName where
  toJSON moduleName = A.toJSON (ModuleName.toFilePath moduleName)

-- Lets 'ModuleName' appear as the key type of a JSON-encoded map.
deriving anyclass instance A.ToJSONKey ModuleName

-- A package name is encoded as the string obtained from
-- 'PackageName.unPackageName'.
instance A.ToJSON PackageName where
  toJSON packageName = A.toJSON (PackageName.unPackageName packageName)

-- Lets 'PackageName' appear as the key type of a JSON-encoded map.
deriving anyclass instance A.ToJSONKey PackageName

-- CLI config

-- | The action selected on the command line, together with its arguments.
data Config
  = -- | Generate a Hackage info database.
    Generate
      { -- | Path of the Hoogle database the info is extracted from
        cfgHoogleDatabasePath :: FilePath,
        -- | Path the binary-encoded database is written to
        cfgOutputPath :: FilePath
      }
  | -- | Dump a generated Hackage info database to JSON.
    Dump
      { -- | Path of the generated database to read
        cfgPath :: FilePath
      }
  deriving stock (Eq, Show)

configParserInfo :: ParserInfo Config
configParserInfo = info (helper <*> configParser) fullDesc
where
configParser :: Parser Config
configParser =
Config
subparser . mconcat $
[ command "generate" . info (helper <*> generateParser) $
fullDesc <> progDesc "Generate a Hackage info database.",
command "dump" . info (helper <*> dumpParser) $
fullDesc <> progDesc "Dump a generated Hackage info database to JSON."
]

generateParser :: Parser Config
generateParser =
Generate
<$> (strArgument . mconcat)
[ metavar "HOOGLE_DATABASE_PATH",
help
Expand All @@ -201,9 +244,22 @@ configParserInfo = info (helper <*> configParser) fullDesc
value defaultOutputPath
]

-- Parser for the @dump@ subcommand: one positional argument naming the
-- generated database file.
dumpParser :: Parser Config
dumpParser =
  Dump
    <$> strArgument
      ( metavar "HACKAGE_INFO_PATH"
          <> help "A generated Hackage info database"
      )

main :: IO ()
main = do
Config {..} <- execParser configParserInfo
hackageInfo' <- extractHoogleInfo cfgHoogleDatabasePath
BL.writeFile cfgOutputPath . Binary.runPut . Binary.put $
HackageInfo hackageInfo'
-- Parse the command line and run the selected subcommand:
--
--   * @generate@ extracts the Hackage info from the Hoogle database at
--     'cfgHoogleDatabasePath' and writes it, binary-encoded, to
--     'cfgOutputPath'.
--   * @dump@ decodes the binary database at 'cfgPath' and writes it to
--     stdout as JSON. A file that cannot be decoded is reported as an
--     'IOError' naming the file and the failing byte offset.
main =
  execParser configParserInfo >>= \case
    Generate {..} -> do
      hackageInfo' <- extractHoogleInfo cfgHoogleDatabasePath
      BL.writeFile cfgOutputPath . Binary.runPut . Binary.put $
        HackageInfo hackageInfo'
    Dump {..} -> do
      rawDatabase <- BL.readFile cfgPath
      -- 'Binary.runGet' calls 'error' on malformed input; the total variant
      -- lets a corrupt or unrelated file be reported with a clear message.
      case Binary.runGetOrFail Binary.get rawDatabase of
        Left (_, offset, message) ->
          ioError . userError $
            "Failed to decode "
              <> cfgPath
              <> " at byte offset "
              <> show offset
              <> ": "
              <> message
        Right (_, _, HackageInfo hackageInfo') ->
          BL.putStr $ A.encode hackageInfo'

0 comments on commit b7179b6

Please sign in to comment.