Skip to content

Commit

Permalink
Merge pull request #1 from antonkesy/dev
Browse files Browse the repository at this point in the history
Add 95% complete proto3 parser
  • Loading branch information
antonkesy authored Nov 28, 2023
2 parents 917541c + 16d532e commit 9603ddd
Show file tree
Hide file tree
Showing 41 changed files with 2,626 additions and 438 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.stack-work/
*~
dist-newstyle
138 changes: 135 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,138 @@
# protobuf-parser

protobuf 3 + gRPC parser using parsec
Simplified [Protocol Buffers Version 3](https://protobuf.dev/programming-guides/proto3/) and [gRPC](https://grpc.io/docs/what-is-grpc/introduction/) parser using Haskell and [Parsec](https://hackage.haskell.org/package/parsec)

`stack run`
`stack test`
## Usage

```bash
> stack run -- --help
Usage: protobuf-parser-exe [-f|--file PATH] [-p|--pretty] [STRING...]

Available options:
-f,--file PATH Specify file path to parse
-p,--pretty Enable pretty print
-h,--help Show this help text

```

```bash
stack run -- -p -f ./test/E2E/protofiles/chat.proto
stack run -- -p "message SearchRequest { int32 page_number = 2; double results_per_page = 3; }"
stack run "import \"foo.proto\"; import \"bar.proto\"; package foobar;"

stack test
```

## Structure

```
protobuf-parser
├── app
│   └── Main.hs -> CLI Parsing
├── ...
├── src
│   └── Text
│   └── Protobuf
│   ├── Parser -> Parser Combinators
│   │   ├── ...
│   │   └── *.hs
│   ├── Parser.hs -> Complete Protobuf Parser
│   └── Types.hs -> Protobuf Type representation
├── ...
└── test
├── E2E
│   ├── ...
│   └── protofiles -> Example Protobuf files
│   └── *.proto
├── ...
└── Unit
└── ...
```

## Simplifications

This project acts as a parser combinator showcase project.
Therefore, not all features are complete or correct:

- Only proto3 syntax is supported
- Not all values are checked for correctness
- Base Lexical Elements do not strictly follow the [official spec](https://protobuf.dev/reference/protobuf/proto3-spec/#lexical_elements)
- Proto 3 Ranges do not allow the keyword "min"
- Empty statements are missing
- Import weak and public are missing

## Grammar

The correct and complete Grammar can be found at the [official Protocol Buffers Version 3 Language Specification](https://protobuf.dev/reference/protobuf/proto3-spec/)

Following is basic syntax in Extended Backus-Naur Form (EBNF):

```
| alternation
() grouping
[] option (zero or one time)
{} repetition (any number of times)
```

```
syntax = "syntax" "=" ("'" "proto3" "'" | '"' "proto3" '"') ";"
import = "import" [ "weak" | "public" ] strLit ";"
package = "package" fullIdent ";"
constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
strLit | boolLit | MessageValue
option = "option" optionName "=" constant ";"
optionName = ( ident | "(" ["."] fullIdent ")" )
type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
| "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
| "bool" | "string" | "bytes" | messageType | enumType
fieldNumber = intLit;
field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
fieldOptions = fieldOption { "," fieldOption }
fieldOption = optionName "=" constant
oneof = "oneof" oneofName "{" { option | oneofField } "}"
oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
"fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
reserved = "reserved" ( ranges | strFieldNames ) ";"
ranges = range { "," range }
range = intLit [ "to" ( intLit | "max" ) ]
strFieldNames = strFieldName { "," strFieldName }
strFieldName = "'" fieldName "'" | '"' fieldName '"'
enum = "enum" enumName enumBody
enumBody = "{" { option | enumField | emptyStatement | reserved } "}"
enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ] ";"
enumValueOption = optionName "=" constant
message = "message" messageName messageBody
messageBody = "{" { field | enum | message | option | oneof | mapField |
reserved | emptyStatement } "}"
service = "service" serviceName "{" { option | rpc | emptyStatement } "}"
rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ]
messageType ")" (( "{" {option | emptyStatement } "}" ) | ";")
proto = syntax { import | package | option | topLevelDef | emptyStatement }
topLevelDef = message | enum | service
```
61 changes: 56 additions & 5 deletions app/Main.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,61 @@
module Main (main) where

import ProtoParser
import Text.Parsec (parse)
import Options.Applicative
import Prettyprinter
( Pretty (pretty),
defaultLayoutOptions,
layoutPretty,
)
import Prettyprinter.Render.String (renderString)
import Text.Protobuf.Parser
import Text.Protobuf.Types

-- | Command-line options, stored positionally in the order the parser
-- fills them in.
data Options
  = Options
      (Maybe FilePath) -- path given via -f / --file, if any
      Bool -- True when -p / --pretty was passed
      [String] -- remaining positional STRING... arguments

-- | Command-line parser for 'Options'.
--
-- Accepts an optional @-f@ / @--file PATH@, a @-p@ / @--pretty@ switch and
-- any number of positional @STRING...@ arguments, in that constructor order.
parseOptions :: Parser Options
parseOptions = Options <$> filePath <*> prettyFlag <*> inputStrings
  where
    -- Optional path option; absent means 'Nothing'.
    filePath =
      optional . strOption $
        mconcat
          [ long "file",
            short 'f',
            metavar "PATH",
            help "Specify file path to parse"
          ]
    -- Boolean switch, False unless the flag is present.
    prettyFlag =
      switch $
        mconcat
          [ long "pretty",
            short 'p',
            help "Enable pretty print"
          ]
    -- Zero or more free-standing string arguments.
    inputStrings = many (argument str (metavar "STRING..."))

-- | Program entry point: parse the command line (with @--help@ support via
-- 'helper') and hand the resulting 'Options' to 'processOptions'.
main :: IO ()
main = do
  -- NOTE(review): this hard-coded parse of "reserved 1, 2" with 'enumField'
  -- prints its result (or parse error) on every start-up. It looks like
  -- leftover debugging code -- confirm, then remove it together with the
  -- imports that only it uses.
  either print print (parse enumField "" "reserved 1, 2")
  execParser (info (parseOptions <**> helper) fullDesc) >>= processOptions

-- | Act on the parsed command-line options.
--
-- * Neither a file path nor positional strings: print a hint that no input
--   was supplied. This applies whether or not the pretty flag is set;
--   previously @-p@ on its own fell through to parsing the empty string.
-- * A file path and no positional strings: parse that file with
--   'parseProtoFile'.
-- * Otherwise: join the positional strings with spaces and parse them with
--   'parseProtobuf'. A file path given together with positional strings is
--   ignored.
--
-- Parse failures are written to stdout prefixed with @"Parse error: "@;
-- successful results are printed through 'protoPrint'.
processOptions :: Options -> IO ()
processOptions (Options Nothing _ []) =
  putStrLn "Arguments: No file path provided or strings provided"
processOptions (Options (Just path) isPrettier []) = do
  result <- parseProtoFile path
  case result of
    Left err -> putStrLn $ "Parse error: " ++ show err
    Right protobuf -> protoPrint protobuf isPrettier
processOptions (Options _ isPrettier otherArgs) =
  case parseProtobuf (unwords otherArgs) of
    Left err -> putStrLn $ "Parse error: " ++ show err
    Right protobuf -> protoPrint protobuf isPrettier

-- | Write a parsed 'Protobuf' to stdout.
--
-- When the flag is 'True' the value is rendered through its 'Pretty'
-- instance using 'defaultLayoutOptions'; otherwise its 'Show' output is
-- printed.
protoPrint :: Protobuf -> Bool -> IO ()
protoPrint protobuf isPrettier
  | isPrettier = putStrLn (render protobuf)
  | otherwise = print protobuf
  where
    render = renderString . layoutPretty defaultLayoutOptions . pretty
8 changes: 7 additions & 1 deletion package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ description: Please see the README on GitHub at <https://github.com/antonkesy/pr

dependencies:
- base >= 4.7 && < 5
- parsec >=3.1.16 && <4
- parsec >= 3.1.16 && < 4
- prettyprinter >= 1.7.1 && < 2
- optparse-applicative >= 0.17.0

ghc-options:
- -Wall
Expand All @@ -26,6 +28,10 @@ ghc-options:
- -Wmissing-home-modules
- -Wpartial-fields
- -Wredundant-constraints
- -Werror -W -fwarn-unused-imports -fwarn-unused-binds -fwarn-orphans
- -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind
- -fwarn-missing-signatures -fno-warn-partial-type-signatures
- -Wredundant-constraints -rtsopts

library:
source-dirs: src
Expand Down
45 changes: 35 additions & 10 deletions protobuf-parser.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.2

-- This file has been generated from package.yaml by hpack version 0.35.2.
-- This file has been generated from package.yaml by hpack version 0.36.0.
--
-- see: https://github.com/sol/hpack

Expand All @@ -25,20 +25,32 @@ source-repository head

library
exposed-modules:
Protobuf
ProtoParser
ProtoParser.Enum
ProtoParser.Misc
Text.Protobuf.Parser
Text.Protobuf.Parser.Comment
Text.Protobuf.Parser.EndOfLine
Text.Protobuf.Parser.Enum
Text.Protobuf.Parser.Import
Text.Protobuf.Parser.Message
Text.Protobuf.Parser.Option
Text.Protobuf.Parser.Package
Text.Protobuf.Parser.Reserved
Text.Protobuf.Parser.Service
Text.Protobuf.Parser.Space
Text.Protobuf.Parser.Syntax
Text.Protobuf.Parser.Type
Text.Protobuf.Types
other-modules:
Paths_protobuf_parser
autogen-modules:
Paths_protobuf_parser
hs-source-dirs:
src
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -Werror -W -fwarn-unused-imports -fwarn-unused-binds -fwarn-orphans -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind -fwarn-missing-signatures -fno-warn-partial-type-signatures -Wredundant-constraints -rtsopts
build-depends:
base >=4.7 && <5
, optparse-applicative >=0.17.0
, parsec >=3.1.16 && <4
, prettyprinter >=1.7.1 && <2
default-language: Haskell2010

executable protobuf-parser-exe
Expand All @@ -49,28 +61,41 @@ executable protobuf-parser-exe
Paths_protobuf_parser
hs-source-dirs:
app
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -Werror -W -fwarn-unused-imports -fwarn-unused-binds -fwarn-orphans -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind -fwarn-missing-signatures -fno-warn-partial-type-signatures -Wredundant-constraints -rtsopts -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, optparse-applicative >=0.17.0
, parsec >=3.1.16 && <4
, prettyprinter >=1.7.1 && <2
, protobuf-parser
default-language: Haskell2010

test-suite protobuf-parser-test
type: exitcode-stdio-1.0
main-is: Spec.hs
other-modules:
Unit.Enum
Unit.Misc
E2E.Files
Unit.Text.Protobuf.Parser
Unit.Text.Protobuf.Parser.Comment
Unit.Text.Protobuf.Parser.Enum
Unit.Text.Protobuf.Parser.Import
Unit.Text.Protobuf.Parser.Message
Unit.Text.Protobuf.Parser.Option
Unit.Text.Protobuf.Parser.Package
Unit.Text.Protobuf.Parser.Service
Unit.Text.Protobuf.Parser.Syntax
Unit.Text.Protobuf.Parser.Type
Paths_protobuf_parser
autogen-modules:
Paths_protobuf_parser
hs-source-dirs:
test
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -Werror -W -fwarn-unused-imports -fwarn-unused-binds -fwarn-orphans -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind -fwarn-missing-signatures -fno-warn-partial-type-signatures -Wredundant-constraints -rtsopts -threaded -rtsopts -with-rtsopts=-N
build-depends:
HUnit
, base >=4.7 && <5
, optparse-applicative >=0.17.0
, parsec >=3.1.16 && <4
, prettyprinter >=1.7.1 && <2
, protobuf-parser
default-language: Haskell2010
8 changes: 0 additions & 8 deletions src/ProtoParser.hs

This file was deleted.

Loading

0 comments on commit 9603ddd

Please sign in to comment.