-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from antonkesy/dev
Add 95% complete proto3 parser
- Loading branch information
Showing
41 changed files
with
2,626 additions
and
438 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
.stack-work/ | ||
*~ | ||
dist-newstyle |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,138 @@ | ||
# protobuf-parser | ||
|
||
protobuf 3 + gRPC parser using parsec | ||
Simplified [Protocol Buffers Version 3](https://protobuf.dev/programming-guides/proto3/) and [gRPC](https://grpc.io/docs/what-is-grpc/introduction/) parser using Haskell and [Parsec](https://hackage.haskell.org/package/parsec) | ||
|
||
`stack run` | ||
`stack test` | ||
## Usage | ||
|
||
```bash | ||
> stack run -- --help | ||
Usage: protobuf-parser-exe [-f|--file PATH] [-p|--pretty] [STRING...] | ||
|
||
Available options: | ||
-f,--file PATH Specify file path to parse | ||
-p,--pretty Enable pretty print | ||
-h,--help Show this help text | ||
|
||
``` | ||
|
||
```bash | ||
stack run -- -p -f ./test/E2E/protofiles/chat.proto | ||
stack run -- -p "message SearchRequest { int32 page_number = 2; double results_per_page = 3; }" | ||
stack run "import \"foo.proto\"; import \"bar.proto\"; package foobar;" | ||
|
||
stack test | ||
``` | ||
|
||
## Structure | ||
|
||
``` | ||
protobuf-parser | ||
├── app | ||
│ └── Main.hs -> CLI Parsing | ||
├── ... | ||
├── src | ||
│ └── Text | ||
│ └── Protobuf | ||
│ ├── Parser -> Parser Combinators | ||
│ │ ├── ... | ||
│ │ └── *.hs | ||
│ ├── Parser.hs -> Complete Protobuf Parser | ||
│ └── Types.hs -> Protobuf Type representation | ||
├── ... | ||
└── test | ||
├── E2E | ||
│ ├── ... | ||
│ └── protofiles -> Example Protobuf files | ||
│ └── *.proto | ||
├── ... | ||
└── Unit | ||
└── ... | ||
``` | ||
|
||
## Simplifications | ||
|
||
This project acts as a parser combinator showcase project. | ||
Therefore, not all features are complete or correct: | ||
|
||
- Only proto3 syntax is supported | ||
- Not all values are checked for correctness | ||
- Base Lexical Elements do not strictly follow the [official spec](https://protobuf.dev/reference/protobuf/proto3-spec/#lexical_elements) | ||
- Proto 3 Ranges do not allow the keyword "min" | ||
- Empty statements are missing | ||
- Import weak and public are missing | ||
|
||
## Grammar | ||
|
||
The correct and complete Grammar can be found at the [official Protocol Buffers Version 3 Language Specification](https://protobuf.dev/reference/protobuf/proto3-spec/) | ||
|
||
Following is basic syntax in Extended Backus-Naur Form (EBNF): | ||
|
||
``` | ||
| alternation | ||
() grouping | ||
[] option (zero or one time) | ||
{} repetition (any number of times) | ||
``` | ||
|
||
``` | ||
syntax = "syntax" "=" ("'" "proto3" "'" | '"' "proto3" '"') ";" | ||
import = "import" [ "weak" | "public" ] strLit ";" | ||
package = "package" fullIdent ";" | ||
constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | | ||
strLit | boolLit | MessageValue | ||
option = "option" optionName "=" constant ";" | ||
optionName = ( ident | "(" ["."] fullIdent ")" ) | ||
type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | ||
| "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | ||
| "bool" | "string" | "bytes" | messageType | enumType | ||
fieldNumber = intLit; | ||
field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" | ||
fieldOptions = fieldOption { "," fieldOption } | ||
fieldOption = optionName "=" constant | ||
oneof = "oneof" oneofName "{" { option | oneofField } "}" | ||
oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" | ||
mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" | ||
keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | | ||
"fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | ||
reserved = "reserved" ( ranges | strFieldNames ) ";" | ||
ranges = range { "," range } | ||
range = intLit [ "to" ( intLit | "max" ) ] | ||
strFieldNames = strFieldName { "," strFieldName } | ||
strFieldName = "'" fieldName "'" | '"' fieldName '"' | ||
enum = "enum" enumName enumBody | ||
enumBody = "{" { option | enumField | emptyStatement | reserved } "}" | ||
enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" | ||
enumValueOption = optionName "=" constant | ||
message = "message" messageName messageBody | ||
messageBody = "{" { field | enum | message | option | oneof | mapField | | ||
reserved | emptyStatement } "}" | ||
service = "service" serviceName "{" { option | rpc | emptyStatement } "}" | ||
rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] | ||
messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") | ||
proto = syntax { import | package | option | topLevelDef | emptyStatement } | ||
topLevelDef = message | enum | service | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,61 @@ | ||
module Main (main) where | ||
|
||
import ProtoParser | ||
import Text.Parsec (parse) | ||
import Options.Applicative | ||
import Prettyprinter | ||
( Pretty (pretty), | ||
defaultLayoutOptions, | ||
layoutPretty, | ||
) | ||
import Prettyprinter.Render.String (renderString) | ||
import Text.Protobuf.Parser | ||
import Text.Protobuf.Types | ||
|
||
-- | Command-line options collected by 'parseOptions'.
data Options
  = Options
      (Maybe FilePath) -- ^ value of @-f@/@--file@, if given
      Bool -- ^ whether @-p@/@--pretty@ was passed
      [String] -- ^ remaining positional @STRING...@ arguments
|
||
-- | Command-line parser for 'Options': an optional @-f@/@--file@ path, a
-- @-p@/@--pretty@ switch, and any number of positional strings.
parseOptions :: Parser Options
parseOptions = Options <$> fileOpt <*> prettyFlag <*> inlineSources
  where
    fileOpt :: Parser (Maybe FilePath)
    fileOpt =
      optional . strOption $
        mconcat
          [ long "file",
            short 'f',
            metavar "PATH",
            help "Specify file path to parse"
          ]

    prettyFlag :: Parser Bool
    prettyFlag =
      switch $
        mconcat
          [ long "pretty",
            short 'p',
            help "Enable pretty print"
          ]

    inlineSources :: Parser [String]
    inlineSources = many $ argument str (metavar "STRING...")
|
||
-- | Program entry point: parse the command-line arguments (adding the
-- generated @--help@ option via 'helper') and hand them to 'processOptions'.
--
-- The former hard-coded debug parse of @"reserved 1, 2"@ was removed so that
-- nothing is printed before the user's input is processed.
main :: IO ()
main = do
  opts <- execParser $ info (parseOptions <**> helper) fullDesc
  processOptions opts
|
||
-- | Act on the parsed command-line options.
--
-- * No file and no inline strings: print a hint that input is missing. This
--   applies whether or not the pretty flag is set, so @-p@ on its own no
--   longer falls through to parsing an empty string.
-- * A file and no inline strings: parse that file.
-- * Otherwise: parse the inline strings joined with single spaces. A file
--   path given together with inline strings is not read.
processOptions :: Options -> IO ()
processOptions (Options Nothing _ []) =
  putStrLn "Arguments: No file path provided or strings provided"
processOptions (Options (Just path) isPrettier []) =
  parseProtoFile path >>= reportResult isPrettier
processOptions (Options _ isPrettier otherArgs) =
  reportResult isPrettier (parseProtobuf (unwords otherArgs))

-- | Print a parse failure as @Parse error: ...@, or print the parsed
-- 'Protobuf' via 'protoPrint' using the given pretty flag.
reportResult :: Show e => Bool -> Either e Protobuf -> IO ()
reportResult isPrettier =
  either
    (\err -> putStrLn $ "Parse error: " ++ show err)
    (`protoPrint` isPrettier)
|
||
-- | Write a parsed 'Protobuf' to stdout: rendered through its 'Pretty'
-- instance with the default layout when the flag is 'True', otherwise via
-- its 'Show' instance.
protoPrint :: Protobuf -> Bool -> IO ()
protoPrint protobuf isPrettier
  | isPrettier = putStrLn rendered
  | otherwise = print protobuf
  where
    rendered = renderString (layoutPretty defaultLayoutOptions (pretty protobuf))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.