From 6969dc26007f2d5031f08852e0be7066a09e9a75 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Fri, 19 Jul 2019 17:57:27 +0200
Subject: [PATCH 01/13] .avdl lexer+parser draft

---
 .gitignore                            |   2 +
 rebar.config                          |   2 +-
 src/avro_idl_lexer.xrl                |  77 +++++++++++
 src/avro_idl_parser.yrl               | 180 ++++++++++++++++++++++++++
 test/data/empty_protocol.avdl         |   6 +
 test/data/full_protocol.avdl          |  33 +++++
 test/data/protocol_with_typedefs.avdl |  26 ++++
 7 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 src/avro_idl_lexer.xrl
 create mode 100644 src/avro_idl_parser.yrl
 create mode 100644 test/data/empty_protocol.avdl
 create mode 100644 test/data/full_protocol.avdl
 create mode 100644 test/data/protocol_with_typedefs.avdl

diff --git a/.gitignore b/.gitignore
index 919cc29..8909176 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ out/
 _build
 rebar.lock
 *.crashdump
+src/avro_idl_lexer.erl
+src/avro_idl_parser.erl
diff --git a/rebar.config b/rebar.config
index 38c47a6..9f49d6c 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,6 +1,6 @@
 %% -*- mode:erlang -*-
 {erl_opts,             [ debug_info
-                       , warnings_as_errors
+%                       , warnings_as_errors
                        , {d,'NOTEST'}
                        ]}.
 {eunit_opts,           [verbose]}.
diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
new file mode 100644
index 0000000..eb6c1ba
--- /dev/null
+++ b/src/avro_idl_lexer.xrl
@@ -0,0 +1,77 @@
+%% @doc Avro IDL lexer
+%% https://avro.apache.org/docs/current/idl.html
+
+Definitions.
+
+Rules.
+
+[\s\t\n\r]+ : skip_token.
+
+"[^\"]+" : {token, {string_v, TokenLine, unescape(TokenChars, $\")}}.
+
+`[^\`]+` : {token, {id, TokenLine, unescape(TokenChars, $`)}}.
+
+//[^\r\n]* : {token, {comment_v, TokenLine, unescape_line_comment(TokenChars)}}.
+
+/\*(.|[\r\n])*\*/ : {token, {comment_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
+
+\{ : {token, {'{', TokenLine}}.
+\} : {token, {'}', TokenLine}}.
+\( : {token, {'(', TokenLine}}.
+\) : {token, {')', TokenLine}}.
+\[ : {token, {'[', TokenLine}}.
+\] : {token, {']', TokenLine}}.
+<  : {token, {'<', TokenLine}}.
+>  : {token, {'>', TokenLine}}.
+;  : {token, {';', TokenLine}}.
+\, : {token, {',', TokenLine}}.
+
+
+%% Default values (json)
+= : {token, {'=', TokenLine}}.
+%% TODO: better float regexp
+[+-]?[0-9]+\.[0-9]+ : {token, {float_v, TokenLine, list_to_float(TokenChars)}}.
+[+-]?[0-9]+         : {token, {integer_v, TokenLine, list_to_integer(TokenChars)}}.
+true|false          : {token, {bool_v, TokenLine, list_to_atom(TokenChars)}}.
+%% TODO: null?/:(for maps)/???...
+
+%% === Datatype IDs ===
+
+%% primitive; FIXME: 'null' can be used in both primitive and data!
+int|long|string|boolean|float|double|bytes|null : {token, {primitive_t, TokenLine, list_to_atom(TokenChars)}}.
+
+%% complex
+record|enum|array|map|fixed|union : {token, {list_to_atom(TokenChars ++ "_t"), TokenLine}}.
+
+%% Logical
+decimal|date|time_ms|timestamp_ms : {token, {logical_t, TokenLine, list_to_atom(TokenChars)}}.
+
+%% keywords
+error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(TokenChars ++ "_k"), TokenLine}}.
+
+%% === Constructs ===
+
+@[a-zA-Z0-9_-]+ : {token, {annotation_v, TokenLine, unescape_annotation(TokenChars)}}.
+
+[A-Za-z_][A-Za-z_0-9]* : {token, {id, TokenLine, TokenChars}}.
+
+Erlang code.
+
+
+unescape(Token, Char) ->
+    string:trim(Token, both, [Char]).
+
+unescape_line_comment("//" ++ Comment) ->
+    Comment.
+
+%% TODO: cleanup
+unescape_multiline_comment("/**" ++ Comment0) ->
+    %% Drop closing "*/"
+    Len = length(Comment0),
+    lists:sublist(Comment0, Len - 2);
+unescape_multiline_comment("/*" ++ Comment0) ->
+    Len = length(Comment0),
+    lists:sublist(Comment0, Len - 2).
+
+unescape_annotation("@" ++ Annotation) ->
+    Annotation.
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
new file mode 100644
index 0000000..f2d3070
--- /dev/null
+++ b/src/avro_idl_parser.yrl
@@ -0,0 +1,180 @@
+%% @doc Avro IDL parser
+%% https://avro.apache.org/docs/current/idl.html
+
+Header "%% Hello".
+
+Terminals id string_v comment_v float_v integer_v bool_v annotation_v
+    primitive_t logical_t
+    '{' '}' '(' ')' '[' ']' '<' '>' ';' ',' '='
+    record_t enum_t array_t map_t fixed_t union_t
+    protocol_k error_k throws_k oneway_k void_k import_k idl_k schema_k.
+
+Nonterminals
+    protocol typedefs
+    decorator decorator_value string array_of_strings array_of_strings_tail
+    typedef_tail typedef
+    import import_file_type
+    primitive
+    enum enum_variants
+    union
+    record
+    fixed
+    array
+    map
+    error
+    data
+    array_of_data array_of_data_tail.
+
+Rootsymbol protocol.
+
+
+protocol ->
+    protocol_k id '{' '}' :
+        {protocol, value_of('$2'), []}.
+protocol ->
+    protocol_k id '{' typedef typedef_tail :
+        {protocol, value_of('$2'), ['$4' | '$5']}.
+protocol ->
+    decorator protocol :
+        {decorated, '$1', '$2'}.  % todo: embed into protocol?
+
+
+%% == Decorator ==
+decorator ->
+    annotation_v '(' decorator_value ')' :
+        {decorator, value_of('$1'), '$3'}.
+
+%% Maybe can just use `data` instead of `decorator_value`?
+decorator_value ->
+    string :
+        '$1'.
+decorator_value ->
+    array_of_strings :
+        '$1'.
+
+string ->
+    string_v :
+        value_of('$1').
+
+array_of_strings ->
+    '[' ']' :
+        [].
+array_of_strings ->
+    '[' string array_of_strings_tail :
+        ['$2' | '$3'].
+
+array_of_strings_tail ->
+    ']' :
+        [].
+array_of_strings_tail ->
+    ',' string array_of_strings_tail :
+        ['$2' | '$3'].
+
+
+%% == Type definitions (inside protocol or record) ==
+
+typedef_tail ->
+    '}' :
+        [].
+typedef_tail ->
+    typedef typedef_tail :
+        ['$1' | '$2'].
+
+%% TODO: generalize to 'type' name (= value)(;)
+typedef -> import : '$1'.
+typedef -> primitive : '$1'.
+typedef -> enum : '$1'.
+typedef -> union : '$1'.
+typedef -> record : '$1'.
+typedef -> fixed : '$1'.
+typedef -> array : '$1'.
+typedef -> map : '$1'.
+typedef -> error : '$1'.
+%% typedef -> function : '$1'.  % TODO
+
+%% -- Import def
+
+import ->
+    import_k import_file_type string_v ';' :
+        {import, '$2', value_of('$3')}.
+
+import_file_type -> idl_k : idl.
+import_file_type -> protocol_k : protocol.
+import_file_type -> schema_k : schema.
+
+%% -- Primitive typedef
+primitive ->
+    primitive_t id ';' :
+        {primitive, value_of('$2'), value_of('$1'), undefined}.
+primitive ->
+    primitive_t id '=' data ';' :
+        {primitive, value_of('$2'), value_of('$1'), '$4'}.
+
+%% -- Enum typedef
+enum ->
+    enum_t id '{' id enum_variants :
+        {enum, value_of('$2'), [value_of('$4') | '$5']}.
+
+enum_variants ->
+    '}' :
+        [].
+enum_variants ->
+    ',' id enum_variants : [value_of('$2') | '$3'].
+
+
+union -> union_t : '$1'.                        %TODO
+record -> record_t : '$1'.                      %TODO
+
+%% -- Fixed typedef
+fixed ->
+    fixed_t id '(' integer_v ')' ';':
+        {fixed, '$2', value_of('$4'), undefined}.
+fixed ->
+    fixed_t id '(' integer_v ')' '=' data ';' :
+        {fixed, '$2', value_of('$4'), '$6'}.
+
+%% -- Array typedef
+array ->
+    array_t '<' primitive_t '>' id ';' :
+        {array, value_of('$5'), value_of('$3'), undefined}.         %FIXME: not just primitives!
+array ->
+    array_t '<' primitive_t '>' id '=' data ';' :
+        {array, value_of('$5'), value_of('$3'), '$7'}.
+
+%% -- Map typedef
+map ->
+    map_t '<' primitive_t '>' id ';' :
+        {map, '$5', value_of('$3'), undefined}.           %FIXME: not just primitives!; defaults!
+
+%% -- Error typedef
+error ->
+    error_k : '$1'.                             %TODO
+
+%% == Data (JSON) for default values
+data -> string_v : value_of('$1').
+data -> integer_v : value_of('$1').
+data -> float_v : value_of('$1').
+data -> bool_v : value_of('$1').
+data -> array_of_data : '$1'.
+
+array_of_data ->
+    '[' ']' :
+        [].
+array_of_data ->
+    '[' data array_of_data_tail :
+        ['$2' | '$3'].
+
+array_of_data_tail ->
+    ']' :
+        [].
+array_of_data_tail ->
+    ',' data array_of_data_tail :
+        ['$2' | '$3'].
+
+Erlang code.
+
+value_of(Token) ->
+    try element(3, Token)
+    catch error:badarg ->
+            error({badarg, Token})
+    end.
diff --git a/test/data/empty_protocol.avdl b/test/data/empty_protocol.avdl
new file mode 100644
index 0000000..20082c9
--- /dev/null
+++ b/test/data/empty_protocol.avdl
@@ -0,0 +1,6 @@
+@deco1("wasd")
+@deco2(["abc", "def"])
+@deco3([])
+protocol MyProto {
+
+}
diff --git a/test/data/full_protocol.avdl b/test/data/full_protocol.avdl
new file mode 100644
index 0000000..9d07aa9
--- /dev/null
+++ b/test/data/full_protocol.avdl
@@ -0,0 +1,33 @@
+/**
+ * An example protocol in Avro IDL
+ */
+@namespace("org.apache.avro.test")
+protocol Simple {
+  @aliases(["org.foo.KindOf"])
+  enum Kind {
+    FOO,
+    BAR, // the bar enum value
+    BAZ
+  }
+  fixed MD5(16);
+  record TestRecord {
+    @order("ignore")
+    string name = "default name";
+    int amount = -1;
+    @order("descending")
+    Kind kind;
+    MD5 hash;
+    union { MD5, null} @aliases(["hash"]) nullableHash;
+    array<long> arrayOfLongs;
+  }
+  error TestError {
+    string message;
+  }
+  string hello(string greeting);
+  TestRecord echo(TestRecord `record`);
+  int add(int arg1, int arg2);
+  bytes echoBytes(bytes data);
+  void `error`() throws TestError;
+  void ping() oneway;
+}
+
diff --git a/test/data/protocol_with_typedefs.avdl b/test/data/protocol_with_typedefs.avdl
new file mode 100644
index 0000000..75ca25e
--- /dev/null
+++ b/test/data/protocol_with_typedefs.avdl
@@ -0,0 +1,26 @@
+
+protocol MyProto {
+   import idl "foo.avdl";
+   import protocol "bar.avpr";
+   import schema "baz.avsc";
+
+   enum MyEnum1 {
+       VAR11,
+       VAR12,
+       VAR13
+   }
+   enum MyEnum2 {
+       VAR21,
+       VAR22,
+       VAR23
+   }
+
+   int my_int;
+   string my_string = "wasd";
+   float my_float = 12.34;
+   boolean my_bool = false;
+
+   array<int> my_int_array;
+   array<int> my_int_array_def = [1, 2, 3];
+   array<string> my_str_array_def = ["123", "456", "cdf"];
+}

From 0bafe7eeefb3fdcb651aa6074bf24983a788c1b7 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Mon, 22 Jul 2019 15:59:00 +0200
Subject: [PATCH 02/13] More progress on parser

It seems the only things left are some of the annotations and the docstrings.
---
 src/avro_idl_lexer.xrl                |  20 ++-
 src/avro_idl_parser.yrl               | 209 ++++++++++++++++++--------
 test/data/protocol_with_typedefs.avdl |  40 ++++-
 3 files changed, 193 insertions(+), 76 deletions(-)

diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index eb6c1ba..7e98033 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -7,12 +7,15 @@ Rules.
 
 [\s\t\n\r]+ : skip_token.
 
+%% TODO: escaped double quotes inside strings
 "[^\"]+" : {token, {string_v, TokenLine, unescape(TokenChars, $\")}}.
 
 `[^\`]+` : {token, {id, TokenLine, unescape(TokenChars, $`)}}.
 
 //[^\r\n]* : {token, {comment_v, TokenLine, unescape_line_comment(TokenChars)}}.
 
+%% `/**` is a docstring for the following object
+/\*\*(.|[\r\n])*\*/ : {token, {doc_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
 /\*(.|[\r\n])*\*/ : {token, {comment_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
 
 \{ : {token, {'{', TokenLine}}.
@@ -26,26 +29,29 @@ Rules.
 ;  : {token, {';', TokenLine}}.
 \, : {token, {',', TokenLine}}.
 
+%% Null can be in both values and primitive types
+null : {token, {null, TokenLine}}.
 
 %% Default values (json)
 = : {token, {'=', TokenLine}}.
-%% TODO: better float regexp
+%% TODO: better float regexp;
+%% XXX: is it safe to use list_to_float? seems float syntax is used for decimal defaults as well
 [+-]?[0-9]+\.[0-9]+ : {token, {float_v, TokenLine, list_to_float(TokenChars)}}.
 [+-]?[0-9]+         : {token, {integer_v, TokenLine, list_to_integer(TokenChars)}}.
 true|false          : {token, {bool_v, TokenLine, list_to_atom(TokenChars)}}.
-%% TODO: null?/:(for maps)/???...
+\:                  : {token, {':', TokenLine}}.
 
 %% === Datatype IDs ===
 
-%% primitive; FIXME: 'null' can be used in both primitive and data!
-int|long|string|boolean|float|double|bytes|null : {token, {primitive_t, TokenLine, list_to_atom(TokenChars)}}.
+%% primitive
+int|long|string|boolean|float|double|bytes : {token, {primitive_t, TokenLine, list_to_atom(TokenChars)}}.
 
 %% complex
 record|enum|array|map|fixed|union : {token, {list_to_atom(TokenChars ++ "_t"), TokenLine}}.
 
 %% Logical
-decimal|date|time_ms|timestamp_ms : {token, {logical_t, TokenLine, list_to_atom(TokenChars)}}.
-
+date|time_ms|timestamp_ms : {token, {logical_t, TokenLine, list_to_atom(TokenChars)}}.
+decimal : {token, {decimal_t, TokenLine}}.
 %% keywords
 error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(TokenChars ++ "_k"), TokenLine}}.
 
@@ -54,6 +60,8 @@ error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(Toke
 @[a-zA-Z0-9_-]+ : {token, {annotation_v, TokenLine, unescape_annotation(TokenChars)}}.
 
 [A-Za-z_][A-Za-z_0-9]* : {token, {id, TokenLine, TokenChars}}.
+%% namespaced will only be allowed in data type spec
+[A-Za-z_][A-Za-z_0-9\.]+[A-Za-z_0-9] : {token, {ns_id, TokenLine, TokenChars}}.
 
 Erlang code.
 
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index f2d3070..b873c3a 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -1,29 +1,32 @@
 %% @doc Avro IDL parser
 %% https://avro.apache.org/docs/current/idl.html
+%% XXX: all `comment_v` tockens should be filtered-out before parsing!
+%% TODO: docstrings
+%% TODO: better annotations support
 
 Header "%% Hello".
 
-Terminals id string_v comment_v float_v integer_v bool_v annotation_v
-    primitive_t logical_t
-    '{' '}' '(' ')' '[' ']' '<' '>' ';' ',' '='
+Terminals id ns_id null string_v doc_v float_v integer_v bool_v annotation_v
+    primitive_t logical_t decimal_t
+    '{' '}' '(' ')' '[' ']' '<' '>' ';' ',' '=' ':'
     record_t enum_t array_t map_t fixed_t union_t
     protocol_k error_k throws_k oneway_k void_k import_k idl_k schema_k.
 
 Nonterminals
-    protocol typedefs
-    decorator decorator_value string array_of_strings array_of_strings_tail
-    typedef_tail typedef
+    protocol
+    annotation annotation_value string array_of_strings array_of_strings_tail
+    declaration declaration_tail
     import import_file_type
-    primitive
+    record record_field record_tail
+    type error
+    decimal
     enum enum_variants
-    union
-    record
+    union union_tail
     fixed
     array
     map
-    error
-    data
-    array_of_data array_of_data_tail.
+    function fun_return fun_arguments fun_argument fun_extra
+    data array_of_data array_of_data_tail map_of_data map_of_data_tail.
 
 Rootsymbol protocol.
 
@@ -32,23 +35,23 @@ protocol ->
     protocol_k id '{' '}' :
         {protocol, value_of('$2'), []}.
 protocol ->
-    protocol_k id '{' typedef typedef_tail :
+    protocol_k id '{' declaration declaration_tail :
         {protocol, value_of('$2'), ['$4' | '$5']}.
 protocol ->
-    decorator protocol :
-        {decorated, '$1', '$2'}.  % todo: embed into protocol?
+    annotation protocol :
+        {annotated, '$1', '$2'}.
 
 
-%% == Decorator ==
-decorator ->
-    annotation_v '(' decorator_value ')' :
-        {decorator, value_of('$1'), '$3'}.
+%% == Annotation ==
+annotation ->
+    annotation_v '(' annotation_value ')' :
+        {annotation, value_of('$1'), '$3'}.
 
 %% Maybe can just use `data` instead of `decorator_value`?
-decorator_value ->
+annotation_value ->
     string :
         '$1'.
-decorator_value ->
+annotation_value ->
     array_of_strings :
         '$1'.
 
@@ -71,26 +74,22 @@ array_of_strings_tail ->
         ['$2' | '$3'].
 
 
-%% == Type definitions (inside protocol or record) ==
+%% == Protocol definitions ==
 
-typedef_tail ->
+declaration_tail ->
     '}' :
         [].
-typedef_tail ->
-    typedef typedef_tail :
+declaration_tail ->
+    declaration declaration_tail :
         ['$1' | '$2'].
 
-%% TODO: generalize to 'type' name (= value)(;)
-typedef -> import : '$1'.
-typedef -> primitive : '$1'.
-typedef -> enum : '$1'.
-typedef -> union : '$1'.
-typedef -> record : '$1'.
-typedef -> fixed : '$1'.
-typedef -> array : '$1'.
-typedef -> map : '$1'.
-typedef -> error : '$1'.
-%% typedef -> function : '$1'.  % TODO
+declaration -> import : '$1'.
+declaration -> enum : '$1'.
+declaration -> fixed : '$1'.
+declaration -> error : '$1'.
+declaration -> record : '$1'.
+declaration -> function : '$1'.
+
 
 %% -- Import def
 
@@ -102,14 +101,6 @@ import_file_type -> idl_k : idl.
 import_file_type -> protocol_k : protocol.
 import_file_type -> schema_k : schema.
 
-%% -- Primitive typedef
-primitive ->
-    primitive_t id ';' :
-        {primitive, value_of('$2'), value_of('$1'), undefined}.
-primitive ->
-    primitive_t id '=' data ';' :
-        {primitive, value_of('$2'), value_of('$1'), '$4'}.
-
 %% -- Enum typedef
 enum ->
     enum_t id '{' id enum_variants :
@@ -121,34 +112,112 @@ enum_variants ->
 enum_variants ->
     ',' id enum_variants : [value_of('$2') | '$3'].
 
-
-union -> union_t : '$1'.                        %TODO
-record -> record_t : '$1'.                      %TODO
-
 %% -- Fixed typedef
 fixed ->
     fixed_t id '(' integer_v ')' ';':
-        {fixed, '$2', value_of('$4'), undefined}.
-fixed ->
-    fixed_t id '(' integer_v ')' '=' data ';' :
-        {fixed, '$2', value_of('$4'), '$6'}.
+        {fixed, value_of('$2'), value_of('$4')}.
+
+%% -- Error typedef
+error ->
+    error_k id '{' record_field record_tail :
+        {error, value_of('$2'), ['$4' | '$5']}.
+
+
+%% -- Record
+
+record ->
+    record_t id '{' record_field record_tail :
+        {record, value_of('$2'), ['$4' | '$5']}.
+record ->
+    annotation record :
+        {annotated, '$1', '$2'}.
+
+record_tail ->
+    '}' :
+        [].
+record_tail ->
+    record_field record_tail :
+        ['$1' | '$2'].
+
+record_field ->
+    type id ';' :
+        {field, value_of('$2'), '$1', undefined}.
+record_field ->
+    type id '=' data ';' :
+        {field, value_of('$2'), '$1', '$4'}.
+
+type -> primitive_t : value_of('$1').
+type -> logical_t : value_of('$1').
+type -> null : null.
+type -> id : {custom, value_of('$1')}.
+type -> ns_id : {custom, value_of('$1')}.
+type -> decimal : '$1'.
+type -> union : '$1'.
+type -> array : '$1'.
+type -> map : '$1'.
+
+%% -- Decimal
+decimal ->
+    decimal_t '(' integer_v ',' integer_v ')' :    %decimal(precision, scale)
+        {decimal, value_of('$3'), value_of('$5')}. %
+
+%% -- Union
+union ->
+    union_t '{' type union_tail :
+        {union, ['$3' | '$4']}.
+
+union_tail ->
+    '}' :
+        [].
+union_tail ->
+    ',' type union_tail :
+        ['$2' | '$3'].
 
 %% -- Array typedef
 array ->
-    array_t '<' primitive_t '>' id ';' :
-        {array, value_of('$5'), value_of('$3'), undefined}.         %FIXME: not just primitives!
-array ->
-    array_t '<' primitive_t '>' id '=' data ';' :
-        {array, value_of('$5'), value_of('$3'), '$7'}.
+    array_t '<' primitive_t '>' :
+        {array, value_of('$3')}.         %FIXME: not just primitives!
 
 %% -- Map typedef
 map ->
-    map_t '<' primitive_t '>' id ';' :
-        {map, '$5', value_of('$3'), undefined}.           %FIXME: not just primitives!; defaults!
+    map_t '<' primitive_t '>' :
+        {map, value_of('$3')}.           %FIXME: not just primitives!
+
+%% == Function (message) definitions
+
+function ->
+    fun_return id '(' fun_arguments ')' fun_extra ';' :
+        {function, value_of('$2'), '$4', '$6'}.
+
+fun_return -> type : '$1'.
+fun_return -> void_k : void.
+
+fun_arguments ->
+    '$empty' :
+        [].
+fun_arguments ->
+    fun_argument :
+        ['$1'].
+fun_arguments ->
+    fun_argument ',' fun_arguments :
+        ['$1' | '$3'].
+
+fun_argument ->
+    type id :
+        {arg, value_of('$2'), '$1', undefined}.
+fun_argument ->
+    type id '=' data :
+        {arg, value_of('$2'), '$1', '$4'}.
+
+fun_extra ->
+    '$empty' : undefined.
+fun_extra ->
+    throws_k id :
+        {throws, value_of('$2')}.
+fun_extra ->
+    oneway_k :
+        oneway.
 
-%% -- Error typedef
-error ->
-    error_k : '$1'.                             %TODO
 
 %% == Data (JSON) for default values
 data -> string_v : value_of('$1').
@@ -156,6 +225,8 @@ data -> integer_v : value_of('$1').
 data -> float_v : value_of('$1').
 data -> bool_v : value_of('$1').
 data -> array_of_data : '$1'.
+data -> null : null.
+data -> map_of_data : '$1'.
 
 array_of_data ->
     '[' ']' :
@@ -171,6 +242,20 @@ array_of_data_tail ->
     ',' data array_of_data_tail :
         ['$2' | '$3'].
 
+map_of_data ->
+    '{' '}' :
+        #{}.
+map_of_data ->
+    '{' string_v ':' data map_of_data_tail :
+        ('$5')#{value_of('$2') => '$4'}.
+
+map_of_data_tail ->
+    '}' :
+        #{}.
+map_of_data_tail ->
+    ',' string_v ':' data map_of_data_tail:
+        ('$5')#{value_of('$2') => '$4'}.
+
 Erlang code.
 
 value_of(Token) ->
diff --git a/test/data/protocol_with_typedefs.avdl b/test/data/protocol_with_typedefs.avdl
index 75ca25e..b428d2d 100644
--- a/test/data/protocol_with_typedefs.avdl
+++ b/test/data/protocol_with_typedefs.avdl
@@ -1,4 +1,4 @@
-
+@namespace("org.erlang.www")
 protocol MyProto {
    import idl "foo.avdl";
    import protocol "bar.avpr";
@@ -14,13 +14,37 @@ protocol MyProto {
        VAR22,
        VAR23
    }
+   fixed MyFix(10);
+   record MyRec {
+      int my_int;
+      string my_string = "wasd";
+      float my_float = 12.34;
+      boolean my_bool = false;
+      MyFix my_custom;
+      union {boolean, null} my_union = null;
+      date my_date = 123456;
+      decimal(5, 2) my_decimal = 1222;
+
+      array<int> my_int_array;
+      array<int> my_int_array_def = [1, 2, 3];
+      array<string> my_str_array_def = ["123", "456", "cdf"];
+
+      map<float> my_map = {"a": 1.23, "b": 45.67};
+   }
+   @namespace("org.erlang.ftp")
+   record MyAnnotated {
+       org.erlang.www.MyError `error`;
+   }
+   error MyError {
+      MyEnum2 code;
+      string description;
+   }
+
+   float mul(int arg1, float arg2 = 1.0);
+
+   MyFix append(bytes arg1, string arg2 = "tail") throws MyError;
 
-   int my_int;
-   string my_string = "wasd";
-   float my_float = 12.34;
-   boolean my_bool = false;
+   void gen_server_cast(map<float> opts) oneway;
 
-   array<int> my_int_array;
-   array<int> my_int_array_def = [1, 2, 3];
-   array<string> my_str_array_def = ["123", "456", "cdf"];
+   MyEnum1 ping();
 }

From 93ffe1d09714d879269c1ed2bf618f122c166229 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Mon, 22 Jul 2019 17:59:12 +0200
Subject: [PATCH 03/13] More strict lexical rule for namespaced ID

---
 src/avro_idl_lexer.xrl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index 7e98033..f288882 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -59,9 +59,9 @@ error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(Toke
 
 @[a-zA-Z0-9_-]+ : {token, {annotation_v, TokenLine, unescape_annotation(TokenChars)}}.
 
-[A-Za-z_][A-Za-z_0-9]* : {token, {id, TokenLine, TokenChars}}.
+[A-Za-z_][A-Za-z0-9_]* : {token, {id, TokenLine, TokenChars}}.
 %% namespaced will only be allowed in data type spec
-[A-Za-z_][A-Za-z_0-9\.]+[A-Za-z_0-9] : {token, {ns_id, TokenLine, TokenChars}}.
+[A-Za-z_][A-Za-z0-9_]+(\.[A-Za-z_][A-Za-z0-9_]+)+ : {token, {ns_id, TokenLine, TokenChars}}.
 
 Erlang code.
 

From afd1aa82248ecebece38df5f33841da64b654bcc Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Tue, 23 Jul 2019 11:29:32 +0200
Subject: [PATCH 04/13] Use `string:strip` to be compatible with OTP19

---
 src/avro_idl_lexer.xrl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index f288882..6c54997 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -67,7 +67,7 @@ Erlang code.
 
 
 unescape(Token, Char) ->
-    string:trim(Token, both, [Char]).
+    string:strip(Token, both, Char).
 
 unescape_line_comment("//" ++ Comment) ->
     Comment.

From e23b925cdfd78b6fa4ebdec14d2f8b8dd6f59088 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Wed, 24 Jul 2019 16:08:49 +0200
Subject: [PATCH 05/13] Don't ignore function return type

---
 src/avro_idl_parser.yrl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index b873c3a..0ff9afa 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -187,7 +187,7 @@ map ->
 
 function ->
     fun_return id '(' fun_arguments ')' fun_extra ';' :
-        {function, value_of('$2'), '$4', '$6'}.
+        {function, value_of('$2'), '$4', '$1', '$6'}.
 
 fun_return -> type : '$1'.
 fun_return -> void_k : void.

From c74d28cf69a67d19e5542ba6083cd1684c44fddf Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sun, 8 Mar 2020 03:44:10 +0100
Subject: [PATCH 06/13] Full annotations support; parser tests added

---
 src/avro_idl_parser.yrl       |  65 ++++++++++++------
 src/idl.hrl                   |  41 ++++++++++++
 test/avro_idl_parse_tests.erl | 121 ++++++++++++++++++++++++++++++++++
 test/data/annotations.avdl    |  23 +++++++
 test/data/empty_protocol.avdl |   7 +-
 5 files changed, 232 insertions(+), 25 deletions(-)
 create mode 100644 src/idl.hrl
 create mode 100644 test/avro_idl_parse_tests.erl
 create mode 100644 test/data/annotations.avdl

diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index 0ff9afa..75e48b1 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -14,10 +14,10 @@ Terminals id ns_id null string_v doc_v float_v integer_v bool_v annotation_v
 
 Nonterminals
     protocol
-    annotation annotation_value string array_of_strings array_of_strings_tail
+    annotations annotation annotation_value string array_of_strings array_of_strings_tail
     declaration declaration_tail
     import import_file_type
-    record record_field record_tail
+    record record_field record_field_name record_tail
     type error
     decimal
     enum enum_variants
@@ -33,19 +33,26 @@ Rootsymbol protocol.
 
 protocol ->
     protocol_k id '{' '}' :
-        {protocol, value_of('$2'), []}.
+        #protocol{name = value_of('$2')}.
 protocol ->
     protocol_k id '{' declaration declaration_tail :
-        {protocol, value_of('$2'), ['$4' | '$5']}.
+        #protocol{name = value_of('$2'), definitions = ['$4' | '$5']}.
 protocol ->
-    annotation protocol :
-        {annotated, '$1', '$2'}.
+    annotations protocol :
+        ('$2')#protocol{annotations = '$1'}.
 
 
 %% == Annotation ==
+annotations ->
+    annotation :
+        ['$1'].
+annotations ->
+    annotation annotations :
+        ['$1' | '$2'].
+
 annotation ->
     annotation_v '(' annotation_value ')' :
-        {annotation, value_of('$1'), '$3'}.
+        #annotation{name = value_of('$1'), value = '$3'}.
 
 %% Maybe can just use `data` instead of `decorator_value`?
 annotation_value ->
@@ -104,7 +111,10 @@ import_file_type -> schema_k : schema.
 %% -- Enum typedef
 enum ->
     enum_t id '{' id enum_variants :
-        {enum, value_of('$2'), [value_of('$4') | '$5']}.
+        #enum{name = value_of('$2'), variants = [value_of('$4') | '$5']}.
+enum ->
+    annotations enum :
+        ('$2')#enum{annotations = '$1'}.
 
 enum_variants ->
     '}' :
@@ -115,22 +125,27 @@ enum_variants ->
 %% -- Fixed typedef
 fixed ->
     fixed_t id '(' integer_v ')' ';':
-        {fixed, value_of('$2'), value_of('$4')}.
+        #fixed{name = value_of('$2'), size = value_of('$4')}.
+fixed ->
+    annotations fixed :
+        ('$2')#fixed{annotations = '$1'}.
 
 %% -- Error typedef
 error ->
     error_k id '{' record_field record_tail :
-        {error, value_of('$2'), ['$4' | '$5']}.
-
+        #error{name = value_of('$2'), fields = ['$4' | '$5']}.
+error ->
+    annotations error :
+        ('$2')#error{annotations = '$1'}.
 
 %% -- Record
 
 record ->
     record_t id '{' record_field record_tail :
-        {record, value_of('$2'), ['$4' | '$5']}.
+        #record{name = value_of('$2'), fields = ['$4' | '$5']}.
 record ->
-    annotation record :
-        {annotated, '$1', '$2'}.
+    annotations record :
+        ('$2')#record{annotations = '$1'}.
 
 record_tail ->
     '}' :
@@ -140,11 +155,22 @@ record_tail ->
         ['$1' | '$2'].
 
 record_field ->
-    type id ';' :
-        {field, value_of('$2'), '$1', undefined}.
+    type record_field_name ';' :
+        #field{name = element(1, '$2'), annotations = element(2, '$2'), type = '$1'}.
 record_field ->
-    type id '=' data ';' :
-        {field, value_of('$2'), '$1', '$4'}.
+    type record_field_name '=' data ';' :
+        #field{name = element(1, '$2'), annotations = element(2, '$2'),
+               type = '$1', default = '$4'}.
+record_field ->
+    annotations record_field :
+        ('$2')#field{annotations = '$1' ++ ('$2')#field.annotations}.
+
+record_field_name ->
+    id :
+        {value_of('$1'), []}.
+record_field_name ->
+    annotations id :
+        {value_of('$2'), '$1'}.
 
 type -> primitive_t : value_of('$1').
 type -> logical_t : value_of('$1').
@@ -187,7 +213,7 @@ map ->
 
 function ->
     fun_return id '(' fun_arguments ')' fun_extra ';' :
-        {function, value_of('$2'), '$4', '$1', '$6'}.
+        #function{name = value_of('$2'), arguments = '$4', return = '$1', extra = '$6'}.
 
 fun_return -> type : '$1'.
 fun_return -> void_k : void.
@@ -257,6 +283,7 @@ map_of_data_tail ->
         ('$5')#{value_of('$2') => '$4'}.
 
 Erlang code.
+-include("idl.hrl").
 
 value_of(Token) ->
     try element(3, Token)
diff --git a/src/idl.hrl b/src/idl.hrl
new file mode 100644
index 0000000..cd37ed0
--- /dev/null
+++ b/src/idl.hrl
@@ -0,0 +1,41 @@
+-record(protocol,
+        {name,
+         annotations = [],
+         definitions = []}).
+
+-record(annotation,
+        {name,
+         value}).
+
+-record(enum,
+        {name,
+         annotations = [],
+         variants = []}).
+
+-record(fixed,
+        {name,
+         annotations = [],
+         size}).
+
+-record(error,
+        {name,
+         annotations = [],
+         fields = []}).
+
+-record(record,
+        {name,
+         annotations = [],
+         fields = []}).
+
+-record(field,
+        {name,
+         annotations = [],
+         type,
+         default}).
+
+-record(function,
+        {name,
+         %% annotations = [],
+         arguments = [],
+         return,
+         extra}).
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
new file mode 100644
index 0000000..998751c
--- /dev/null
+++ b/test/avro_idl_parse_tests.erl
@@ -0,0 +1,121 @@
+%% coding: latin-1
+%%%-------------------------------------------------------------------
+%%% Copyright (c) 2013-2018 Klarna AB
+%%%
+%%% This file is provided to you under the Apache License,
+%%% Version 2.0 (the "License"); you may not use this file
+%%% except in compliance with the License.  You may obtain
+%%% a copy of the License at
+%%%
+%%%   http://www.apache.org/licenses/LICENSE-2.0
+%%%
+%%% Unless required by applicable law or agreed to in writing,
+%%% software distributed under the License is distributed on an
+%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%%% KIND, either express or implied.  See the License for the
+%%% specific language governing permissions and limitations
+%%% under the License.
+%%%
+%%%-------------------------------------------------------------------
+-module(avro_idl_parse_tests).
+
+-include("../src/idl.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+parse_empty_protocol_test() ->
+    ?assertEqual(
+       #protocol{name = "MyProto"},
+       parse_idl("empty_protocol")).
+
+parse_annotations_test() ->
+    ?assertEqual(
+       #protocol{
+          name = "MyProto",
+          annotations =
+              [#annotation{name = "version",
+                           value = "1.0"},
+               #annotation{name = "aliases",
+                           value = ["ns.Proto1", "ns.Proto2"]}
+              ],
+          definitions =
+              [#enum{name = "MyEnum",
+                     annotations =
+                         [#annotation{name = "namespace",
+                                      value = "enums"}],
+                     variants = ["A", "B", "C"]},
+               #fixed{name = "MyFixed",
+                      annotations =
+                          [#annotation{name = "namespace",
+                                       value = "fixeds"}],
+                     size = 16},
+               #error{name = "MyError",
+                      annotations =
+                          [#annotation{name = "namespace",
+                                       value = "errors"}],
+                      fields =
+                          [#field{name = "my_err_field",
+                                  annotations =
+                                      [#annotation{name = "order",
+                                                   value = "ignore"}],
+                                  type = string}]},
+               #record{name = "MyRecord",
+                       annotations =
+                           [#annotation{name = "namespace",
+                                        value = "records"}],
+                       fields =
+                           [#field{name = "my_record_field",
+                                   annotations =
+                                       [#annotation{name = "order",
+                                                    value = "ignore"},
+                                        #annotation{name = "aliases",
+                                                    value = ["my_alias"]}],
+                                   type = string}]}]
+         },
+       parse_idl("annotations")).
+
+full_protocol_test() ->
+    ?assertMatch(
+      #protocol{name = "Simple",
+                definitions =
+                    [#enum{name = "Kind"},
+                     #fixed{name = "MD5"},
+                     #record{name = "TestRecord"},
+                     #error{name = "TestError"},
+                     #function{name = "hello"},
+                     #function{name = "echo"},
+                     #function{name = "add"},
+                     #function{name = "echoBytes"},
+                     #function{name = "error"},
+                     #function{name = "ping"}]},
+      parse_idl("full_protocol")).
+
+protocol_with_typedeffs_test() ->
+    ?assertMatch(
+      #protocol{name = "MyProto",
+                definitions =
+                    [{import, idl, "foo.avdl"},
+                     {import, protocol, "bar.avpr"},
+                     {import, schema, "baz.avsc"},
+                     #enum{name = "MyEnum1"},
+                     #enum{name = "MyEnum2"},
+                     #fixed{name = "MyFix"},
+                     #record{name = "MyRec"},
+                     #record{name = "MyAnnotated"},
+                     #error{name = "MyError"},
+                     #function{name = "mul"},
+                     #function{name = "append"},
+                     #function{name = "gen_server_cast"},
+                     #function{name = "ping"}]},
+       parse_idl("protocol_with_typedefs")).
+
+parse_idl(Name) ->
+    File = "test/data/" ++ Name ++ ".avdl",
+    {ok, B} = file:read_file(File),
+    {ok, T, _} =  avro_idl_lexer:string(binary_to_list(B)),
+    NoComments = lists:filter(
+                  fun({doc_v, _, _}) -> false;
+                     ({comment_v, _, _}) -> false;
+                     (_) -> true
+                  end, T),
+    {ok, Tree} = avro_idl_parser:parse(NoComments),
+    Tree.
diff --git a/test/data/annotations.avdl b/test/data/annotations.avdl
new file mode 100644
index 0000000..a94a705
--- /dev/null
+++ b/test/data/annotations.avdl
@@ -0,0 +1,23 @@
+@version("1.0")
+@aliases(["ns.Proto1", "ns.Proto2"])
+protocol MyProto {
+    @namespace("enums")
+    enum MyEnum {
+        A, B, C
+    }
+
+    @namespace("fixeds")
+    fixed MyFixed(16);
+
+    @namespace("errors")
+    error MyError {
+        @order("ignore")
+        string my_err_field;
+    }
+
+    @namespace("records")
+    record MyRecord {
+        @order("ignore")
+        string @aliases(["my_alias"]) my_record_field;
+    }
+}
diff --git a/test/data/empty_protocol.avdl b/test/data/empty_protocol.avdl
index 20082c9..8c2e382 100644
--- a/test/data/empty_protocol.avdl
+++ b/test/data/empty_protocol.avdl
@@ -1,6 +1 @@
-@deco1("wasd")
-@deco2(["abc", "def"])
-@deco3([])
-protocol MyProto {
-
-}
+protocol MyProto {}

From 788f725463d93738c8f6dab430a9e16582490f14 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Tue, 10 Mar 2020 03:21:37 +0100
Subject: [PATCH 07/13] Add support for docstrings; rename `annotations` field
 to `meta`

---
 rebar.config                  |  4 ++-
 src/avro_idl_lexer.xrl        | 38 ++++++++++++++++++++---
 src/avro_idl_parser.yrl       | 57 ++++++++++++++++++++---------------
 src/idl.hrl                   | 14 ++++-----
 test/avro_idl_parse_tests.erl | 52 +++++++++++++++++++-------------
 test/data/annotations.avdl    | 15 ++++++++-
 6 files changed, 121 insertions(+), 59 deletions(-)

diff --git a/rebar.config b/rebar.config
index 9f49d6c..c5a0be6 100644
--- a/rebar.config
+++ b/rebar.config
@@ -3,7 +3,7 @@
 %                       , warnings_as_errors
                        , {d,'NOTEST'}
                        ]}.
-{eunit_opts,           [verbose]}.
+%% {eunit_opts,           [verbose]}.
 {xref_checks,          [ undefined_function_calls
                        , deprecated_function_calls
                        ]}.
@@ -15,3 +15,5 @@
 {cover_opts, [verbose]}.
 {cover_enabled, true}.
 {cover_export_enabled, true}.
+
+%% {yrl_opts, [{verbose, true}]}.
diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index 6c54997..274a1ee 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -14,10 +14,6 @@ Rules.
 
 //[^\r\n]* : {token, {comment_v, TokenLine, unescape_line_comment(TokenChars)}}.
 
-%% `/**` is a docstring for the following object
-/\*\*(.|[\r\n])*\*/ : {token, {doc_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
-/\*(.|[\r\n])*\*/ : {token, {comment_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
-
 \{ : {token, {'{', TokenLine}}.
 \} : {token, {'}', TokenLine}}.
 \( : {token, {'(', TokenLine}}.
@@ -63,9 +59,41 @@ error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(Toke
 %% namespaced will only be allowed in data type spec
 [A-Za-z_][A-Za-z0-9_]+(\.[A-Za-z_][A-Za-z0-9_]+)+ : {token, {ns_id, TokenLine, TokenChars}}.
 
-Erlang code.
+%% https://blog.ostermiller.org/finding-comments-in-source-code-using-regular-expressions/
+%% `/** .. */` is a docstring for the following object
+(/\*\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)  : {token, {doc_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
+%% `/* .. */` is just a comment
+(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/) : {token, {comment_v, TokenLine, unescape_multiline_comment(TokenChars)}}.
 
 
+Erlang code.
+-export([preprocess/2]).
+
+%% Api helpers
+
+-spec preprocess(Tokens, [drop_comments | trim_doc]) -> Tokens when
+      Tokens :: [tuple()].
+preprocess(Tokens, Actions) ->
+    lists:foldl(fun do_preprocess/2, Tokens, Actions).
+
+do_preprocess(drop_comments, T) ->
+    lists:filter(
+      fun({comment_v, _, _}) -> false;
+         (_) -> true
+      end, T);
+do_preprocess(trim_doc, T) ->
+    lists:map(
+      fun({doc_v, Loc, Val}) ->
+              {doc_v, Loc, trim_doc(Val)};
+         (Tok) -> Tok
+      end, T).
+
+trim_doc(Doc) ->
+    re:replace(Doc, "^[\\s\\*]*((?U).*)[\\s]*$", "\\1",
+               [global, multiline, {return, list}]).
+
+%% Lexer internal helpers
+
 unescape(Token, Char) ->
     string:strip(Token, both, Char).
 
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index 75e48b1..1e9e38f 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -1,8 +1,6 @@
 %% @doc Avro IDL parser
-%% https://avro.apache.org/docs/current/idl.html
-%% XXX: all `comment_v` tockens should be filtered-out before parsing!
-%% TODO: docstrings
-%% TODO: better annotations support
+%% https://avro.apache.org/docs/1.9.2/idl.html
+%% XXX: all `comment_v` tokens should be filtered-out before parsing!
 
 Header "%% Hello".
 
@@ -14,7 +12,7 @@ Terminals id ns_id null string_v doc_v float_v integer_v bool_v annotation_v
 
 Nonterminals
     protocol
-    annotations annotation annotation_value string array_of_strings array_of_strings_tail
+    meta meta_item annotation annotation_value string array_of_strings array_of_strings_tail
     declaration declaration_tail
     import import_file_type
     record record_field record_field_name record_tail
@@ -30,6 +28,10 @@ Nonterminals
 
 Rootsymbol protocol.
 
+%% There are 2 shift/reduce conflicts expected due to ambiguity in
+%% meta / meta_item that is automatically correctly resolved with shift.
+%% See https://www.gnu.org/software/bison/manual/html_node/Shift_002fReduce.html
+Expect 2.
 
 protocol ->
     protocol_k id '{' '}' :
@@ -38,18 +40,25 @@ protocol ->
     protocol_k id '{' declaration declaration_tail :
         #protocol{name = value_of('$2'), definitions = ['$4' | '$5']}.
 protocol ->
-    annotations protocol :
-        ('$2')#protocol{annotations = '$1'}.
+    meta protocol :
+        ('$2')#protocol{meta = '$1'}.
 
 
 %% == Annotation ==
-annotations ->
-    annotation :
+meta ->
+    meta_item :
         ['$1'].
-annotations ->
-    annotation annotations :
+meta ->
+    meta_item meta :
         ['$1' | '$2'].
 
+meta_item ->
+    annotation :
+        '$1'.
+meta_item ->
+    doc_v :
+        {doc, value_of('$1')}.
+
 annotation ->
     annotation_v '(' annotation_value ')' :
         #annotation{name = value_of('$1'), value = '$3'}.
@@ -113,8 +122,8 @@ enum ->
     enum_t id '{' id enum_variants :
         #enum{name = value_of('$2'), variants = [value_of('$4') | '$5']}.
 enum ->
-    annotations enum :
-        ('$2')#enum{annotations = '$1'}.
+    meta enum :
+        ('$2')#enum{meta = '$1'}.
 
 enum_variants ->
     '}' :
@@ -127,16 +136,16 @@ fixed ->
     fixed_t id '(' integer_v ')' ';':
         #fixed{name = value_of('$2'), size = value_of('$4')}.
 fixed ->
-    annotations fixed :
-        ('$2')#fixed{annotations = '$1'}.
+    meta fixed :
+        ('$2')#fixed{meta = '$1'}.
 
 %% -- Error typedef
 error ->
     error_k id '{' record_field record_tail :
         #error{name = value_of('$2'), fields = ['$4' | '$5']}.
 error ->
-    annotations error :
-        ('$2')#error{annotations = '$1'}.
+    meta error :
+        ('$2')#error{meta = '$1'}.
 
 %% -- Record
 
@@ -144,8 +153,8 @@ record ->
     record_t id '{' record_field record_tail :
         #record{name = value_of('$2'), fields = ['$4' | '$5']}.
 record ->
-    annotations record :
-        ('$2')#record{annotations = '$1'}.
+    meta record :
+        ('$2')#record{meta = '$1'}.
 
 record_tail ->
     '}' :
@@ -156,20 +165,20 @@ record_tail ->
 
 record_field ->
     type record_field_name ';' :
-        #field{name = element(1, '$2'), annotations = element(2, '$2'), type = '$1'}.
+        #field{name = element(1, '$2'), meta = element(2, '$2'), type = '$1'}.
 record_field ->
     type record_field_name '=' data ';' :
-        #field{name = element(1, '$2'), annotations = element(2, '$2'),
+        #field{name = element(1, '$2'), meta = element(2, '$2'),
                type = '$1', default = '$4'}.
 record_field ->
-    annotations record_field :
-        ('$2')#field{annotations = '$1' ++ ('$2')#field.annotations}.
+    meta record_field :
+        ('$2')#field{meta = '$1' ++ ('$2')#field.meta}.
 
 record_field_name ->
     id :
         {value_of('$1'), []}.
 record_field_name ->
-    annotations id :
+    meta id :
         {value_of('$2'), '$1'}.
 
 type -> primitive_t : value_of('$1').
diff --git a/src/idl.hrl b/src/idl.hrl
index cd37ed0..7b3a9e6 100644
--- a/src/idl.hrl
+++ b/src/idl.hrl
@@ -1,6 +1,6 @@
 -record(protocol,
         {name,
-         annotations = [],
+         meta = [],
          definitions = []}).
 
 -record(annotation,
@@ -9,33 +9,33 @@
 
 -record(enum,
         {name,
-         annotations = [],
+         meta = [],
          variants = []}).
 
 -record(fixed,
         {name,
-         annotations = [],
+         meta = [],
          size}).
 
 -record(error,
         {name,
-         annotations = [],
+         meta = [],
          fields = []}).
 
 -record(record,
         {name,
-         annotations = [],
+         meta = [],
          fields = []}).
 
 -record(field,
         {name,
-         annotations = [],
+         meta = [],
          type,
          default}).
 
 -record(function,
         {name,
-         %% annotations = [],
+         %% meta = [],
          arguments = [],
          return,
          extra}).
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index 998751c..cbca2df 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -31,42 +31,52 @@ parse_annotations_test() ->
     ?assertEqual(
        #protocol{
           name = "MyProto",
-          annotations =
-              [#annotation{name = "version",
+          meta =
+              [{doc, "My protocol"},
+               {doc, "No, really\nIt's some multiline doc\n"
+                "bullet points will be stripped\nso no unordered lists"},
+               #annotation{name = "version",
                            value = "1.0"},
                #annotation{name = "aliases",
                            value = ["ns.Proto1", "ns.Proto2"]}
               ],
           definitions =
               [#enum{name = "MyEnum",
-                     annotations =
-                         [#annotation{name = "namespace",
+                     meta =
+                         [{doc, "My enum"},
+                          #annotation{name = "namespace",
                                       value = "enums"}],
                      variants = ["A", "B", "C"]},
                #fixed{name = "MyFixed",
-                      annotations =
-                          [#annotation{name = "namespace",
+                      meta =
+                          [{doc, "My Fixed"},
+                           #annotation{name = "namespace",
                                        value = "fixeds"}],
                      size = 16},
                #error{name = "MyError",
-                      annotations =
-                          [#annotation{name = "namespace",
+                      meta =
+                          [{doc, "My Error"},
+                           #annotation{name = "namespace",
                                        value = "errors"}],
                       fields =
                           [#field{name = "my_err_field",
-                                  annotations =
-                                      [#annotation{name = "order",
+                                  meta =
+                                      [{doc, "My Err Field"},
+                                       #annotation{name = "order",
                                                    value = "ignore"}],
                                   type = string}]},
                #record{name = "MyRecord",
-                       annotations =
-                           [#annotation{name = "namespace",
+                       meta =
+                           [{doc, "My Record"},
+                            #annotation{name = "namespace",
                                         value = "records"}],
                        fields =
                            [#field{name = "my_record_field",
-                                   annotations =
-                                       [#annotation{name = "order",
+                                   meta =
+                                       [{doc, "My Rec Field Type"},
+                                        #annotation{name = "order",
                                                     value = "ignore"},
+                                        {doc, "My Rec Field"},
                                         #annotation{name = "aliases",
                                                     value = ["my_alias"]}],
                                    type = string}]}]
@@ -76,6 +86,9 @@ parse_annotations_test() ->
 full_protocol_test() ->
     ?assertMatch(
       #protocol{name = "Simple",
+                meta =
+                    [{doc, "An example protocol in Avro IDL"},
+                     #annotation{}],
                 definitions =
                     [#enum{name = "Kind"},
                      #fixed{name = "MD5"},
@@ -111,11 +124,8 @@ protocol_with_typedeffs_test() ->
 parse_idl(Name) ->
     File = "test/data/" ++ Name ++ ".avdl",
     {ok, B} = file:read_file(File),
-    {ok, T, _} =  avro_idl_lexer:string(binary_to_list(B)),
-    NoComments = lists:filter(
-                  fun({doc_v, _, _}) -> false;
-                     ({comment_v, _, _}) -> false;
-                     (_) -> true
-                  end, T),
-    {ok, Tree} = avro_idl_parser:parse(NoComments),
+    {ok, T0, _} =  avro_idl_lexer:string(binary_to_list(B)),
+    %% ?debugFmt("Name: ~p~nTokens:~n~p", [Name, T0]),
+    T = avro_idl_lexer:preprocess(T0, [drop_comments, trim_doc]),
+    {ok, Tree} = avro_idl_parser:parse(T),
     Tree.
diff --git a/test/data/annotations.avdl b/test/data/annotations.avdl
index a94a705..09c6db6 100644
--- a/test/data/annotations.avdl
+++ b/test/data/annotations.avdl
@@ -1,23 +1,36 @@
+/** My protocol */
+/** No, really
+ * It's some multiline doc
+ *
+ * * bullet points will be stripped
+ * * so no unordered lists
+ */
 @version("1.0")
 @aliases(["ns.Proto1", "ns.Proto2"])
 protocol MyProto {
+    /** My enum */
     @namespace("enums")
     enum MyEnum {
         A, B, C
     }
 
+    /** My Fixed */
     @namespace("fixeds")
     fixed MyFixed(16);
 
+    /** My Error */
     @namespace("errors")
     error MyError {
+        /** My Err Field */
         @order("ignore")
         string my_err_field;
     }
 
+    /** My Record */
     @namespace("records")
     record MyRecord {
+        /** My Rec Field Type */
         @order("ignore")
-        string @aliases(["my_alias"]) my_record_field;
+        string /** My Rec Field */@aliases(["my_alias"]) my_record_field;
     }
 }

From e8c8739275b94ed08632f171f2d9c49f7c446a5f Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <sergey.prokhorov@klarna.com>
Date: Tue, 10 Mar 2020 19:13:37 +0100
Subject: [PATCH 08/13] IDL to avpr/avsc converter; support multiple `throws`
 types

---
 rebar.config                          |   2 +-
 src/avro_idl.erl                      | 173 ++++++++++++++++++++++++++
 src/avro_idl_parser.yrl               |  17 ++-
 src/idl.hrl                           |   6 +-
 test/avro_idl_parse_tests.erl         |  77 +++++++-----
 test/avro_idl_tests.erl               | 125 +++++++++++++++++++
 test/data/annotations.avdl            |   3 +
 test/data/protocol_with_typedefs.avdl |   4 +-
 8 files changed, 369 insertions(+), 38 deletions(-)
 create mode 100644 src/avro_idl.erl
 create mode 100644 test/avro_idl_tests.erl

diff --git a/rebar.config b/rebar.config
index c5a0be6..b792ee1 100644
--- a/rebar.config
+++ b/rebar.config
@@ -16,4 +16,4 @@
 {cover_enabled, true}.
 {cover_export_enabled, true}.
 
-%% {yrl_opts, [{verbose, true}]}.
+{yrl_opts, [{verbose, true}]}.
diff --git a/src/avro_idl.erl b/src/avro_idl.erl
new file mode 100644
index 0000000..122b3eb
--- /dev/null
+++ b/src/avro_idl.erl
@@ -0,0 +1,173 @@
+-module(avro_idl).
+
+-export([new_context/1,
+         str_to_avpr/2,
+         protocol_to_avpr/2,
+         typedecl_to_avsc/2]).
+-include("idl.hrl").
+
+-record(st, {cwd}).
+
+new_context(Cwd) ->
+    #st{cwd = Cwd}.
+
+str_to_avpr(String, Cwd) ->
+    str_to_avpr(String, Cwd, [drop_comments, trim_doc]).
+
+str_to_avpr(String, Cwd, Opts) ->
+    {ok, T0, _} =  avro_idl_lexer:string(String),
+    T = avro_idl_lexer:preprocess(T0, Opts),
+    {ok, Tree} = avro_idl_parser:parse(T),
+    protocol_to_avpr(Tree, new_context(Cwd)).
+
+protocol_to_avpr(#protocol{name = Name,
+                           meta = Meta,
+                           definitions = Defs0}, St) ->
+    Defs = process_imports(Defs0, St),
+    {Types, Messages} =
+        lists:partition(fun(#function{}) -> false;
+                           (_) -> true
+                        end, Defs),
+    Protocol0 =
+        #{protocol => Name,
+          types =>
+              lists:map(
+                fun(Type) ->
+                        typedecl_to_avsc(Type, St)
+                end, Types),
+          messages =>
+              lists:map(
+                fun(Message) ->
+                        message_to_avsc(Message, St)
+                end, Messages)
+         },
+    meta(Protocol0, Meta).
+
+process_imports(Defs, _St) ->
+    %% TODO
+    lists:filter(fun({import, _, _}) -> false;
+                    (_) -> true
+                 end, Defs).
+
+typedecl_to_avsc(#enum{name = Name, meta = Meta, variants = Vars}, _St) ->
+    meta(
+      #{type => enum,
+        name => Name,
+        variants => Vars
+       },
+      Meta);
+typedecl_to_avsc(#fixed{name = Name, meta = Meta, size = Size}, _St) ->
+    meta(
+      #{type => fixed,
+        name => Name,
+        size => Size},
+      Meta);
+typedecl_to_avsc(#error{name = Name, meta = Meta, fields = Fields}, St) ->
+    meta(
+      #{type => error,
+        name => Name,
+        fields => [field_to_avsc(Field, St) || Field <- Fields]},
+      Meta);
+typedecl_to_avsc(#record{name = Name, meta = Meta, fields = Fields}, St) ->
+    meta(
+      #{type => record,
+        name => Name,
+        fields => [field_to_avsc(Field, St) || Field <- Fields]},
+      Meta).
+
+field_to_avsc(#field{name = Name, meta = Meta,
+                     type = Type, default = Default}, St) ->
+    meta(
+      default(
+        #{name => Name,
+          type => type_to_avsc(Type, St)},
+        Default),         % TODO: maybe validate default matches type
+      Meta).
+
+message_to_avsc(#function{name = Name, meta = Meta,
+                          arguments = Args, return = Return,
+                          extra = Extra}, St) ->
+    %% TODO: arguments can just reuse `#field{}`
+    ArgsSchema =
+        [default(
+           #{name => ArgName,
+             type => type_to_avsc(Type, St)},
+           Default)
+         || {arg, ArgName, Type, Default} <- Args],
+    Schema0 =
+        #{name => Name,
+          request => ArgsSchema,
+          response => type_to_avsc(Return, St)},
+    Schema1 = case Extra of
+                  undefined -> Schema0;
+                  oneway ->
+                      Schema0#{'one-way' => true};
+                  {throws, ThrowsTypes} ->
+                      %% Throws = [type_to_avsc(TType, St)
+                      %%           || TType <- ThrowsTypes],
+                      Schema0#{error => ThrowsTypes}
+              end,
+    meta(Schema1, Meta).
+
+
+type_to_avsc(void, _St) ->
+    null;
+type_to_avsc(null, _St) ->
+    null;
+type_to_avsc(T, _St) when T == int;
+                          T == long;
+                          T == string;
+                          T == boolean;
+                          T == float;
+                          T == double;
+                          T == bytes ->
+    T;
+type_to_avsc({decimal, Precision, Scale}, _St) ->
+    #{type => bytes,
+      'logicalType' => "decimal",
+      precision => Precision,
+      scale => Scale};
+type_to_avsc(date, _St) ->
+    #{type => int,
+      'logicalType' => "date"};
+type_to_avsc(time_ms, _St) ->
+    #{type => int,
+      'logicalType' => "time-millis"};
+type_to_avsc(timestamp_ms, _St) ->
+    #{type => long,
+      'logicalType' => "timestamp-millis"};
+type_to_avsc({custom, Id}, _St) ->
+    Id;
+type_to_avsc({union, Types}, St) ->
+    [type_to_avsc(Type, St) || Type <- Types];
+type_to_avsc({array, Of}, St) ->
+    #{type => array,
+      items => type_to_avsc(Of, St)};
+type_to_avsc({map, ValType}, St) ->
+    #{type => map,
+      values => type_to_avsc(ValType, St)}.
+
+meta(Schema, Meta) ->
+    {Docs, Annotations} =
+        lists:partition(
+          fun({doc, _}) -> true;
+             (#annotation{}) -> false
+          end, Meta),
+    Schema1 = case Docs of
+                  [] -> Schema;
+                  _ ->
+                      DocStrings = [S || {doc, S} <- Docs],
+                      Schema#{"doc" => lists:flatten(lists:join(
+                                                       "\n", DocStrings))}
+              end,
+    lists:foldl(
+     fun(#annotation{name = Name, value = Value}, Schema2) ->
+             maps:is_key(Name, Schema2) andalso
+                 error({duplicate_annotation, Name, Value, Schema2}),
+             Schema2#{Name => Value}
+     end, Schema1, Annotations).
+
+default(Obj, undefined) ->
+    Obj;
+default(Obj, Default) ->
+    Obj#{default => Default}.
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index 1e9e38f..d2c33b0 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -23,7 +23,7 @@ Nonterminals
     fixed
     array
     map
-    function fun_return fun_arguments fun_argument fun_extra
+    function fun_return fun_arguments fun_argument fun_extra throws
     data array_of_data array_of_data_tail map_of_data map_of_data_tail.
 
 Rootsymbol protocol.
@@ -111,7 +111,7 @@ declaration -> function : '$1'.
 
 import ->
     import_k import_file_type string_v ';' :
-        {import, '$2', value_of('$3')}.
+        #import{type = '$2', file_path = value_of('$3')}.
 
 import_file_type -> idl_k : idl.
 import_file_type -> protocol_k : protocol.
@@ -223,6 +223,9 @@ map ->
 function ->
     fun_return id '(' fun_arguments ')' fun_extra ';' :
         #function{name = value_of('$2'), arguments = '$4', return = '$1', extra = '$6'}.
+function ->
+    doc_v function :
+        ('$2')#function{meta = [{doc, value_of('$1')}]}.
 
 fun_return -> type : '$1'.
 fun_return -> void_k : void.
@@ -247,12 +250,18 @@ fun_argument ->
 fun_extra ->
     '$empty' : undefined.
 fun_extra ->
-    throws_k id :
-        {throws, value_of('$2')}.
+    throws_k id throws :
+        {throws, [value_of('$2') | '$3']}.
 fun_extra ->
     oneway_k :
         oneway.
 
+throws ->
+    '$empty' :
+        [].
+throws ->
+    ',' id throws:
+        [value_of('$2') | '$3'].
 
 %% == Data (JSON) for default values
 data -> string_v : value_of('$1').
diff --git a/src/idl.hrl b/src/idl.hrl
index 7b3a9e6..e5a9b8f 100644
--- a/src/idl.hrl
+++ b/src/idl.hrl
@@ -7,6 +7,10 @@
         {name,
          value}).
 
+-record(import,
+        {type,
+         file_path}).
+
 -record(enum,
         {name,
          meta = [],
@@ -35,7 +39,7 @@
 
 -record(function,
         {name,
-         %% meta = [],
+         meta = [],
          arguments = [],
          return,
          extra}).
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index cbca2df..09128fb 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -1,22 +1,4 @@
-%% coding: latin-1
-%%%-------------------------------------------------------------------
-%%% Copyright (c) 2013-2018 Klarna AB
-%%%
-%%% This file is provided to you under the Apache License,
-%%% Version 2.0 (the "License"); you may not use this file
-%%% except in compliance with the License.  You may obtain
-%%% a copy of the License at
-%%%
-%%%   http://www.apache.org/licenses/LICENSE-2.0
-%%%
-%%% Unless required by applicable law or agreed to in writing,
-%%% software distributed under the License is distributed on an
-%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-%%% KIND, either express or implied.  See the License for the
-%%% specific language governing permissions and limitations
-%%% under the License.
-%%%
-%%%-------------------------------------------------------------------
+%% @doc Tests for IDL lexer + parser
 -module(avro_idl_parse_tests).
 
 -include("../src/idl.hrl").
@@ -79,7 +61,12 @@ parse_annotations_test() ->
                                         {doc, "My Rec Field"},
                                         #annotation{name = "aliases",
                                                     value = ["my_alias"]}],
-                                   type = string}]}]
+                                   type = string}]},
+              #function{name = "hello",
+                        meta = [{doc, "My Fun"}],
+                        arguments = [],
+                        return = string,
+                        extra = undefined}]
          },
        parse_idl("annotations")).
 
@@ -102,23 +89,53 @@ full_protocol_test() ->
                      #function{name = "ping"}]},
       parse_idl("full_protocol")).
 
-protocol_with_typedeffs_test() ->
+protocol_with_typedefs_test() ->
     ?assertMatch(
       #protocol{name = "MyProto",
                 definitions =
-                    [{import, idl, "foo.avdl"},
-                     {import, protocol, "bar.avpr"},
-                     {import, schema, "baz.avsc"},
+                    [#import{type = idl, file_path = "foo.avdl"},
+                     #import{type = protocol, file_path = "bar.avpr"},
+                     #import{type = schema, file_path = "baz.avsc"},
                      #enum{name = "MyEnum1"},
                      #enum{name = "MyEnum2"},
                      #fixed{name = "MyFix"},
-                     #record{name = "MyRec"},
-                     #record{name = "MyAnnotated"},
+                     #record{name = "MyRec",
+                             fields =
+                                 [#field{name = "my_int", type = int},
+                                  #field{name = "my_string", type = string},
+                                  #field{name = "my_float", type = float},
+                                  #field{name = "my_bool", type = boolean,
+                                         default = false},
+                                  #field{name = "my_custom",
+                                         type = {custom, "MyFix"}},
+                                  #field{name = "my_union",
+                                         type = {union, [boolean, null]},
+                                         default = null},
+                                  #field{name = "my_date",
+                                         type = date},
+                                  #field{name = "my_decimal",
+                                         type = {decimal, 5, 2}},
+                                  #field{name = "my_int_array",
+                                         type = {array, int}},
+                                  #field{},
+                                  #field{},
+                                  #field{name = "my_map",
+                                         type = {map, float}}
+                                 ]},
+                     #record{name = "MyAnnotated",
+                             fields =
+                                 [#field{
+                                     name = "error",
+                                     type = {custom,
+                                             "org.erlang.www.MyError"}}
+                                 ]},
                      #error{name = "MyError"},
-                     #function{name = "mul"},
-                     #function{name = "append"},
-                     #function{name = "gen_server_cast"},
-                     #function{name = "ping"}]},
+                     #function{name = "div",
+                               extra = {throws, ["DivisionByZero"]}},
+                     #function{name = "append",
+                               extra = {throws, ["MyError", "TheirError"]}},
+                     #function{name = "gen_server_cast", extra = oneway},
+                     #function{name = "ping", extra = undefined}]},
        parse_idl("protocol_with_typedefs")).
 
 parse_idl(Name) ->
diff --git a/test/avro_idl_tests.erl b/test/avro_idl_tests.erl
new file mode 100644
index 0000000..f90475e
--- /dev/null
+++ b/test/avro_idl_tests.erl
@@ -0,0 +1,125 @@
+%% @doc Tests for IDL converter / loader
+-module(avro_idl_tests).
+
+-include("../src/idl.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+
+empty_protocol_avpr_test() ->
+    ?assertEqual(
+       #{protocol => "MyProto",
+         types => [],
+         messages => []},
+       idl_to_avpr("empty_protocol")).
+
+
+annotations_avpr_test() ->
+    ?assertEqual(
+      #{"doc" => ("My protocol\nNo, really\nIt's some multiline doc\n"
+                  "bullet points will be stripped\nso no unordered lists"),
+        "version" => "1.0",
+        "aliases" => ["ns.Proto1", "ns.Proto2"],
+        protocol => "MyProto",
+        types =>
+            [#{"doc" => "My enum",
+               "namespace" => "enums",
+               type => enum,
+               name => "MyEnum",
+               variants => ["A", "B", "C"]},
+             #{"doc" => "My Fixed",
+               "namespace" => "fixeds",
+               type => fixed,
+               name => "MyFixed",
+               size => 16},
+             #{"doc" => "My Error",
+               "namespace" => "errors",
+               type => error,
+               name => "MyError",
+               fields =>
+                   [#{"doc" => "My Err Field",
+                      "order" => "ignore",
+                      type => string,
+                      name => "my_err_field"}]},
+             #{"doc" => "My Record",
+               "namespace" => "records",
+               type => record,
+               name => "MyRecord",
+               fields =>
+                   [#{"doc" => "My Rec Field Type\nMy Rec Field",
+                      "order" => "ignore",
+                      "aliases" => ["my_alias"],
+                      type => string,
+                      name => "my_record_field"}]}],
+        messages =>
+            [#{"doc" => "My Fun",
+               name => "hello",
+               request => [],
+               response => string}]
+       },
+      idl_to_avpr("annotations")).
+
+
+full_protocol_avpr_test() ->
+    ?assertMatch(
+       #{},
+      idl_to_avpr("full_protocol")).
+
+
+protocol_with_typedefs_avpr_test() ->
+    ?assertMatch(
+      #{"namespace" := "org.erlang.www",
+        protocol := "MyProto",
+        types :=
+            [#{name := "MyEnum1"},
+             #{name := "MyEnum2",
+               type := enum,
+               variants := ["VAR21", "VAR22", "VAR23"]},
+             #{name := "MyFix",
+               type := fixed,
+               size := 10},
+             #{name := "MyRec",
+               fields :=
+                   [#{type := int},
+                    #{type := string},
+                    #{type := float},
+                    #{type := boolean},
+                    #{type := "MyFix"},
+                    #{type := [boolean, null]},
+                    #{type := #{type := int, 'logicalType' := "date"}},
+                    #{type := #{type := bytes, precision := 5, scale := 2}},
+                    #{type := #{type := array, items := int}},
+                    #{type := #{type := array, items := int}},
+                    #{type := #{type := array, items := string}},
+                    #{type := #{type := map, values := float}}]
+              },
+             #{name := "MyAnnotated",
+               "namespace" := "org.erlang.ftp",
+               fields :=
+                   [#{name := "error",
+                      type := "org.erlang.www.MyError"}]},
+             #{name := "MyError",
+               fields :=
+                   [#{type := "MyEnum2"},
+                    #{type := string}]}],
+        messages :=
+            [#{name := "div"},
+             #{name := "append"},
+             #{name := "gen_server_cast"},
+             #{name := "ping"}]},
+       idl_to_avpr("protocol_with_typedefs")).
+
+%% Helpers
+
+idl_to_avpr(Name) ->
+    ProtocolTree = parse_idl(Name),
+    avro_idl:protocol_to_avpr(ProtocolTree,
+                             avro_idl:new_context("")).
+
+parse_idl(Name) ->
+    File = "test/data/" ++ Name ++ ".avdl",
+    {ok, B} = file:read_file(File),
+    {ok, T0, _} =  avro_idl_lexer:string(binary_to_list(B)),
+    %% ?debugFmt("Name: ~p~nTokens:~n~p", [Name, T0]),
+    T = avro_idl_lexer:preprocess(T0, [drop_comments, trim_doc]),
+    {ok, Tree} = avro_idl_parser:parse(T),
+    Tree.
diff --git a/test/data/annotations.avdl b/test/data/annotations.avdl
index 09c6db6..e081f1f 100644
--- a/test/data/annotations.avdl
+++ b/test/data/annotations.avdl
@@ -33,4 +33,7 @@ protocol MyProto {
         @order("ignore")
         string /** My Rec Field */@aliases(["my_alias"]) my_record_field;
     }
+
+    /** My Fun */
+    string hello();
 }
diff --git a/test/data/protocol_with_typedefs.avdl b/test/data/protocol_with_typedefs.avdl
index b428d2d..e9c035a 100644
--- a/test/data/protocol_with_typedefs.avdl
+++ b/test/data/protocol_with_typedefs.avdl
@@ -40,9 +40,9 @@ protocol MyProto {
       string description;
    }
 
-   float mul(int arg1, float arg2 = 1.0);
+   float div(int arg1, float arg2 = 1.0) throws DivisionByZero;
 
-   MyFix append(bytes arg1, string arg2 = "tail") throws MyError;
+   MyFix append(bytes arg1, string arg2 = "tail") throws MyError, TheirError;
 
    void gen_server_cast(map<float> opts) oneway;
 

From 795be3852c706329ba5e75fb8dae9d1d653e7c84 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sat, 14 Mar 2020 22:41:30 +0100
Subject: [PATCH 09/13] Better test coverage

---
 test/avro_idl_parse_tests.erl         |  4 ++++
 test/avro_idl_tests.erl               | 22 ++++++++++++----------
 test/data/annotations.avdl            | 10 ++++++----
 test/data/protocol_with_typedefs.avdl |  2 ++
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index 09128fb..24fece5 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -113,6 +113,10 @@ protocol_with_typedefs_test() ->
                                          default = null},
                                   #field{name = "my_date",
                                          type = date},
+                                  #field{name = "my_time",
+                                         type = time_ms},
+                                  #field{name = "my_timestamp",
+                                         type = timestamp_ms},
                                   #field{name = "my_decimal",
                                          type = {decimal, 5, 2}},
                                   #field{name = "my_int_array",
diff --git a/test/avro_idl_tests.erl b/test/avro_idl_tests.erl
index f90475e..148392f 100644
--- a/test/avro_idl_tests.erl
+++ b/test/avro_idl_tests.erl
@@ -86,6 +86,9 @@ protocol_with_typedefs_avpr_test() ->
                     #{type := "MyFix"},
                     #{type := [boolean, null]},
                     #{type := #{type := int, 'logicalType' := "date"}},
+                    #{type := #{type := int, 'logicalType' := "time-millis"}},
+                    #{type := #{type := long,
+                                'logicalType' := "timestamp-millis"}},
                     #{type := #{type := bytes, precision := 5, scale := 2}},
                     #{type := #{type := array, items := int}},
                     #{type := #{type := array, items := int}},
@@ -108,18 +111,17 @@ protocol_with_typedefs_avpr_test() ->
              #{name := "ping"}]},
        idl_to_avpr("protocol_with_typedefs")).
 
+
+duplicate_annotation_test() ->
+    ?assertError(
+       {duplicate_annotation, "my_decorator", _, _},
+       avro_idl:str_to_avpr(
+         "@my_decorator(\"a\") @my_decorator(\"b\") protocol MyProto{}", "")
+      ).
+
 %% Helpers
 
 idl_to_avpr(Name) ->
-    ProtocolTree = parse_idl(Name),
-    avro_idl:protocol_to_avpr(ProtocolTree,
-                             avro_idl:new_context("")).
-
-parse_idl(Name) ->
     File = "test/data/" ++ Name ++ ".avdl",
     {ok, B} = file:read_file(File),
-    {ok, T0, _} =  avro_idl_lexer:string(binary_to_list(B)),
-    %% ?debugFmt("Name: ~p~nTokens:~n~p", [Name, T0]),
-    T = avro_idl_lexer:preprocess(T0, [drop_comments, trim_doc]),
-    {ok, Tree} = avro_idl_parser:parse(T),
-    Tree.
+    avro_idl:str_to_avpr(binary_to_list(B), "").
diff --git a/test/data/annotations.avdl b/test/data/annotations.avdl
index e081f1f..8bdcdc3 100644
--- a/test/data/annotations.avdl
+++ b/test/data/annotations.avdl
@@ -10,10 +10,12 @@
 protocol MyProto {
     /** My enum */
     @namespace("enums")
-    enum MyEnum {
+    enum MyEnum {               // my inline comment
         A, B, C
     }
-
+    /*
+     * My multiline comment
+     */
     /** My Fixed */
     @namespace("fixeds")
     fixed MyFixed(16);
@@ -23,14 +25,14 @@ protocol MyProto {
     error MyError {
         /** My Err Field */
         @order("ignore")
-        string my_err_field;
+        string my_err_field;    // other inline
     }
 
     /** My Record */
     @namespace("records")
     record MyRecord {
         /** My Rec Field Type */
-        @order("ignore")
+        @order("ignore") /* other multiline */
         string /** My Rec Field */@aliases(["my_alias"]) my_record_field;
     }
 
diff --git a/test/data/protocol_with_typedefs.avdl b/test/data/protocol_with_typedefs.avdl
index e9c035a..73f25ec 100644
--- a/test/data/protocol_with_typedefs.avdl
+++ b/test/data/protocol_with_typedefs.avdl
@@ -23,6 +23,8 @@ protocol MyProto {
       MyFix my_custom;
       union {boolean, null} my_union = null;
       date my_date = 123456;
+      time_ms my_time = 23456;
+      timestamp_ms my_timestamp = 3456;
       decimal(5, 2) my_decimal = 1222;
 
       array<int> my_int_array;

From 8142a7109c5d5c435dcafee9a0149612883e9159 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sat, 14 Mar 2020 22:42:15 +0100
Subject: [PATCH 10/13] Optimize doc strip preprocessor by pre-compiling regexp

---
 src/avro_idl.erl       | 6 ++++++
 src/avro_idl_lexer.xrl | 9 ++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/avro_idl.erl b/src/avro_idl.erl
index 122b3eb..7507cea 100644
--- a/src/avro_idl.erl
+++ b/src/avro_idl.erl
@@ -1,3 +1,6 @@
+%%% @doc APIs to work with Avro IDL format
+%%%
+%%% See [https://avro.apache.org/docs/1.9.2/idl.html]
 -module(avro_idl).
 
 -export([new_context/1,
@@ -45,6 +48,9 @@ protocol_to_avpr(#protocol{name = Name,
 
 process_imports(Defs, _St) ->
     %% TODO
+    %% https://avro.apache.org/docs/1.9.2/spec.html#names
+    %% when importing definitions from avdl or avpr, copy namespaces from
+    %% protocol to definitions, if not specified
     lists:filter(fun({import, _, _}) -> false;
                     (_) -> true
                  end, Defs).
diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index 274a1ee..d1cd5df 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -82,16 +82,15 @@ do_preprocess(drop_comments, T) ->
          (_) -> true
       end, T);
 do_preprocess(trim_doc, T) ->
+    {ok, Re} = re:compile("^[\\s\\*]*((?U).*)[\\s]*$", [multiline]),
     lists:map(
       fun({doc_v, Loc, Val}) ->
-              {doc_v, Loc, trim_doc(Val)};
+              Stripped = re:replace(Val, Re, "\\1",
+                                    [global, {return, list}]),
+              {doc_v, Loc, Stripped};
          (Tok) -> Tok
       end, T).
 
-trim_doc(Doc) ->
-    re:replace(Doc, "^[\\s\\*]*((?U).*)[\\s]*$", "\\1",
-               [global, multiline, {return, list}]).
-
 %% Lexer internal helpers
 
 unescape(Token, Char) ->

From 2b2406b60728462b4e0e9725adcc8959179aa894 Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sat, 14 Mar 2020 23:15:04 +0100
Subject: [PATCH 11/13] Allow any type inside array and map; fix single-letter
 namespaced IDs

---
 src/avro_idl_lexer.xrl        |  2 +-
 src/avro_idl_parser.yrl       |  8 +++---
 test/avro_idl_parse_tests.erl | 46 ++++++++++++++++++++++++++++++++++-
 test/avro_idl_tests.erl       | 21 ++++++++++++++++
 4 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index d1cd5df..6650125 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -57,7 +57,7 @@ error|throws|oneway|void|import|idl|protocol|schema : {token, {list_to_atom(Toke
 
 [A-Za-z_][A-Za-z0-9_]* : {token, {id, TokenLine, TokenChars}}.
 %% namespaced will only be allowed in data type spec
-[A-Za-z_][A-Za-z0-9_]+(\.[A-Za-z_][A-Za-z0-9_]+)+ : {token, {ns_id, TokenLine, TokenChars}}.
+[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)+ : {token, {ns_id, TokenLine, TokenChars}}.
 
 %% https://blog.ostermiller.org/finding-comments-in-source-code-using-regular-expressions/
 %% `/** .. */` is a docstring for the following object
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index d2c33b0..2a8d0cd 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -210,13 +210,13 @@ union_tail ->
 
 %% -- Array typedef
 array ->
-    array_t '<' primitive_t '>' :
-        {array, value_of('$3')}.         %FIXME: not just primitives!
+    array_t '<' type '>' :
+        {array, '$3'}.
 
 %% -- Map typedef
 map ->
-    map_t '<' primitive_t '>' :
-        {map, value_of('$3')}.           %FIXME: not just primitives!
+    map_t '<' type '>' :
+        {map, '$3'}.
 
 %% == Function (message) definitions
 
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index 24fece5..e65ce31 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -142,10 +142,54 @@ protocol_with_typedefs_test() ->
                      #function{name = "ping", extra = undefined}]},
        parse_idl("protocol_with_typedefs")).
 
+array_types_test() ->
+    Probes =
+        [{int, "int"},
+         {{decimal, 1, 2}, "decimal(1, 2)"},
+         {null, "null"},
+         {{custom, "MyType"}, "MyType"},
+         {{custom, "my_ns.MyType"}, "my_ns.MyType"},
+         {{union, [int, null]}, "union{int, null}"},
+         {{array, int}, "array<int>"},
+         {{map, int}, "map<int>"}],
+    lists:foreach(
+     fun({ExpectType, IdlType}) ->
+             test_field_type({array, ExpectType}, "array<" ++ IdlType ++ ">")
+     end, Probes).
+
+map_types_test() ->
+    Probes =
+        [{int, "int"},
+         {{custom, "MyType"}, "MyType"},
+         {{array, int}, "array<int>"},
+         {{map, int}, "map<int>"}],
+    lists:foreach(
+     fun({ExpectType, IdlType}) ->
+             test_field_type({map, ExpectType}, "map<" ++ IdlType ++ ">")
+     end, Probes).
+
+%% Helpers
+
+test_field_type(ExpectType, IdlType) ->
+    Idl = ("protocol P {"
+           " record R { " ++ IdlType ++ " f; }"
+           "}"),
+    #protocol{
+       definitions =
+           [#record{
+               fields =
+                   [#field{type = Type}]}]} = parse_str(Idl),
+    ?assertEqual(ExpectType, Type).%% ,  % ?assertEqual/3 only OTP-20+
+                 %% #{proto => Idl,
+                 %%   type => IdlType}).
+
 parse_idl(Name) ->
     File = "test/data/" ++ Name ++ ".avdl",
     {ok, B} = file:read_file(File),
-    {ok, T0, _} =  avro_idl_lexer:string(binary_to_list(B)),
+    parse_str(binary_to_list(B)).
+
+parse_str(Str) ->
+    {ok, T0, _} =  avro_idl_lexer:string(Str),
     %% ?debugFmt("Name: ~p~nTokens:~n~p", [Name, T0]),
     T = avro_idl_lexer:preprocess(T0, [drop_comments, trim_doc]),
     {ok, Tree} = avro_idl_parser:parse(T),
diff --git a/test/avro_idl_tests.erl b/test/avro_idl_tests.erl
index 148392f..ec94b7e 100644
--- a/test/avro_idl_tests.erl
+++ b/test/avro_idl_tests.erl
@@ -119,6 +119,27 @@ duplicate_annotation_test() ->
          "@my_decorator(\"a\") @my_decorator(\"b\") protocol MyProto{}", "")
       ).
 
+nested_complex_types_test() ->
+    ?assertEqual(
+       #{protocol => "P",
+         messages => [],
+         types =>
+             [#{type => record,
+                name => "R",
+                fields =>
+                    [#{name => "f",
+                       type =>
+                           #{type => array,
+                             items =>
+                                 #{type => map,
+                                   values => [null, "ns.T"]}
+                            }
+                      }
+                    ]}]},
+       avro_idl:str_to_avpr(
+         "protocol P { record R { array<map<union{null, ns.T}>> f; }}", "")
+      ).
+
 %% Helpers
 
 idl_to_avpr(Name) ->

From c3bea118e83b30da1c9718d83e877a0376d345da Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sun, 15 Mar 2020 01:47:11 +0100
Subject: [PATCH 12/13] Use binaries for avpr JSON-map representation

---
 include/erlavro.hrl           |   1 +
 src/avro_idl.erl              | 113 +++++++++--------
 src/avro_idl_lexer.xrl        |   4 +-
 src/avro_idl_parser.yrl       |  11 +-
 test/avro_idl_parse_tests.erl |   2 +
 test/avro_idl_tests.erl       | 227 +++++++++++++++++++---------------
 6 files changed, 201 insertions(+), 157 deletions(-)

diff --git a/include/erlavro.hrl b/include/erlavro.hrl
index c3a331b..76984bf 100644
--- a/include/erlavro.hrl
+++ b/include/erlavro.hrl
@@ -37,6 +37,7 @@
 -define(AVRO_MAP,     <<"map">>).
 -define(AVRO_UNION,   <<"union">>).
 -define(AVRO_FIXED,   <<"fixed">>).
+-define(AVRO_ERROR,   <<"error">>).             % idl
 
 -define(IS_AVRO_PRIMITIVE_NAME(N),
         (N =:= ?AVRO_NULL    orelse
diff --git a/src/avro_idl.erl b/src/avro_idl.erl
index 7507cea..78a611c 100644
--- a/src/avro_idl.erl
+++ b/src/avro_idl.erl
@@ -1,6 +1,10 @@
 %%% @doc APIs to work with Avro IDL format
 %%%
-%%% See [https://avro.apache.org/docs/1.9.2/idl.html]
+%%% This module allows converting the .avdl format to .avpr and .avsc, as well
+%%% as creating Avro encoders and decoders.
+%%% @end
+%%% @reference See [https://avro.apache.org/docs/current/idl.html]
+%%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl).
 
 -export([new_context/1,
@@ -8,6 +12,7 @@
          protocol_to_avpr/2,
          typedecl_to_avsc/2]).
 -include("idl.hrl").
+-include("erlavro.hrl").
 
 -record(st, {cwd}).
 
@@ -32,13 +37,13 @@ protocol_to_avpr(#protocol{name = Name,
                            (_) -> true
                         end, Defs),
     Protocol0 =
-        #{protocol => Name,
-          types =>
+        #{<<"protocol">> => b(Name),
+          <<"types">> =>
               lists:map(
                 fun(Type) ->
                         typedecl_to_avsc(Type, St)
                 end, Types),
-          messages =>
+          <<"messages">> =>
               lists:map(
                 fun(Message) ->
                         message_to_avsc(Message, St)
@@ -57,36 +62,36 @@ process_imports(Defs, _St) ->
 
 typedecl_to_avsc(#enum{name = Name, meta = Meta, variants = Vars}, _St) ->
     meta(
-      #{type => enum,
-        name => Name,
-        variants => Vars
+      #{<<"type">> => ?AVRO_ENUM,
+        <<"name">> => b(Name),
+        <<"variants">> => lists:map(fun b/1, Vars)
        },
       Meta);
 typedecl_to_avsc(#fixed{name = Name, meta = Meta, size = Size}, _St) ->
     meta(
-      #{type => fixed,
-        name => Name,
-        size => Size},
+      #{<<"type">> => ?AVRO_FIXED,
+        <<"name">> => b(Name),
+        <<"size">> => Size},
       Meta);
 typedecl_to_avsc(#error{name = Name, meta = Meta, fields = Fields}, St) ->
     meta(
-      #{type => error,
-        name => Name,
-        fields => [field_to_avsc(Field, St) || Field <- Fields]},
+      #{<<"type">> => ?AVRO_ERROR,
+        <<"name">> => b(Name),
+        <<"fields">> => [field_to_avsc(Field, St) || Field <- Fields]},
       Meta);
 typedecl_to_avsc(#record{name = Name, meta = Meta, fields = Fields}, St) ->
     meta(
-      #{type => record,
-        name => Name,
-        fields => [field_to_avsc(Field, St) || Field <- Fields]},
+      #{<<"type">> => ?AVRO_RECORD,
+        <<"name">> => b(Name),
+        <<"fields">> => [field_to_avsc(Field, St) || Field <- Fields]},
       Meta).
 
 field_to_avsc(#field{name = Name, meta = Meta,
                      type = Type, default = Default}, St) ->
     meta(
       default(
-        #{name => Name,
-          type => type_to_avsc(Type, St)},
+        #{<<"name">> => b(Name),
+          <<"type">> => type_to_avsc(Type, St)},
         Default),         % TODO: maybe validate default matches type
       Meta).
 
@@ -96,30 +101,28 @@ message_to_avsc(#function{name = Name, meta = Meta,
     %% TODO: arguments can just reuse `#field{}`
     ArgsSchema =
         [default(
-           #{name => ArgName,
-             type => type_to_avsc(Type, St)},
+           #{<<"name">> => b(ArgName),
+             <<"type">> => type_to_avsc(Type, St)},
            Default)
          || {arg, ArgName, Type, Default} <- Args],
     Schema0 =
-        #{name => Name,
-          request => ArgsSchema,
-          response => type_to_avsc(Return, St)},
+        #{<<"name">> => b(Name),
+          <<"request">> => ArgsSchema,
+          <<"response">> => type_to_avsc(Return, St)},
     Schema1 = case Extra of
                   undefined -> Schema0;
                   oneway ->
-                      Schema0#{'one-way' => true};
+                      Schema0#{<<"one-way">> => true};
                   {throws, ThrowsTypes} ->
-                      %% Throws = [type_to_avsc(TType, St)
-                      %%           || TType <- ThrowsTypes],
-                      Schema0#{error => ThrowsTypes}
+                      Schema0#{<<"error">> => lists:map(fun b/1, ThrowsTypes)}
               end,
     meta(Schema1, Meta).
 
 
 type_to_avsc(void, _St) ->
-    null;
+    ?AVRO_NULL;
 type_to_avsc(null, _St) ->
-    null;
+    ?AVRO_NULL;
 type_to_avsc(T, _St) when T == int;
                           T == long;
                           T == string;
@@ -127,31 +130,31 @@ type_to_avsc(T, _St) when T == int;
                           T == float;
                           T == double;
                           T == bytes ->
-    T;
+    atom_to_binary(T, utf8);
 type_to_avsc({decimal, Precision, Scale}, _St) ->
-    #{type => bytes,
-      'logicalType' => "decimal",
-      precision => Precision,
-      scale => Scale};
+    #{<<"type">> => ?AVRO_BYTES,
+      <<"logicalType">> => <<"decimal">>,
+      <<"precision">> => Precision,
+      <<"scale">> => Scale};
 type_to_avsc(date, _St) ->
-    #{type => int,
-      'logicalType' => "date"};
+    #{<<"type">> => ?AVRO_INT,
+      <<"logicalType">> => <<"date">>};
 type_to_avsc(time_ms, _St) ->
-    #{type => int,
-      'logicalType' => "time-millis"};
+    #{<<"type">> => ?AVRO_INT,
+      <<"logicalType">> => <<"time-millis">>};
 type_to_avsc(timestamp_ms, _St) ->
-    #{type => long,
-      'logicalType' => "timestamp-millis"};
+    #{<<"type">> => ?AVRO_LONG,
+      <<"logicalType">> => <<"timestamp-millis">>};
 type_to_avsc({custom, Id}, _St) ->
-    Id;
+    b(Id);
 type_to_avsc({union, Types}, St) ->
     [type_to_avsc(Type, St) || Type <- Types];
 type_to_avsc({array, Of}, St) ->
-    #{type => array,
-      items => type_to_avsc(Of, St)};
+    #{<<"type">> => ?AVRO_ARRAY,
+      <<"items">> => type_to_avsc(Of, St)};
 type_to_avsc({map, ValType}, St) ->
-    #{type => map,
-      values => type_to_avsc(ValType, St)}.
+    #{<<"type">> => ?AVRO_MAP,
+      <<"values">> => type_to_avsc(ValType, St)}.
 
 meta(Schema, Meta) ->
     {Docs, Annotations} =
@@ -163,17 +166,27 @@ meta(Schema, Meta) ->
                   [] -> Schema;
                   _ ->
                       DocStrings = [S || {doc, S} <- Docs],
-                      Schema#{"doc" => lists:flatten(lists:join(
-                                                       "\n", DocStrings))}
+                      Schema#{<<"doc">> => b(lists:join(
+                                               "\n", DocStrings))}
               end,
     lists:foldl(
      fun(#annotation{name = Name, value = Value}, Schema2) ->
-             maps:is_key(Name, Schema2) andalso
+             BName = b(Name),
+             BVal = case Value of
+                        [] -> <<>>;
+                        [C | _] when is_integer(C) -> b(Value);
+                        _ ->
+                            [b(Str) || Str <- Value]
+                    end,
+             maps:is_key(BName, Schema2) andalso
                  error({duplicate_annotation, Name, Value, Schema2}),
-             Schema2#{Name => Value}
+             Schema2#{BName => BVal}
      end, Schema1, Annotations).
 
 default(Obj, undefined) ->
     Obj;
 default(Obj, Default) ->
-    Obj#{default => Default}.
+    Obj#{<<"default">> => Default}.
+
+b(Str) when is_list(Str) ->
+    unicode:characters_to_binary(Str).
diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index 6650125..4ec393c 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -1,5 +1,7 @@
 %% @doc Avro IDL lexer
-%% https://avro.apache.org/docs/current/idl.html
+%% @end
+%% @reference See [https://avro.apache.org/docs/current/idl.html]
+%% @author Sergey Prokhorov <me@seriyps.ru>
 
 Definitions.
 
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index 2a8d0cd..147fa09 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -1,8 +1,9 @@
-%% @doc Avro IDL parser
-%% https://avro.apache.org/docs/1.9.2/idl.html
-%% XXX: all `comment_v` tokens should be filtered-out before parsing!
-
-Header "%% Hello".
+Header "%%% @doc Avro IDL parser
+%%%
+%%% XXX: all `comment_v' tokens should be filtered-out before parsing!
+%%% @end
+%%% @reference See [https://avro.apache.org/docs/current/idl.html]
+%%% @author Sergey Prokhorov <me@seriyps.ru>".
 
 Terminals id ns_id null string_v doc_v float_v integer_v bool_v annotation_v
     primitive_t logical_t decimal_t
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index e65ce31..3119dd7 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -1,4 +1,6 @@
 %% @doc Tests for IDL lexer + parser
+%% @end
+%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl_parse_tests).
 
 -include("../src/idl.hrl").
diff --git a/test/avro_idl_tests.erl b/test/avro_idl_tests.erl
index ec94b7e..ceaba01 100644
--- a/test/avro_idl_tests.erl
+++ b/test/avro_idl_tests.erl
@@ -1,62 +1,71 @@
 %% @doc Tests for IDL converter / loader
+%% @end
+%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl_tests).
 
 -include("../src/idl.hrl").
 -include_lib("eunit/include/eunit.hrl").
-
+-include("erlavro.hrl").
 
 empty_protocol_avpr_test() ->
     ?assertEqual(
-       #{protocol => "MyProto",
-         types => [],
-         messages => []},
+       #{<<"protocol">> => <<"MyProto">>,
+         <<"types">> => [],
+         <<"messages">> => []},
        idl_to_avpr("empty_protocol")).
 
 
 annotations_avpr_test() ->
+    Proto = idl_to_avpr("annotations"),
+    ?assertEqual(
+       #{<<"doc">> =>
+             <<"My protocol\nNo, really\nIt's some multiline doc\n"
+               "bullet points will be stripped\nso no unordered lists">>,
+         <<"version">> => <<"1.0">>,
+         <<"aliases">> => [<<"ns.Proto1">>, <<"ns.Proto2">>],
+         <<"protocol">> => <<"MyProto">>
+        },
+       maps:without([<<"types">>, <<"messages">>], Proto)
+      ),
+    #{<<"types">> := Types,
+      <<"messages">> := Messages} = Proto,
+    ?assertEqual(
+            [#{<<"doc">> => <<"My enum">>,
+               <<"namespace">> => <<"enums">>,
+               <<"type">> => ?AVRO_ENUM,
+               <<"name">> => <<"MyEnum">>,
+               <<"variants">> => [<<"A">>, <<"B">>, <<"C">>]},
+             #{<<"doc">> => <<"My Fixed">>,
+               <<"namespace">> => <<"fixeds">>,
+               <<"type">> => ?AVRO_FIXED,
+               <<"name">> => <<"MyFixed">>,
+               <<"size">> => 16},
+             #{<<"doc">> => <<"My Error">>,
+               <<"namespace">> => <<"errors">>,
+               <<"type">> => ?AVRO_ERROR,
+               <<"name">> => <<"MyError">>,
+               <<"fields">> =>
+                   [#{<<"doc">> => <<"My Err Field">>,
+                      <<"order">> => <<"ignore">>,
+                      <<"type">> => ?AVRO_STRING,
+                      <<"name">> => <<"my_err_field">>}]},
+             #{<<"doc">> => <<"My Record">>,
+               <<"namespace">> => <<"records">>,
+               <<"type">> => ?AVRO_RECORD,
+               <<"name">> => <<"MyRecord">>,
+               <<"fields">> =>
+                   [#{<<"doc">> => <<"My Rec Field Type\nMy Rec Field">>,
+                      <<"order">> => <<"ignore">>,
+                      <<"aliases">> => [<<"my_alias">>],
+                      <<"type">> => ?AVRO_STRING,
+                      <<"name">> => <<"my_record_field">>}]}],
+       Types),
     ?assertEqual(
-      #{"doc" => ("My protocol\nNo, really\nIt's some multiline doc\n"
-                  "bullet points will be stripped\nso no unordered lists"),
-        "version" => "1.0",
-        "aliases" => ["ns.Proto1", "ns.Proto2"],
-        protocol => "MyProto",
-        types =>
-            [#{"doc" => "My enum",
-               "namespace" => "enums",
-               type => enum,
-               name => "MyEnum",
-               variants => ["A", "B", "C"]},
-             #{"doc" => "My Fixed",
-               "namespace" => "fixeds",
-               type => fixed,
-               name => "MyFixed",
-               size => 16},
-             #{"doc" => "My Error",
-               "namespace" => "errors",
-               type => error,
-               name => "MyError",
-               fields =>
-                   [#{"doc" => "My Err Field",
-                      "order" => "ignore",
-                      type => string,
-                      name => "my_err_field"}]},
-             #{"doc" => "My Record",
-               "namespace" => "records",
-               type => record,
-               name => "MyRecord",
-               fields =>
-                   [#{"doc" => "My Rec Field Type\nMy Rec Field",
-                      "order" => "ignore",
-                      "aliases" => ["my_alias"],
-                      type => string,
-                      name => "my_record_field"}]}],
-        messages =>
-            [#{"doc" => "My Fun",
-               name => "hello",
-               request => [],
-               response => string}]
-       },
-      idl_to_avpr("annotations")).
+       [#{<<"doc">> => <<"My Fun">>,
+          <<"name">> => <<"hello">>,
+          <<"request">> => [],
+          <<"response">> => ?AVRO_STRING}],
+       Messages).
 
 
 full_protocol_avpr_test() ->
@@ -66,50 +75,66 @@ full_protocol_avpr_test() ->
 
 
 protocol_with_typedefs_avpr_test() ->
+    Proto = idl_to_avpr("protocol_with_typedefs"),
+    ?assertMatch(
+       #{<<"namespace">> := <<"org.erlang.www">>,
+         <<"protocol">> := <<"MyProto">>,
+         <<"types">> := _,
+         <<"messages">> := _},
+       Proto),
+    #{<<"types">> := Types,
+      <<"messages">> := Messages} = Proto,
+    ?assertMatch(
+       [#{<<"name">> := <<"MyEnum1">>},
+        #{<<"name">> := <<"MyEnum2">>,
+          <<"type">> := ?AVRO_ENUM,
+          <<"variants">> := [<<"VAR21">>, <<"VAR22">>, <<"VAR23">>]},
+        #{<<"name">> := <<"MyFix">>,
+          <<"type">> := ?AVRO_FIXED,
+          <<"size">> := 10},
+        #{<<"name">> := <<"MyRec">>,
+          <<"fields">> :=
+              [#{<<"type">> := ?AVRO_INT},
+               #{<<"type">> := ?AVRO_STRING},
+               #{<<"type">> := ?AVRO_FLOAT},
+               #{<<"type">> := ?AVRO_BOOLEAN},
+               #{<<"type">> := <<"MyFix">>},
+               #{<<"type">> := [?AVRO_BOOLEAN, ?AVRO_NULL]},
+               #{<<"type">> := #{<<"type">> := ?AVRO_INT,
+                                 <<"logicalType">> := <<"date">>}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_INT,
+                                 <<"logicalType">> := <<"time-millis">>}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_LONG,
+                                 <<"logicalType">> := <<"timestamp-millis">>}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_BYTES,
+                                 <<"precision">> := 5,
+                                 <<"scale">> := 2}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_ARRAY,
+                                 <<"items">> := ?AVRO_INT}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_ARRAY,
+                                 <<"items">> := ?AVRO_INT}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_ARRAY,
+                                 <<"items">> := ?AVRO_STRING}},
+               #{<<"type">> := #{<<"type">> := ?AVRO_MAP,
+                                 <<"values">> := ?AVRO_FLOAT}}]
+         },
+        #{<<"name">> := <<"MyAnnotated">>,
+          <<"namespace">> := <<"org.erlang.ftp">>,
+          <<"fields">> :=
+              [#{<<"name">> := <<"error">>,
+                 <<"type">> := <<"org.erlang.www.MyError">>}]},
+        #{<<"name">> := <<"MyError">>,
+          <<"fields">> :=
+              [#{<<"type">> := <<"MyEnum2">>},
+               #{<<"type">> := ?AVRO_STRING}]}],
+       Types),
     ?assertMatch(
-      #{"namespace" := "org.erlang.www",
-        protocol := "MyProto",
-        types :=
-            [#{name := "MyEnum1"},
-             #{name := "MyEnum2",
-               type := enum,
-               variants := ["VAR21", "VAR22", "VAR23"]},
-             #{name := "MyFix",
-               type := fixed,
-               size := 10},
-             #{name := "MyRec",
-               fields :=
-                   [#{type := int},
-                    #{type := string},
-                    #{type := float},
-                    #{type := boolean},
-                    #{type := "MyFix"},
-                    #{type := [boolean, null]},
-                    #{type := #{type := int, 'logicalType' := "date"}},
-                    #{type := #{type := int, 'logicalType' := "time-millis"}},
-                    #{type := #{type := long,
-                                'logicalType' := "timestamp-millis"}},
-                    #{type := #{type := bytes, precision := 5, scale := 2}},
-                    #{type := #{type := array, items := int}},
-                    #{type := #{type := array, items := int}},
-                    #{type := #{type := array, items := string}},
-                    #{type := #{type := map, values := float}}]
-              },
-             #{name := "MyAnnotated",
-               "namespace" := "org.erlang.ftp",
-               fields :=
-                   [#{name := "error",
-                      type := "org.erlang.www.MyError"}]},
-             #{name := "MyError",
-               fields :=
-                   [#{type := "MyEnum2"},
-                    #{type := string}]}],
-        messages :=
-            [#{name := "div"},
-             #{name := "append"},
-             #{name := "gen_server_cast"},
-             #{name := "ping"}]},
-       idl_to_avpr("protocol_with_typedefs")).
+       [#{<<"name">> := <<"div">>},
+        #{<<"name">> := <<"append">>,
+          <<"error">> := [<<"MyError">>, <<"TheirError">>]},
+        #{<<"name">> := <<"gen_server_cast">>, <<"one-way">> := true},
+        #{<<"name">> := <<"ping">>}],
+       Messages).
 
 
 duplicate_annotation_test() ->
@@ -121,18 +146,18 @@ duplicate_annotation_test() ->
 
 nested_complex_types_test() ->
     ?assertEqual(
-       #{protocol => "P",
-         messages => [],
-         types =>
-             [#{type => record,
-                name => "R",
-                fields =>
-                    [#{name => "f",
-                       type =>
-                           #{type => array,
-                             items =>
-                                 #{type => map,
-                                   values => [null, "ns.T"]}
+       #{<<"protocol">> => <<"P">>,
+         <<"messages">> => [],
+         <<"types">> =>
+             [#{<<"type">> => ?AVRO_RECORD,
+                <<"name">> => <<"R">>,
+                <<"fields">> =>
+                    [#{<<"name">> => <<"f">>,
+                       <<"type">> =>
+                           #{<<"type">> => ?AVRO_ARRAY,
+                             <<"items">> =>
+                                 #{<<"type">> => ?AVRO_MAP,
+                                   <<"values">> => [?AVRO_NULL, <<"ns.T">>]}
                             }
                       }
                     ]}]},

From 0cbf415ba7116932f7521655e9e558c237769aaf Mon Sep 17 00:00:00 2001
From: Sergey Prokhorov <seriy.pr@gmail.com>
Date: Sun, 15 Mar 2020 03:21:12 +0100
Subject: [PATCH 13/13] Load .avdl to erlavro internal format

Now it's possible to use basic .avdl to encode/decode avro
---
 src/avro.erl                  |  9 ++++++---
 src/avro_idl.erl              | 22 ++++++++++++++++++++--
 src/avro_idl_lexer.xrl        |  2 +-
 src/avro_idl_parser.yrl       |  4 ++--
 src/avro_json_decoder.erl     | 18 +++++++++++++++---
 test/avro_idl_parse_tests.erl |  2 +-
 test/avro_idl_tests.erl       | 24 +++++++++++++++++-------
 7 files changed, 62 insertions(+), 19 deletions(-)

diff --git a/src/avro.erl b/src/avro.erl
index 8c7437e..38faecd 100644
--- a/src/avro.erl
+++ b/src/avro.erl
@@ -143,7 +143,8 @@
 -type crc64_fingerprint() :: avro_fingerprint:crc64().
 
 %% @doc Decode JSON format avro schema into `erlavro' internals.
--spec decode_schema(binary()) -> avro_type().
+%% @param JSON JSON binary, or already parsed JSON as `map()' / `[map()]'
+-spec decode_schema(binary() | map() | [map()]) -> avro_type().
 decode_schema(JSON) -> avro_json_decoder:decode_schema(JSON).
 
 %% @doc Make type lookup function from type definition.
@@ -176,7 +177,8 @@ make_lkup_fun(AssignedName, Type) ->
 %%  * allow_type_redefine: `boolean()'
 %%     This option is to allow one type being defined more than once.
 %% @end
--spec decode_schema(binary(), proplists:proplist()) -> avro_type().
+-spec decode_schema(binary() | map() | [map()], proplists:proplist()) ->
+                       avro_type().
 decode_schema(JSON, Options) ->
   avro_json_decoder:decode_schema(JSON, Options).
 
@@ -269,7 +271,8 @@ make_decoder(Schema, Options) ->
 %% takes only one `binary()' input arg.
 -spec make_simple_decoder(avro_type() | binary(), codec_options()) ->
         simple_decoder().
-make_simple_decoder(JSON, Options) when is_binary(JSON) ->
+make_simple_decoder(JSON, Options) when is_binary(JSON);
+                                        is_map(JSON) ->
   make_simple_decoder(decode_schema(JSON), Options);
 make_simple_decoder(Type, Options) when ?IS_TYPE_RECORD(Type) ->
   Lkup = make_lkup_fun(Type),
diff --git a/src/avro_idl.erl b/src/avro_idl.erl
index 78a611c..a1b9533 100644
--- a/src/avro_idl.erl
+++ b/src/avro_idl.erl
@@ -4,9 +4,10 @@
 %%% as create Avro encoders and decoders.
 %%% @end
 %%% @reference See [https://avro.apache.org/docs/current/idl.html]
-%%% @author Sergey Prokhhorov <me@seriyps.ru>
+%%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl).
 
+-export([decode_schema/2]).
 -export([new_context/1,
          str_to_avpr/2,
          protocol_to_avpr/2,
@@ -16,6 +17,23 @@
 
 -record(st, {cwd}).
 
+%% @doc Load `.avdl' protocol source into `erlavro' internal format.
+%% The source is converted with `str_to_avpr/2' (`Cwd' is passed as is),
+%% declarations of type `error' are dropped and every remaining type
+%% lacking `namespace' gets the protocol's one (or `?AVRO_NS_GLOBAL').
+%% The result is decoded with the `ignore_bad_default_values' option set.
+%% @end
+decode_schema(SchemaStr, Cwd) ->
+    #{<<"types">> := AllTypes} = Avpr = str_to_avpr(SchemaStr, Cwd),
+    IsLoadable =
+        fun(#{<<"type">> := TName}) -> TName =/= <<"error">> end,
+    Loadable = lists:filter(IsLoadable, AllTypes),
+    DefaultNs = #{<<"namespace">> =>
+                      maps:get(<<"namespace">>, Avpr, ?AVRO_NS_GLOBAL)},
+    %% right-hand map wins in `maps:merge/2': own namespace is kept
+    Types = [maps:merge(DefaultNs, T) || T <- Loadable],
+    avro:decode_schema(Types, [{ignore_bad_default_values, true}]).
+
 new_context(Cwd) ->
     #st{cwd = Cwd}.
 
@@ -64,7 +82,7 @@ typedecl_to_avsc(#enum{name = Name, meta = Meta, variants = Vars}, _St) ->
     meta(
       #{<<"type">> => ?AVRO_ENUM,
         <<"name">> => b(Name),
-        <<"variants">> => lists:map(fun b/1, Vars)
+        <<"symbols">> => lists:map(fun b/1, Vars)
        },
       Meta);
 typedecl_to_avsc(#fixed{name = Name, meta = Meta, size = Size}, _St) ->
diff --git a/src/avro_idl_lexer.xrl b/src/avro_idl_lexer.xrl
index 4ec393c..4d4d545 100644
--- a/src/avro_idl_lexer.xrl
+++ b/src/avro_idl_lexer.xrl
@@ -1,7 +1,7 @@
 %% @doc Avro IDL lexer
 %% @end
 %% @reference See [https://avro.apache.org/docs/current/idl.html]
-%% @author Sergey Prokhhorov <me@seriyps.ru>
+%% @author Sergey Prokhorov <me@seriyps.ru>
 
 Definitions.
 
diff --git a/src/avro_idl_parser.yrl b/src/avro_idl_parser.yrl
index 147fa09..a76ac6b 100644
--- a/src/avro_idl_parser.yrl
+++ b/src/avro_idl_parser.yrl
@@ -3,7 +3,7 @@ Header "%%% @doc Avro IDL parser
 %%% XXX: all `comment_v' tokens should be filtered-out before parsing!
 %%% @end
 %%% @reference See [https://avro.apache.org/docs/current/idl.html]
-%%% @author Sergey Prokhhorov <me@seriyps.ru>".
+%%% @author Sergey Prokhorov <me@seriyps.ru>".
 
 Terminals id ns_id null string_v doc_v float_v integer_v bool_v annotation_v
     primitive_t logical_t decimal_t
@@ -120,7 +120,7 @@ import_file_type -> schema_k : schema.
 
 %% -- Enum typedef
 enum ->
-    enum_t id '{' id enum_variants :
+    enum_t id '{' id enum_variants :            % TODO: add support for default
         #enum{name = value_of('$2'), variants = [value_of('$4') | '$5']}.
 enum ->
     meta enum :
diff --git a/src/avro_json_decoder.erl b/src/avro_json_decoder.erl
index 3749206..248e9e5 100644
--- a/src/avro_json_decoder.erl
+++ b/src/avro_json_decoder.erl
@@ -47,7 +47,7 @@
 %%%_* APIs =====================================================================
 
 %% @doc Decode JSON format avro schema into erlavro internals.
--spec decode_schema(binary()) -> avro_type().
+-spec decode_schema(binary() | map() | [map()]) -> avro_type().
 decode_schema(JSON) ->
   decode_schema(JSON, _Opts = []).
 
@@ -66,7 +66,7 @@ decode_schema(JSON) ->
 %%  * allow_type_redefine: `boolean()'
 %%     This option is to allow one type being defined more than once.
 %% @end
--spec decode_schema(binary(), sc_opts()) -> avro_type().
+-spec decode_schema(binary() | map() | [map()], sc_opts()) -> avro_type().
 decode_schema(JSON, Opts) when is_list(Opts) ->
   %% Parse JSON first
   Type = parse_schema(decode_json(JSON)),
@@ -511,9 +511,21 @@ do_parse_union_ex(ValueTypeName, Value, UnionType,
 %% 'map' is a better option, but we have to keep it backward compatible.
 %% 'proplist' is not an option because otherwise there is no way to tell
 %% apart 'object' and 'array'.
--spec decode_json(binary()) -> json_value().
+-spec decode_json(binary() | map() | [map()]) -> json_value().
+decode_json(Parsed) when is_map(Parsed) orelse is_list(Parsed) ->
+  %% already parsed: only maps have to be converted to tuple format
+  map_to_tuple(Parsed);
 decode_json(JSON) -> jsone:decode(JSON, [{object_format, tuple}]).
 
+%% Recursively turn every map into a `{[{Key, Value}]}' tuple; terms
+%% which are neither maps nor lists are returned untouched.
+map_to_tuple(Obj) when is_map(Obj) ->
+  {lists:map(fun({K, V}) -> {K, map_to_tuple(V)} end, maps:to_list(Obj))};
+map_to_tuple(Items) when is_list(Items) ->
+  [map_to_tuple(Item) || Item <- Items];
+map_to_tuple(Scalar) ->
+  Scalar.
+
 %% Filter out non-custom properties.
 -spec filter_custom_props([{binary(), json_value()}], [name()]) ->
         [custom_prop()].
diff --git a/test/avro_idl_parse_tests.erl b/test/avro_idl_parse_tests.erl
index 3119dd7..074f510 100644
--- a/test/avro_idl_parse_tests.erl
+++ b/test/avro_idl_parse_tests.erl
@@ -1,6 +1,6 @@
 %% @doc Tests for IDL lexer + parser
 %% @end
-%% @author Sergey Prokhhorov <me@seriyps.ru>
+%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl_parse_tests).
 
 -include("../src/idl.hrl").
diff --git a/test/avro_idl_tests.erl b/test/avro_idl_tests.erl
index ceaba01..56d0e68 100644
--- a/test/avro_idl_tests.erl
+++ b/test/avro_idl_tests.erl
@@ -1,6 +1,6 @@
 %% @doc Tests for IDL converter / loader
 %% @end
-%% @author Sergey Prokhhorov <me@seriyps.ru>
+%% @author Sergey Prokhorov <me@seriyps.ru>
 -module(avro_idl_tests).
 
 -include("../src/idl.hrl").
@@ -34,7 +34,7 @@ annotations_avpr_test() ->
                <<"namespace">> => <<"enums">>,
                <<"type">> => ?AVRO_ENUM,
                <<"name">> => <<"MyEnum">>,
-               <<"variants">> => [<<"A">>, <<"B">>, <<"C">>]},
+               <<"symbols">> => [<<"A">>, <<"B">>, <<"C">>]},
              #{<<"doc">> => <<"My Fixed">>,
                <<"namespace">> => <<"fixeds">>,
                <<"type">> => ?AVRO_FIXED,
@@ -88,7 +88,7 @@ protocol_with_typedefs_avpr_test() ->
        [#{<<"name">> := <<"MyEnum1">>},
         #{<<"name">> := <<"MyEnum2">>,
           <<"type">> := ?AVRO_ENUM,
-          <<"variants">> := [<<"VAR21">>, <<"VAR22">>, <<"VAR23">>]},
+          <<"symbols">> := [<<"VAR21">>, <<"VAR22">>, <<"VAR23">>]},
         #{<<"name">> := <<"MyFix">>,
           <<"type">> := ?AVRO_FIXED,
           <<"size">> := 10},
@@ -137,14 +137,14 @@ protocol_with_typedefs_avpr_test() ->
        Messages).
 
 
-duplicate_annotation_test() ->
+duplicate_annotation_avpr_test() ->
     ?assertError(
        {duplicate_annotation, "my_decorator", _, _},
        avro_idl:str_to_avpr(
          "@my_decorator(\"a\") @my_decorator(\"b\") protocol MyProto{}", "")
       ).
 
-nested_complex_types_test() ->
+nested_complex_types_avpr_test() ->
     ?assertEqual(
        #{<<"protocol">> => <<"P">>,
          <<"messages">> => [],
@@ -165,9 +165,19 @@ nested_complex_types_test() ->
          "protocol P { record R { array<map<union{null, ns.T}>> f; }}", "")
       ).
 
+%% Smoke test: load + re-encode must not crash; the output is not checked
+full_protocol_load_test() ->
+    Avdl = read_schema("full_protocol"),
+    Decoded = avro_idl:decode_schema(Avdl, ""),
+    _ = avro:encode_schema(Decoded).
+
 %% Helpers
 
-idl_to_avpr(Name) ->
+read_schema(Name) ->
     File = "test/data/" ++ Name ++ ".avdl",
     {ok, B} = file:read_file(File),
-    avro_idl:str_to_avpr(binary_to_list(B), "").
+    binary_to_list(B).
+
+%% Convert the `test/data/Name.avdl' fixture with `avro_idl:str_to_avpr/2'
+idl_to_avpr(Name) ->
+    avro_idl:str_to_avpr(read_schema(Name), "").