From de00fd9b9bbe69ada88a013f9dfffaaa94970ca1 Mon Sep 17 00:00:00 2001
From: Lawrie <Jock.Lawrie@education.vic.gov.au>
Date: Thu, 11 Feb 2021 10:17:57 +1100
Subject: [PATCH] v3.0.0, using Julia 1.6 and TOML.jl instead of YAML.jl

---
 Manifest.toml                      | 162 ++++++++++++++++++++---------
 Project.toml                       |  17 ++-
 README.md                          |  61 +++++------
 scripts/compare.jl                 |   2 +-
 src/Schemata.jl                    |   2 +-
 src/compare/inmemory_table.jl      |   3 +-
 src/compare/ondisk_table.jl        |   2 +-
 src/readwrite.jl                   |  62 ++++++++++-
 src/types.jl                       |  89 ++++++++++++----
 test/schemata/fever.toml           |  12 +++
 test/schemata/fever.yaml           |  35 -------
 test/schemata/row_constraints.toml |  12 +++
 test/schemata/row_constraints.yaml |  29 ------
 test/test_inmemory_tables.jl       |  15 +--
 test/test_ondisk_tables.jl         |   2 +-
 15 files changed, 316 insertions(+), 189 deletions(-)
 create mode 100644 test/schemata/fever.toml
 delete mode 100644 test/schemata/fever.yaml
 create mode 100644 test/schemata/row_constraints.toml
 delete mode 100644 test/schemata/row_constraints.yaml

diff --git a/Manifest.toml b/Manifest.toml
index b675ee6..254cc1e 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -1,42 +1,53 @@
 # This file is machine-generated - editing it directly is not advised
 
+[[ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+
+[[Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
 [[Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 
 [[CSV]]
-deps = ["CategoricalArrays", "DataFrames", "Dates", "FilePathsBase", "Mmap", "Parsers", "PooledArrays", "Tables", "Unicode", "WeakRefStrings"]
-git-tree-sha1 = "52a8e60c7822f53d57e4403b7f2811e7e1bdd32b"
+deps = ["Dates", "Mmap", "Parsers", "PooledArrays", "SentinelArrays", "Tables", "Unicode"]
+git-tree-sha1 = "1f79803452adf73e2d3fc84785adb7aaca14db36"
 uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-version = "0.6.2"
+version = "0.8.3"
 
 [[CategoricalArrays]]
-deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "Unicode"]
-git-tree-sha1 = "a6c17353ee38ddab30e73dcfaa1107752de724ec"
+deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "StructTypes", "Unicode"]
+git-tree-sha1 = "99809999c8ee01fa89498480b147f7394ea5450f"
 uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-version = "0.8.1"
+version = "0.9.2"
 
 [[Compat]]
 deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "054993b6611376ddb40203e973e954fd9d1d1902"
+git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b"
 uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "3.12.0"
+version = "3.25.0"
+
+[[Crayons]]
+git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d"
+uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
+version = "4.0.4"
 
 [[DataAPI]]
-git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32"
+git-tree-sha1 = "8ab70b4de35bb3b8cc19654f6b893cf5164f8ee8"
 uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.3.0"
+version = "1.5.1"
 
 [[DataFrames]]
-deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "02f08ae77249b7f6d4186b081a016fb7454c616f"
+deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "b0db5579803eabb33f1274ca7ca2f472fdfb7f2a"
 uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "0.21.2"
+version = "0.22.5"
 
 [[DataStructures]]
-deps = ["InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "be680f1ad03c0a03796aa3fda5a2180df7f83b46"
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
 uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.17.18"
+version = "0.18.9"
 
 [[DataValueInterfaces]]
 git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
@@ -55,11 +66,15 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 deps = ["Random", "Serialization", "Sockets"]
 uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
-[[FilePathsBase]]
-deps = ["Dates", "LinearAlgebra", "Printf", "Test", "UUIDs"]
-git-tree-sha1 = "923fd3b942a11712435682eaa95cc8518c428b2c"
-uuid = "48062228-2e41-5def-b9a4-89aafe57970f"
-version = "0.8.0"
+[[Downloads]]
+deps = ["ArgTools", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+
+[[Formatting]]
+deps = ["Printf"]
+git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8"
+uuid = "59287772-0a20-5a39-b81b-1366585eb4c0"
+version = "0.4.2"
 
 [[Future]]
 deps = ["Random"]
@@ -82,14 +97,26 @@ version = "1.0.0"
 
 [[JSON]]
 deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
+git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
 uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.0"
+version = "0.21.1"
+
+[[LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+
+[[LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
 
 [[LibGit2]]
-deps = ["Printf"]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 
+[[LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+
 [[Libdl]]
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
@@ -104,42 +131,58 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 deps = ["Base64"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
+[[MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+
 [[Missings]]
 deps = ["DataAPI"]
-git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
+git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c"
 uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-version = "0.4.3"
+version = "0.4.5"
 
 [[Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
+[[MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+
+[[NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+
 [[OrderedCollections]]
-git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3"
+git-tree-sha1 = "d45739abcfc03b51f6a42712894a593f74c80a23"
 uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-version = "1.2.0"
+version = "1.3.3"
 
 [[Parsers]]
-deps = ["Dates", "Test"]
-git-tree-sha1 = "eb3e09940c0d7ae01b01d9291ebad7b081c844d3"
+deps = ["Dates"]
+git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "1.0.5"
+version = "1.0.15"
 
 [[Pkg]]
-deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
+deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"]
 uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 
 [[PooledArrays]]
 deps = ["DataAPI"]
-git-tree-sha1 = "b1333d4eced1826e15adbdf01a4ecaccca9d353c"
+git-tree-sha1 = "0e8f5c428a41a81cd71f76d76f2fc3415fe5a676"
 uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
-version = "0.5.3"
+version = "1.1.0"
+
+[[PrettyTables]]
+deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"]
+git-tree-sha1 = "42126c4e2677cdc664baea004c98cc60a664fe40"
+uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+version = "0.11.0"
 
 [[Printf]]
 deps = ["Unicode"]
 uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
 [[REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets"]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 
 [[Random]]
@@ -147,14 +190,19 @@ deps = ["Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [[Reexport]]
-deps = ["Pkg"]
-git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0"
+git-tree-sha1 = "57d8440b0c7d98fc4f889e478e80f268d534c9d5"
 uuid = "189a3867-3050-52da-a836-e630ba90ab69"
-version = "0.2.0"
+version = "1.0.0"
 
 [[SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
+[[SentinelArrays]]
+deps = ["Dates", "Random"]
+git-tree-sha1 = "6ccde405cf0759eba835eb613130723cb8f10ff9"
+uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
+version = "1.2.16"
+
 [[Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
@@ -179,6 +227,16 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 deps = ["LinearAlgebra", "SparseArrays"]
 uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
+[[StructTypes]]
+deps = ["Dates", "UUIDs"]
+git-tree-sha1 = "65a43f5218197bc7091b76bc273a5e323a1d7b0d"
+uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
+version = "1.2.3"
+
+[[TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+
 [[TableTraits]]
 deps = ["IteratorInterfaceExtensions"]
 git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e"
@@ -187,12 +245,16 @@ version = "1.0.0"
 
 [[Tables]]
 deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
-git-tree-sha1 = "c45dcc27331febabc20d86cb3974ef095257dcf3"
+git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565"
 uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-version = "1.0.4"
+version = "1.3.2"
+
+[[Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
 
 [[Test]]
-deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[UUIDs]]
@@ -202,14 +264,10 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 [[Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
-[[WeakRefStrings]]
-deps = ["DataAPI", "Random", "Test"]
-git-tree-sha1 = "28807f85197eaad3cbd2330386fac1dcb9e7e11d"
-uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
-version = "0.6.2"
-
-[[YAML]]
-deps = ["Base64", "Dates", "Printf"]
-git-tree-sha1 = "c5e2eaa5ce818c5277388377d592eb4c81f27c00"
-uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
-version = "0.4.0"
+[[Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+
+[[nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
diff --git a/Project.toml b/Project.toml
index e74677a..9f56db5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Schemata"
 uuid = "b4d66a32-c6c0-5461-b6fa-34bb9cecaf85"
 authors = ["Jock Lawrie <jock.lawrie@gmail.com>"]
-version = "2.0.8"
+version = "3.0.0"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
@@ -9,17 +9,16 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [compat]
-CSV = "0.6.2"
-CategoricalArrays = "0.8.1"
-DataFrames = "0.21.2"
-Parsers = "1.0.5"
-Tables = "1.0.4"
-YAML = "0.4.0"
-julia = "1"
+CSV = "0.8.3"
+CategoricalArrays = "0.9.2"
+DataFrames = "0.22.5"
+Parsers = "1.0.15"
+Tables = "1.3.2"
+julia = "1.6"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/README.md b/README.md
index 1fd949b..5da3ebc 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ It exists independently of any particular data set, and therefore can be constru
 
 This package facilitates 3 use cases:
 
-1. Read/write a schema from/to a yaml file.
+1. Read/write a schema from/to a [TOML](https://toml.io/en/v1.0.0) file.
 
 2. Compare a data set to a schema and list the non-compliance issues.
 
@@ -19,28 +19,29 @@ Indeed the 3 use cases listed above can be carried out without writing any Julia
 
 # Usage
 
-A `TableSchema` looks like this `yaml` file:
-
-```YAML
-name: mytable
-description: "My table"
-primarykey: patientid  # A column name or a vector of column names
-columns:
-  - patientid: {description: Patient ID,  datatype: UInt,   iscategorical: false, isrequired: true, isunique: true,  validvalues: UInt}
-  - age:       {description: Age (years), datatype: Int,    iscategorical: false, isrequired: true, isunique: false, validvalues: "0:120"}
-  - dose:      {description: Dose size,   datatype: String, iscategorical: true,  isrequired: true, isunique: false, 
-                validvalues: ["small", "medium", "large"]
-  - fever:     {description: Had fever,   datatype: Bool,   iscategorical: true,  isrequired: true, isunique: false, validvalues: Bool}
+A `TableSchema` looks like this `TOML` file:
+
+```toml
+name = "mytable"
+description = "My table"
+primarykey = "patientid"  # A column name or a vector of column names
+columns = [
+    {name = "patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
+    {name = "age", description = "Age (years)", datatype = "Int", validvalues = "Int", iscategorical = false, isrequired = true, isunique = false},
+    {name = "dose", description = "Dose size", datatype = "String", validvalues = ["small", "medium", "large"], iscategorical = true, isrequired = true, isunique = false},
+    {name = "fever", description = "Had fever", datatype = "Bool", validvalues = "Bool", iscategorical = true, isrequired = true, isunique = false}
+]
 ```
 
 A `Schema` contains 1 or more `TableSchema`. For example:
 
-```YAML
-name: fever
-description: "Fever schema"
-tables:
-  table1: *table1_schema
-  table2: *table2_schema
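+```toml
+name = "fever"
+description = "Fever schema"
+
+[tables.table1]  # description, primarykey, columns, ... laid out as in test/schemata/fever.toml
+
+[tables.table2]  # description, primarykey, columns, ... laid out as in test/schemata/fever.toml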
 ```
 
 For tables that fit into memory, usage is as follows:
@@ -49,7 +50,7 @@ For tables that fit into memory, usage is as follows:
 # Read in a schema
 using Schemata
 
-schema = readschema(joinpath(dirname(pathof(Schemata)), "..", "test/schemata/fever.yaml"))
+schema = readschema(joinpath(dirname(pathof(Schemata)), "..", "test/schemata/fever.toml"))
 ts     = schema.tables[:mytable]  # TableSchema for mytable
 
 # Construct/import a table (any object that satisfies the Tables.jl interface)
@@ -152,16 +153,16 @@ We often want to ensure that certain relationships hold between variables within
 For example, we might require that a person's marriage date is after his/her birth date.
 We can achieve this by specifying one or more intra-row constraints in a `TableSchema` as follows:
 
-```yaml
-name: intrarow_constraints_demo
-description: "Table with intra-row constraints"
-primarykey: id
-intrarow_constraints:
-  birth date before marriage date: "r[:dob] < r[:date_of_marriage]"
-columns:
-  - id:  {description: ID, datatype: UInt, iscategorical: false, isrequired: true, isunique: true, validvalues: UInt}
-  - dob: {description: Date of birth, datatype: Date, iscategorical: false, isrequired: true, isunique: false, validvalues: Date}
-  - date_of_marriage: {description: Date of marriage, datatype: Date, iscategorical: false, isrequired: false, isunique: false, validvalues: Date}
+```toml
+name = "intrarow_constraints_demo"
+description = "Table with intra-row constraints"
+primarykey  = "patientid"
+intrarow_constraints = {"birth date before marriage date" = "r[:dob] < r[:date_of_marriage]"}
+columns = [
+    {name="patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
+    {name="dob", description = "Date of birth", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = true, isunique = false},
+    {name="date_of_marriage", description = "Date of marriage", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = false, isunique = false}
+]
 ```
 
 Each constraint is specified as a key-value pair, where the key is a description of the constraint and
diff --git a/scripts/compare.jl b/scripts/compare.jl
index 2e029ed..e7fbc35 100644
--- a/scripts/compare.jl
+++ b/scripts/compare.jl
@@ -1,7 +1,7 @@
 #=
   Run this script as follows:
   $ cd /path/to/Schemata.jl
-  $ /path/to/julia scripts/compare.jl /path/to/config.yaml /path/to/inputdata sorted_by_primarykey
+  $ /path/to/julia scripts/compare.jl /path/to/config.toml /path/to/inputdata sorted_by_primarykey
   The 3rd argument, sorted_by_primarykey is either "true" or "false".
   If "true" the compare function assumes that your table is sorted by its primary key,
   which enables a faster comparison to the schema to be made.
diff --git a/src/Schemata.jl b/src/Schemata.jl
index 9393207..112460f 100644
--- a/src/Schemata.jl
+++ b/src/Schemata.jl
@@ -2,7 +2,7 @@ module Schemata
 
 export Schema, TableSchema, ColumnSchema, # types
        compare,                           # core function
-       readschema                         # read schema from config file
+       readschema, writeschema            # read/write schema from/to config file
 
 include("handle_validvalues.jl")
 include("types.jl")
diff --git a/src/compare/inmemory_table.jl b/src/compare/inmemory_table.jl
index 3917fb7..49bf5b1 100644
--- a/src/compare/inmemory_table.jl
+++ b/src/compare/inmemory_table.jl
@@ -3,6 +3,7 @@ Compare an in-memory table to a table schema.
 """
 module inmemory_table
 
+using CategoricalArrays
 using DataFrames
 using Tables
 
@@ -66,7 +67,7 @@ function compare(tableschema::TableSchema, indata, sorted_by_primarykey::Bool)
     # Column-level checks
     for (colname, colschema) in colname2colschema
         !colschema.iscategorical && continue
-        categorical!(outdata, colname)
+        transform!(outdata, colname => categorical, renamecols=false)
     end
     datacols_match_schemacols!(issues_in, tableschema, Set(propertynames(indata)))  # By construction this issue doesn't exist for outdata
     compare_datatypes!(issues_in,  indata,  colname2colschema)
diff --git a/src/compare/ondisk_table.jl b/src/compare/ondisk_table.jl
index f15a08e..3eafde6 100644
--- a/src/compare/ondisk_table.jl
+++ b/src/compare/ondisk_table.jl
@@ -37,7 +37,7 @@ function compare(tableschema::TableSchema, input_data_file::String, output_data_
     colissues_in  = issues_in[:columnissues]
     colissues_out = issues_out[:columnissues]
     CSV.write(output_data_file, init_outdata(tableschema, 0); delim=delim_outdata)  # Write column headers to disk
-    csvrows = CSV.Rows(input_data_file; reusebuffer=true, use_mmap=true)
+    csvrows = CSV.Rows(input_data_file; reusebuffer=true)
     for inputrow in csvrows
         # Parse inputrow into outputrow according to ColumnSchema
         i_outdata += 1
diff --git a/src/readwrite.jl b/src/readwrite.jl
index f0eaf00..0b3ea26 100644
--- a/src/readwrite.jl
+++ b/src/readwrite.jl
@@ -1,15 +1,71 @@
 module readwrite
 
-export readschema
+export readschema, writeschema
 
-using YAML
+using TOML
 using ..types
 
 "Returns either a Schema or a TableSchema, depending on the contents of the config file."
 function readschema(filename::String)
-    d = YAML.load_file(filename)
+    d = TOML.parsefile(filename)
     haskey(d, "columns") && return TableSchema(d)  # Config is for a TableSchema
     Schema(d)  # Config is for a Schema
 end
 
+writeschema(outfile::String, schema::Schema)           = toml_to_file(outfile, schema_to_dict(schema))            # Write a whole Schema to outfile as TOML
+writeschema(outfile::String, tableschema::TableSchema) = toml_to_file(outfile, tableschema_to_dict(tableschema))  # Write a single TableSchema to outfile as TOML
+
+"Write the Dict d to outfile in TOML format, replacing any existing content. Returns nothing."
+function toml_to_file(outfile::String, d)
+    open(io -> TOML.print(io, d), outfile, "w")  # This form of open closes the file even if printing throws
+    nothing
+end
+
+################################################################################
+# Utils
+
+"Return schema as a Dict{String, Any} (keys: name, description, tables), ready for TOML.print."
+function schema_to_dict(schema::Schema)
+    tables = Dict(String(tblname) => tableschema_to_dict(ts) for (tblname, ts) in schema.tables)
+    Dict{String, Any}(
+        "name" => String(schema.name), "description" => schema.description, "tables" => tables
+    )
+end
+
+function tableschema_to_dict(tableschema::TableSchema)  # Build the Dict{String, Any} form of a TableSchema
+    result = Dict{String, Any}()
+    result["name"]        = String(tableschema.name)
+    result["description"] = tableschema.description
+    result["primarykey"]  = String.(tableschema.primarykey)  # Broadcast: each primary key entry is converted to a String
+    columns = Dict{String, Any}[]  # One Dict per column, pushed in the order given by columnorder
+    for colname in tableschema.columnorder
+        push!(columns, colschema_to_dict(tableschema.colname2colschema[colname]))
+    end
+    result["columns"] = columns
+    if !isempty(tableschema.intrarow_constraints)  # The key is omitted entirely when there are no constraints
+        result["intrarow_constraints"] = Dict(msg => func_as_supplied for (func_as_supplied, f, msg) in tableschema.intrarow_constraints)  # NOTE(review): destructures 3 elements per constraint, but construct_intrarow_constraints returns Tuple{String, Function}[] when there are none and == reads only elements 1 and 2 - confirm the tuple layout and order
+    end
+    result
+end
+
+"Return colschema as a Dict{String, Any}; the parser entry is included only if one was supplied."
+function colschema_to_dict(colschema::ColumnSchema)
+    d = Dict{String, Any}(
+        "name"          => string(colschema.name),
+        "description"   => colschema.description,
+        "datatype"      => string(colschema.datatype),
+        "iscategorical" => colschema.iscategorical,
+        "isrequired"    => colschema.isrequired,
+        "isunique"      => colschema.isunique,
+        "validvalues"   => format_validvalues(colschema.validvalues, colschema.valueorder),
+    )
+    isnothing(colschema.parser_as_supplied) || (d["parser"] = colschema.parser_as_supplied)
+    d
+end
+
+# Valid values as written to file: a type or range becomes its string form, a Set becomes a vector.
+format_validvalues(vv::Union{DataType, AbstractRange}, valueorder) = string(vv)
+format_validvalues(vv::Set, ::Nothing) = sort!(collect(vv))     # No category order given: use sorted order
+format_validvalues(vv::Set, valueorder::Vector) = valueorder    # Keep the supplied category order
+
 end
diff --git a/src/types.jl b/src/types.jl
index c9880ae..9ad72c2 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -6,6 +6,7 @@ using Dates
 using Parsers
 
 import Base.parse  # For extending Base.parse to Base.parse(s::ColumnSchema, val)
+import Base.==
 
 using ..handle_validvalues
 
@@ -19,20 +20,21 @@ mutable struct ColumnSchema
     validvalues::Union{DataType, <:AbstractRange, <:Set}             # Either the full range of the data type or a user-supplied restriction.
     valueorder::Union{DataType, <:AbstractRange, <:Vector, Nothing}  # If iscategorical, valueorder specifies the ordering of categories. Else nothing.
     parser::Function      # outputvalue = parser(inputvalue)
+    parser_as_supplied::Union{Dict, Nothing}  # Internal use only; for writing the parser to disk in writeschema.
 
-    function ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser)
+    function ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser, parser_as_supplied)
         # Ensure eltyp and validvalues are consistent with each other
         tp_validvals = get_datatype(validvalues)
         datatype != tp_validvals && error("Column :$(name). Type of valid values ($(tp_validvals)) does not match that of eltype ($(datatype)).")
-        new(Symbol(name), description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser)
+        new(Symbol(name), description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser, parser_as_supplied)
     end
 end
 
-function ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues)
+function ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, parser_as_supplied=nothing)
     valueorder  = iscategorical ? validvalues : nothing
     validvalues = validvalues isa Vector ? Set(validvalues) : validvalues
     parser      = constructparser(nothing, nothing, nothing, datatype)
-    ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser)
+    ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser, parser_as_supplied)
 end
 
 function ColumnSchema(d::Dict)
@@ -53,18 +55,21 @@ function ColumnSchema(d::Dict)
     valueorder  = parse_validvalues(parser, datatype, d["validvalues"])
     validvalues = valueorder isa Vector ? Set(valueorder) : valueorder
     valueorder  = iscategorical ? valueorder : nothing
-    ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser)
+    parser_as_supplied = haskey(d, "parser") ? d["parser"] : nothing
+    ColumnSchema(name, description, datatype, iscategorical, isrequired, isunique, validvalues, valueorder, parser, parser_as_supplied)
 end
 
 function constructparser(func, args, kwargs, returntype)
     # Special cases
-    if ((func == Date) || (isnothing(func) && returntype == Date)) && !isnothing(args) && length(args) == 1
-        df = DateFormat(args[1])
-        return (x) -> try Date(x, df) catch e missing end
+    if (isnothing(func) && returntype === Date) || (!isnothing(func) && func isa DataType && func === Date)
+        if !isnothing(args) && length(args) == 1
+            df = DateFormat(args[1])
+            return (x) -> try Date(x, df) catch e missing end
+        end
     end
 
     # General cases
-    if func isa DataType || isnothing(func)
+    if isnothing(func) || func isa DataType
         opts = isnothing(kwargs) ? Parsers.Options() : Parsers.Options(kwargs...)
         function closure(val)
             len = val isa IO ? 0 : sizeof(val)  # Use default pos=1
@@ -81,6 +86,19 @@ function constructparser(func, args, kwargs, returntype)
     error("Invalid specification of the parser.")
 end
 
+"Two ColumnSchemas are equal if every field other than the parser function is equal."
+function ==(cs1::ColumnSchema, cs2::ColumnSchema)
+    cs1.name === cs2.name && cs1.description == cs2.description &&
+        cs1.datatype === cs2.datatype && cs1.iscategorical === cs2.iscategorical &&
+        cs1.isrequired === cs2.isrequired && cs1.isunique === cs2.isunique &&
+        cs1.validvalues == cs2.validvalues && cs1.valueorder == cs2.valueorder &&
+        cs1.parser_as_supplied == cs2.parser_as_supplied  # Stands in for the parser, which is not compared
+end
+
+# hash must agree with ==, otherwise Dict, Set and unique treat equal ColumnSchemas as distinct.
+# Hashing a subset of the compared fields is enough to guarantee that.
+Base.hash(cs::ColumnSchema, h::UInt) = hash(cs.datatype, hash(cs.description, hash(cs.name, h)))
+
 ################################################################################
 struct TableSchema
     name::Symbol
@@ -113,24 +131,18 @@ function TableSchema(d::Dict)
     name        = Symbol(d["name"])
     description = d["description"]
     pk          = d["primarykey"]  # String or Vector{String}
-    primarykey  = typeof(pk) == String ? [Symbol(pk)] : [Symbol(colname) for colname in pk]
+    primarykey  = pk isa String ? [Symbol(pk)] : [Symbol(colname) for colname in pk]
     columns     = d["columns"]
     columnorder = fill(Symbol("x"), size(columns, 1))
     colname2colschema = Dict{Symbol, ColumnSchema}()
-    i = 0
-    for colname2schema in columns
-        for (colname, colschema) in colname2schema
-            i += 1
-            columnorder[i]    = Symbol(colname)
-            colschema["name"] = columnorder[i]
-            colname2colschema[columnorder[i]] = ColumnSchema(colschema)
-        end
+    for (i, d2) in enumerate(columns)
+        columnorder[i] = Symbol(d2["name"])
+        colname2colschema[columnorder[i]] = ColumnSchema(d2)
     end
     intrarow_constraints = construct_intrarow_constraints(d)
     TableSchema(name, description, colname2colschema, columnorder, primarykey, intrarow_constraints)
 end
 
-
 function construct_intrarow_constraints(d::Dict)
     !haskey(d, "intrarow_constraints") && return Tuple{String, Function}[]
     d = d["intrarow_constraints"]
@@ -146,6 +158,33 @@ function construct_intrarow_constraints(d::Dict)
     result
 end
 
+"""
+    ==(ts1::TableSchema, ts2::TableSchema)
+
+Return true if ts1 and ts2 have the same name, description, column schemata, column order,
+primary key and intra-row constraints (elements 1 and 2 of each constraint tuple).
+
+NOTE(review): the constraint functions are compared with ==. If they are anonymous functions
+created afresh on each read, schemata with constraints may never compare equal - confirm.
+"""
+function ==(ts1::TableSchema, ts2::TableSchema)
+    # Same length and pairwise identical (===); used for the two lists of column names
+    sameorder(a, b) = length(a) == length(b) && all(x === y for (x, y) in zip(a, b))
+    ts1.name === ts2.name || return false
+    ts1.description == ts2.description || return false
+    # == on the Dicts requires the same column names, with the ColumnSchemas compared via ==
+    ts1.colname2colschema == ts2.colname2colschema || return false
+    sameorder(ts1.columnorder, ts2.columnorder) || return false
+    sameorder(ts1.primarykey, ts2.primarykey) || return false
+    c1, c2 = ts1.intrarow_constraints, ts2.intrarow_constraints
+    length(c1) == length(c2) || return false
+    all(x[1] == y[1] && x[2] == y[2] for (x, y) in zip(c1, c2))
+end
+
+# hash must agree with ==, otherwise Dict, Set and unique treat equal TableSchemas as distinct.
+# Hashing only the name and description is enough to guarantee that, and it sidesteps the
+# Dict of column schemata and the constraint functions.
+Base.hash(ts::TableSchema, h::UInt) = hash(ts.description, hash(ts.name, h))
 
 ################################################################################
 struct Schema
@@ -169,4 +208,16 @@ function Schema(d::Dict)
     Schema(name, description, tables)
 end
 
+"Two Schemas are equal if their names, descriptions and tables (compared with ==) all match."
+function ==(s1::Schema, s2::Schema)
+    s1.name === s2.name || return false
+    s1.description == s2.description || return false
+    length(s1.tables) == length(s2.tables) || return false
+    # Same number of tables, so it suffices that every table of s1 has an equal counterpart in s2
+    all(haskey(s2.tables, tblname) && ts == s2.tables[tblname] for (tblname, ts) in s1.tables)
+end
+
+# hash must agree with ==, otherwise Dict, Set and unique treat equal Schemas as distinct.
+Base.hash(s::Schema, h::UInt) = hash(s.description, hash(s.name, h))
+
 end
diff --git a/test/schemata/fever.toml b/test/schemata/fever.toml
new file mode 100644
index 0000000..ef3cd31
--- /dev/null
+++ b/test/schemata/fever.toml
@@ -0,0 +1,12 @@
+name = "fever"
+description = "Fever schema"
+
+[tables.mytable]
+description = "My table"
+primarykey  = "patientid"
+columns = [
+    {name = "patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
+    {name = "age", description = "Age (years)", datatype = "Int", validvalues = "Int", iscategorical = false, isrequired = true, isunique = false},
+    {name = "dose", description = "Dose size", datatype = "String", validvalues = ["small", "medium", "large"], iscategorical = true, isrequired = true, isunique = false},
+    {name = "fever", description = "Had fever", datatype = "Bool", validvalues = "Bool", iscategorical = true, isrequired = true, isunique = false}
+]
\ No newline at end of file
diff --git a/test/schemata/fever.yaml b/test/schemata/fever.yaml
deleted file mode 100644
index 7a74efb..0000000
--- a/test/schemata/fever.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: fever
-description: "Fever schema"
-tables:
-  mytable:
-    description: "My table"
-    primarykey: patientid
-    columns:
-    - patientid:
-        description: Patient ID
-        datatype: UInt
-        iscategorical: false
-        isrequired: true
-        isunique: true
-        validvalues: UInt
-    - age:
-        description: Age (years)
-        datatype: Int
-        iscategorical: false
-        isrequired: true
-        isunique: false
-        validvalues: Int
-    - dose:
-        description: Dose size
-        datatype: String
-        iscategorical: true
-        isrequired: true
-        isunique: false
-        validvalues: ["small", "medium", "large"]
-    - fever:
-        description: Had fever
-        datatype: Bool
-        iscategorical: true
-        isrequired: true
-        isunique: false
-        validvalues: Bool
diff --git a/test/schemata/row_constraints.toml b/test/schemata/row_constraints.toml
new file mode 100644
index 0000000..e33f1e7
--- /dev/null
+++ b/test/schemata/row_constraints.toml
@@ -0,0 +1,12 @@
+name = "constraints"  # name of the schema
+description = "Date constraints"
+
+[tables.dates]  # the schema's only table, keyed as `dates`
+description = "Table with date constraints"
+primarykey  = "patientid"  # refers to the `patientid` column defined in `columns` below
+intrarow_constraints = {"birth date before marriage date" = "r[:dob] < r[:date_of_marriage]"}  # label = expression over a row `r`; presumably evaluated per row by the package - confirm in src
+columns = [  # one inline table per column
+    {name="patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
+    {name="dob", description = "Date of birth", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = true, isunique = false},
+    {name="date_of_marriage", description = "Date of marriage", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = false, isunique = false}
+]
\ No newline at end of file
diff --git a/test/schemata/row_constraints.yaml b/test/schemata/row_constraints.yaml
deleted file mode 100644
index aae681c..0000000
--- a/test/schemata/row_constraints.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: constraints
-description: "Date constraints"
-tables:
-  dates:
-    description: "Table with date constraints"
-    primarykey: patientid
-    intrarow_constraints: {birth date before marriage date: "r[:dob] < r[:date_of_marriage]"}
-    columns:
-    - patientid:
-        description: Patient ID
-        datatype: UInt
-        iscategorical: false
-        isrequired: true
-        isunique: true
-        validvalues: UInt
-    - dob:
-        description: Date of birth
-        datatype: Date
-        iscategorical: false
-        isrequired: true
-        isunique: false
-        validvalues: Date
-    - date_of_marriage:
-        description: Date of marriage
-        datatype: Date
-        iscategorical: false
-        isrequired: false
-        isunique: false
-        validvalues: Date
diff --git a/test/test_inmemory_tables.jl b/test/test_inmemory_tables.jl
index 6fd497c..7dba625 100644
--- a/test/test_inmemory_tables.jl
+++ b/test/test_inmemory_tables.jl
@@ -37,7 +37,7 @@ outdata, issues_in, issues_out = compare(ts, tbl)
 @test size(issues_in, 1) == 4
 
 # Modify data to comply with the schema
-categorical!(tbl, [:dose, :fever])  # Ensure :dose and :fever contain categorical data
+transform!(tbl, [:dose, :fever] .=> categorical, renamecols=false)  # Ensure :dose and :fever contain categorical data
 outdata, issues_in, issues_out = compare(ts, tbl)
 @test size(issues_in,  1) == 2
 @test size(issues_out, 1) == 0
@@ -69,10 +69,11 @@ push!(schema.tables[:mytable].columnorder, :zipcode)
 @test schema.tables[:mytable].colname2colschema[:zipcode] == zipcode
 
 # Write the updated schema to disk
-#schemafile = joinpath(dirname(pathof(Schemata)), "..", "test/schemata/fever_updated.yaml")
-#writeschema(schemafile, schema)
-#schema_from_disk = readschema(schemafile)
-#@test schema == schema_from_disk
+schemafile = joinpath(mktempdir(), "fever_updated.toml")  # scratch dir (removed at process exit), so a failed round trip never leaves a file in test/schemata
+writeschema(schemafile, schema)
+schema_from_disk = readschema(schemafile)  # re-read what was just written
+rm(schemafile)
+@test schema == schema_from_disk  # the schema read back must compare equal to the in-memory one
 
 # Add a corresponding (non-compliant) column to the data
 tbl[!, :zipcode] = ["11111", "22222", "33333", "NULL"];  # CSV file was supplied with "NULL" values, forcing eltype to be String.
@@ -103,7 +104,7 @@ end
 
 my_zdt_custom_parser(dttm::DateTime, tz::String) = ZonedDateTime(dttm, TimeZone(tz))
 
-# Dict for ColumnSchema constructor, obtained after reading yaml
+# Dict for ColumnSchema constructor, in the form obtained after reading a TOML schema file
 d = Dict("name"          => "zdt", "description" => "Test custom parser for TimeZones.ZonedDateTime",
          "datatype"      => "ZonedDateTime",
          "iscategorical" => false, "isrequired" => true, "isunique" => true,
@@ -150,7 +151,7 @@ outdata, issues_in, issues_out = compare(ts, tbl);
 ################################################################################
 # Test intra-row constraints
 function test_row_constraints()
-    filename = joinpath(dirname(pathof(Schemata)), "..", "test/schemata/row_constraints.yaml")
+    filename = joinpath(dirname(pathof(Schemata)), "..", "test/schemata/row_constraints.toml")
     schema   = readschema(filename)
     d = DataFrame(
                   patientid = UInt.([1,2,3]),
diff --git a/test/test_ondisk_tables.jl b/test/test_ondisk_tables.jl
index 193dae5..c384f5a 100644
--- a/test/test_ondisk_tables.jl
+++ b/test/test_ondisk_tables.jl
@@ -60,7 +60,7 @@ issues_in = DataFrame(CSV.File(issues_infile; delim='\t'))
 ################################################################################
 # Test intra-row constraints
 function test_row_constraints()
-    filename = joinpath(dirname(pathof(Schemata)), "..", "test/schemata/row_constraints.yaml")
+    filename = joinpath(dirname(pathof(Schemata)), "..", "test/schemata/row_constraints.toml")
     schema   = readschema(filename)
     indata   = DataFrame(
                   patientid = UInt.([1,2,3]),