Skip to content

Commit

Permalink
[WIP] Gracefully handle validating empty JSONL datasets
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Aug 20, 2024
1 parent 6ffc926 commit da72220
Show file tree
Hide file tree
Showing 30 changed files with 654 additions and 86 deletions.
2 changes: 1 addition & 1 deletion DEPENDENCIES
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4
noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527
jsontoolkit https://github.com/sourcemeta/jsontoolkit 00251a4161434463c24bc18418e3ffd37f998f29
jsontoolkit https://github.com/sourcemeta/jsontoolkit 4d1dfef7be91ecadd810370b3d8a1d2e591bf574
hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc
6 changes: 3 additions & 3 deletions src/command_metaschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ auto intelligence::jsonschema::cli::metaschema(
const auto dialect{sourcemeta::jsontoolkit::dialect(entry.second)};
assert(dialect.has_value());

const auto metaschema{
sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)};
if (!cache.contains(dialect.value())) {
const auto metaschema{
sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)};
const auto metaschema_template{sourcemeta::jsontoolkit::compile(
metaschema, sourcemeta::jsontoolkit::default_schema_walker,
custom_resolver, sourcemeta::jsontoolkit::default_schema_compiler)};
Expand All @@ -43,7 +43,7 @@ auto intelligence::jsonschema::cli::metaschema(
if (sourcemeta::jsontoolkit::evaluate(
cache.at(dialect.value()), entry.second,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
pretty_evaluate_callback(error, metaschema,
sourcemeta::jsontoolkit::empty_pointer))) {
log_verbose(options)
<< entry.first.string()
Expand Down
2 changes: 1 addition & 1 deletion src/command_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ auto intelligence::jsonschema::cli::test(
schema_template,
get_data(test_case, entry.first.parent_path(), verbose),
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error, {"$ref"}))};
pretty_evaluate_callback(error, schema.value(), {"$ref"}))};

std::ostringstream test_case_description;
if (test_case.defines("description")) {
Expand Down
8 changes: 6 additions & 2 deletions src/command_validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ auto intelligence::jsonschema::cli::validate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(
error, sourcemeta::jsontoolkit::empty_pointer));
error, instance, sourcemeta::jsontoolkit::empty_pointer));
}

if (subresult) {
Expand Down Expand Up @@ -117,6 +117,10 @@ auto intelligence::jsonschema::cli::validate(
// For producing better error messages
throw sourcemeta::jsontoolkit::FileParseError(instance_path, error);
}

if (index == 0) {
log_verbose(options) << "warning: The JSONL file is empty\n";
}
} else {
const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};
std::ostringstream error;
Expand All @@ -139,7 +143,7 @@ auto intelligence::jsonschema::cli::validate(
subresult = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
pretty_evaluate_callback(error, instance,
sourcemeta::jsontoolkit::empty_pointer));
}

Expand Down
12 changes: 8 additions & 4 deletions src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,27 @@ auto parse_options(const std::span<const std::string> &arguments,
}

auto pretty_evaluate_callback(std::ostringstream &output,
const sourcemeta::jsontoolkit::JSON &instance,
const sourcemeta::jsontoolkit::Pointer &base)
-> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback {
output << "error: Schema validation failure\n";
return [&output, &base](
return [&output, &instance, &base](
const sourcemeta::jsontoolkit::SchemaCompilerEvaluationType,
const bool result,
const sourcemeta::jsontoolkit::SchemaCompilerTemplate::value_type
&step,
const sourcemeta::jsontoolkit::Pointer &evaluate_path,
const sourcemeta::jsontoolkit::Pointer &instance_location,
const sourcemeta::jsontoolkit::JSON &,
const sourcemeta::jsontoolkit::JSON &) -> void {
const sourcemeta::jsontoolkit::JSON &annotation) -> void {
if (result) {
return;
}

output << " " << sourcemeta::jsontoolkit::describe(step) << "\n";
output << " "
<< sourcemeta::jsontoolkit::describe(result, step, evaluate_path,
instance_location, instance,
annotation)
<< "\n";
output << " at instance location \"";
sourcemeta::jsontoolkit::stringify(instance_location, output);
output << "\"\n";
Expand Down
1 change: 1 addition & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ auto for_each_json(const std::vector<std::string> &arguments,
std::pair<std::filesystem::path, sourcemeta::jsontoolkit::JSON>>;

// Build an evaluation callback that reports failed validation steps as
// human-readable text.
//
// Parameters, in order:
// - the stream that receives the report. The line
//   "error: Schema validation failure" is written to it as soon as this
//   function is called, before any evaluation step runs
// - the JSON instance being validated, which is forwarded to the step
//   describer to produce the failure message
// - a base pointer (NOTE(review): its use is not visible in the reviewed
//   part of the definition -- confirm against utils.cc)
//
// The returned callback ignores steps whose result is true. For every
// failed step it appends a description of the failure followed by the
// instance location.
//
// The returned callback captures all three arguments by reference, so each
// of them must stay alive for as long as the callback may be invoked.
auto pretty_evaluate_callback(std::ostringstream &,
const sourcemeta::jsontoolkit::JSON &,
const sourcemeta::jsontoolkit::Pointer &)
-> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback;

Expand Down
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ add_jsonschema_test_unix(validate/fail_draft7)
add_jsonschema_test_unix(validate/fail_2019_09)
add_jsonschema_test_unix(validate/fail_2020_12)
add_jsonschema_test_unix(validate/pass_jsonl)
add_jsonschema_test_unix(validate/pass_jsonl_empty)
add_jsonschema_test_unix(validate/pass_jsonl_empty_verbose)
add_jsonschema_test_unix(validate/pass_jsonl_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry)
add_jsonschema_test_unix(validate/fail_jsonl_one)
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_multi_resolve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ $(realpath "$TMP")/test.json:
2/4 FAIL First failure
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_multi_resolve_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ $(realpath "$TMP")/test.json:
2/4 FAIL First failure
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
4 changes: 2 additions & 2 deletions test/test/fail_true_resolve_fragment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ $(realpath "$TMP")/test.json:
1/1 FAIL Fail
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Mark the current position of the evaluation process for future jumps
The object value was expected to validate against the statically referenced schema
at instance location ""
at evaluate path ""
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_true_single_resolve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ $(realpath "$TMP")/test.json:
2/3 FAIL Unexpected
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_true_single_resolve_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ $(realpath "$TMP")/test.json:
2/3 FAIL Unexpected
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_2019_09.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_2020_12.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft6.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft7.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #0)
}
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type array but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_all_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #0)
}
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type array but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_one.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1)
]
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type object but it was of type array
at instance location ""
at evaluate path "/type"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_one_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1)
]
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type object but it was of type array
at instance location ""
at evaluate path "/type"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_many.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance_2.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_many_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ ok: $(realpath "$TMP")/instance_1.json
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance_2.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
ok: $(realpath "$TMP")/instance_3.json
Expand Down
28 changes: 28 additions & 0 deletions test/validate/pass_jsonl_empty.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh

# An empty JSONL dataset must validate successfully and, without --verbose,
# the CLI must not print anything on stdout or stderr.
#
# $1 is the path to the CLI binary under test.

# -e: abort on the first failing command (including the CLI itself)
# -u: treat unset variables as errors
set -eu

# Scratch directory, removed again when the script exits
TMP="$(mktemp -d)"
cleanup() { rm -rf "$TMP"; }
trap cleanup EXIT

cat > "$TMP/schema.json" << 'EOF'
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# The dataset under test: a JSONL file with no entries at all
: > "$TMP/instance.jsonl"

# Capture stdout and stderr together in a single file
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" > "$TMP/output.txt" 2>&1

# No output is expected in non-verbose mode
: > "$TMP/expected.txt"

diff "$TMP/output.txt" "$TMP/expected.txt"
30 changes: 30 additions & 0 deletions test/validate/pass_jsonl_empty_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/sh

# An empty JSONL dataset must validate successfully. With --verbose the CLI
# is expected to announce the JSONL interpretation and then warn that the
# file is empty.
#
# $1 is the path to the CLI binary under test.

# -e: abort on the first failing command (including the CLI itself)
# -u: treat unset variables as errors
set -eu

# Scratch directory, removed again when the script exits
TMP="$(mktemp -d)"
cleanup() { rm -rf "$TMP"; }
trap cleanup EXIT

cat > "$TMP/schema.json" << 'EOF'
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# The dataset under test: a JSONL file with no entries at all
: > "$TMP/instance.jsonl"

# Capture stdout and stderr together in a single file
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose > "$TMP/output.txt" 2>&1

# The heredoc delimiter is unquoted on purpose so that $(realpath ...) expands
cat > "$TMP/expected.txt" << EOF
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
warning: The JSONL file is empty
EOF

diff "$TMP/output.txt" "$TMP/expected.txt"
21 changes: 20 additions & 1 deletion vendor/jsontoolkit/src/jsonl/iterator.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit da72220

Please sign in to comment.