From 3deb4c939a5f2fc7c0183ca134635aa4a35a98e4 Mon Sep 17 00:00:00 2001 From: Steve McCanne Date: Sat, 26 Oct 2024 10:52:33 -0700 Subject: [PATCH] first cut at ZSON to Super JSON rename in docs (#5376) This commit renames ZSON to Super JSON throughout the docs. The code and tests have not been updated, e.g., `-f zson` has not yet been changed to `-f jsup`. --- CHANGELOG.md | 6 +- README.md | 18 +- compiler/ztests/load.yaml | 6 +- docs/README.md | 8 +- docs/commands/zed.md | 4 +- docs/commands/zq.md | 8 +- docs/formats/README.md | 4 +- docs/formats/{zson.md => jsup.md} | 64 +++-- docs/formats/vng.md | 4 +- docs/formats/zjson.md | 48 ++-- docs/formats/zng.md | 22 +- docs/integrations/fluentd.md | 2 +- .../zeek/data-type-compatibility.md | 30 +-- .../zeek/reading-zeek-log-formats.md | 2 +- docs/integrations/zeek/shaping-zeek-json.md | 4 +- docs/language/conventions.md | 2 +- docs/language/data-types.md | 60 ++--- docs/language/functions/typename.md | 4 +- docs/language/functions/typeof.md | 2 +- docs/language/pipeline-model.md | 56 ++--- docs/tutorials/schools.md | 230 +++++++++--------- docs/tutorials/zq.md | 8 +- testdata/edu/README.md | 16 +- testdata/edu/{schools.zson => schools.jsup} | 0 .../edu/{testscores.zson => testscores.jsup} | 0 testdata/edu/{webaddrs.zson => webaddrs.jsup} | 0 26 files changed, 303 insertions(+), 305 deletions(-) rename docs/formats/{zson.md => jsup.md} (90%) rename testdata/edu/{schools.zson => schools.jsup} (100%) rename testdata/edu/{testscores.zson => testscores.jsup} (100%) rename testdata/edu/{webaddrs.zson => webaddrs.jsup} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c95c715a9..dfd6ac033a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -118,7 +118,7 @@ * The [Zed Language Overview docs](docs/language/overview.md) have been split into multiple sections (#4576) * Add support for [user-defined operators](docs/language/statements.md#operator-statements) (#4417, #4635, #4646, #4644, #4663, #4674, #4698, #4702, #4716) * Add 
experimental support to the [`get` operator](docs/language/operators/get.md) for customized methods, headers, and body (#4572) -* Allow float decorators on integers in [ZSON](docs/formats/zson.md) (#4654) +* Allow float decorators on integers in [ZSON](docs/formats/jsup.md) (#4654) * The [shaping docs](docs/language/shaping.md) have been expanded with a new section on [error handling](docs/language/shaping.md#error-handling) (#4686) * `zq` no longer attaches positional command line file inputs directly to [`join`](docs/language/operators/join.md) inputs (use [`file`](docs/language/operators/file.md) within a Zed program instead) (#4689) * [Zeek](https://zeek.org/)-related docs have been moved to the Integrations area of the [Zed docs site](https://zed.brimdata.io/docs) (#4694, #4696) @@ -246,7 +246,7 @@ * Revamped [`zed` command](docs/commands/zed.md) * New Zed lake format (see #3634 for a migration script) * New version of the [ZNG format](docs/formats/zng.md) (with read-only support for the previous version) -* New version of the [ZSON format](docs/formats/zson.md) +* New version of the [ZSON format](docs/formats/jsup.md) ## v0.33.0 @@ -587,7 +587,7 @@ questions. 
* zq: Fix an issue where returned errors could cause a panic due to type mismatches (#1720, #1727, #1728, #1740, #1773) * python: Fix an issue where the [Python client](https://medium.com/brim-securitys-knowledge-funnel/visualizing-ip-traffic-with-brim-zeek-and-networkx-3844a4c25a2f) did not generate an error when `zqd` was absent (#1711) * zql: Allow the `len()` function to work on `ip` and `net` types (#1725) -* ZSON: Add a [draft specification](docs/formats/zson.md) of the new ZSON format (#1715, #1735, #1741, #1765) +* ZSON: Add a [draft specification](docs/formats/jsup.md) of the new ZSON format (#1715, #1735, #1741, #1765) * zng: Add support for marshaling of `time` values (#1743) * zar: Fix an issue where a `couldn't read trailer` failure was observed during a `zar zq` query (#1748) * zar: Fix an issue where `zar import` of a 14 GB data set triggered a SEGV (#1766) diff --git a/README.md b/README.md index 624d6484d6..b26a378aad 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SuperDB [![Tests][tests-img]][tests] [![GoPkg][gopkg-img]][gopkg] -SuperDB is a new analytics database that supports relational tables and JSON +SuperDB is a new analytics database that supports relational tables and JSON on an equal footing. It shines when it comes to data wrangling where you need to explore or process large eclectic data sets. It's also pretty decent at analytics and @@ -23,7 +23,7 @@ system for semi-structured data, all data handled by SuperDB (e.g., JSON, CSV, Parquet files, Arrow streams, relational tables, etc) is automatically massaged into [super-structured data](https://zed.brimdata.io/docs/formats/#2-zed-a-super-structured-pattern) form. This super-structured data is then processed by a runtime that simultaneously -supports the statically-typed relational model and the dynamically-typed +supports the statically-typed relational model and the dynamically-typed JSON data model in a unified compute engine. 
## SuperSQL @@ -39,7 +39,7 @@ FROM 'https://data.gharchive.org/2015-01-01-15.json.gz' GROUP BY user ORDER BY len(repo) DESC LIMIT 5 |> FORK ( - => FROM f"https://api.github.com/users/${user}" + => FROM f"https://api.github.com/users/${user}" |> SELECT VALUE {user:login,created_at:time(created_at)} => PASS ) @@ -48,10 +48,10 @@ FROM 'https://data.gharchive.org/2015-01-01-15.json.gz' ## Super JSON -Super-structured data is strongly typed and "polymorphic": any value can take on any type +Super-structured data is strongly typed and "polymorphic": any value can take on any type and sequences of data need not all conform to a predefined schema. To this end, SuperDB extends the JSON format to support super-structured data in a format called -[Super JSON](https://zed.brimdata.io/docs/formats/zson) where all JSON values +[Super JSON](https://zed.brimdata.io/docs/formats/next/jsup) where all JSON values are also Super JSON values. Similarly, the [Super Binary](https://zed.brimdata.io/docs/formats/zng) format is an efficient binary representation of Super JSON (a bit like Avro) and the @@ -78,7 +78,7 @@ using the `super db` sub-commands. ## Piped Query Syntax -The long-term goal for SuperDB's SQL syntax (SuperSQL) is to be Postgres-compatible and interoperate +The long-term goal for SuperDB's SQL syntax (SuperSQL) is to be Postgres-compatible and interoperate with BI tools though this is currently a roadmap item. At the same time, the project seeks to forge new ground on the usability of SQL for data exploration. To this end, SuperSQL supports the @@ -86,15 +86,15 @@ SuperSQL supports the of GoogleSQL, recently described in their [VLDB 2024 paper](https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/). 
-In addition to the GoogleSQL syntax, SuperSQL includes additional pipeline -operators to enhance usability, e.g., for search, for traversing +In addition to the GoogleSQL syntax, SuperSQL includes additional pipeline +operators to enhance usability, e.g., for search, for traversing highly nested JSON, for data shaping, etc. To facilitate real-time, data exploration use cases, SuperDB supports an abbreviated form of SuperSQL called [SuperPipe](https://zed.brimdata.io/docs/language). -SuperPipe provides a large number of shortcuts when typing interactive +SuperPipe provides a large number of shortcuts when typing interactive queries, e.g., implied group-by clauses, dropping keywords, implied keyword searches, and so forth. Even though SuperPipe is simply a short-hand form SuperSQL, it sort of looks like the pipeline-style diff --git a/compiler/ztests/load.yaml b/compiler/ztests/load.yaml index c8c516eb1f..e11296a329 100644 --- a/compiler/ztests/load.yaml +++ b/compiler/ztests/load.yaml @@ -2,13 +2,13 @@ script: | export SUPER_DB_LAKE=test super db init -q super db create -q samples - super db load -q -use samples schools.zson + super db load -q -use samples schools.jsup super db create -q Orange super db query -z 'from samples | County=="Orange" | load Orange@main author "Diane"' | sed -E 's/[0-9a-zA-Z]{42}/xxx/' inputs: - - name: schools.zson - source: ../../testdata/edu/schools.zson + - name: schools.jsup + source: ../../testdata/edu/schools.jsup outputs: - name: stdout data: | diff --git a/docs/README.md b/docs/README.md index 6a92dae2da..195ede153e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -26,7 +26,7 @@ For a non-technical user, SuperDB is as easy to use as web search while for a technical user, SuperDB exposes its technical underpinnings in a gradual slope, providing as much detail as desired, packaged up in the easy-to-understand -[Super JSON data format](formats/zson.md) and +[Super JSON data format](formats/jsup.md) and [SuperPipe 
language](language/README.md). While `super` and its accompanying data formats are production quality, the project's @@ -39,9 +39,9 @@ a number of different elements of the system: * The [super data model](formats/zed.md) is the abstract definition of the data types and semantics that underlie the super-structured data formats. * The [super data formats](formats/README.md) are a family of -[human-readable (Super JSON, SUP)](formats/zson.md), -[sequential (Binary Super JSON, SUPZ)](formats/zng.md), and -[columnar (Super Parquet, SPAR)](formats/vng.md) formats that all adhere to the +[human-readable (Super JSON, JSUP)](formats/jsup.md), +[sequential (Super Binary, BSUP)](formats/zng.md), and +[columnar (Super Columnar, CSUP)](formats/vng.md) formats that all adhere to the same abstract super data model. * The [SuperPipe language](language/README.md) is the system's pipeline language for performing queries, searches, analytics, transformations, or any of the above combined together. diff --git a/docs/commands/zed.md b/docs/commands/zed.md index af3159c432..619066ecb3 100644 --- a/docs/commands/zed.md +++ b/docs/commands/zed.md @@ -529,7 +529,7 @@ The `date` field here is used by the Zed lake system to do time travel through the branch and pool history, allowing you to see the state of branches at any time in their commit history. -Arbitrary metadata expressed as any [ZSON value](../formats/zson.md) +Arbitrary metadata expressed as any [ZSON value](../formats/jsup.md) may be attached to a commit via the `-meta` flag. This allows an application or user to transactionally commit metadata alongside committed data for any purpose. This approach allows external applications to implement arbitrary @@ -601,7 +601,7 @@ If the `-monitor` option is specified and the lake is [located](#locating-the-la via network connection, `zed manage` will run continuously and perform updates as needed. By default a check is performed once per minute to determine if updates are necessary. 
The `-interval` option may be used to specify an -alternate check frequency in [duration format](../formats/zson.md#23-primitive-values). +alternate check frequency in [duration format](../formats/jsup.md#23-primitive-values). If `-monitor` is not specified, a single maintenance pass is performed on the lake. diff --git a/docs/commands/zq.md b/docs/commands/zq.md index 91188a1e5d..46f5794064 100644 --- a/docs/commands/zq.md +++ b/docs/commands/zq.md @@ -40,7 +40,7 @@ tends to be the most space-efficient and most performant. ZNG has efficiency si and [Protocol Buffers](https://developers.google.com/protocol-buffers) but its comprehensive [Zed type system](../formats/zed.md) obviates the need for schema specification or registries. -Also, the [ZSON](../formats/zson.md) format is human-readable and entirely one-to-one with ZNG +Also, the [ZSON](../formats/jsup.md) format is human-readable and entirely one-to-one with ZNG so there is no need to represent non-readable formats like Avro or Protocol Buffers in a clunky JSON encapsulated form. @@ -104,7 +104,7 @@ Note here that the query `1+1` [implies](../language/pipeline-model.md#implied-o | `zeek` | yes | [Zeek Logs](https://docs.zeek.org/en/master/logs/index.html) | | `zjson` | yes | [ZJSON - Zed over JSON](../formats/zjson.md) | | `zng` | yes | [ZNG - Binary Row Format](../formats/zng.md) | -| `zson` | yes | [ZSON - Human-readable Format](../formats/zson.md) | +| `zson` | yes | [ZSON - Human-readable Format](../formats/jsup.md) | The input format is typically [detected automatically](#auto-detection) and the formats for which "Auto" is "yes" in the table above support _auto-detection_. @@ -146,7 +146,7 @@ would produce this output in the default ZSON format ### ZSON-JSON Auto-detection -Since [ZSON](../formats/zson.md) is a superset of JSON, `zq` must be careful in whether it +Since [ZSON](../formats/jsup.md) is a superset of JSON, `zq` must be careful in whether it interprets input as ZSON as JSON. 
While you can always clarify your intent with the `-i zson` or `-i json`, `zq` attempts to "just do the right thing" when you run it with JSON vs. ZSON. @@ -190,7 +190,7 @@ typically omit quotes around field names. | `zeek` | [Zeek Logs](https://docs.zeek.org/en/master/logs/index.html) | | `zjson` | [ZJSON - Zed over JSON](../formats/zjson.md) | | `zng` | [ZNG - Binary Row Format](../formats/zng.md) | -| `zson` | [ZSON - Human-readable Format](../formats/zson.md) | +| `zson` | [ZSON - Human-readable Format](../formats/jsup.md) | The output format defaults to either ZSON or ZNG and may be specified with the `-f` option. diff --git a/docs/formats/README.md b/docs/formats/README.md index fddb3b78e2..47cd8dc783 100644 --- a/docs/formats/README.md +++ b/docs/formats/README.md @@ -8,7 +8,7 @@ > providing a unified approach to row, columnar, and human-readable formats. Together these > represent a superset of both the dataframe/table model of relational systems and the > semi-structured model that is used ubiquitously in development as JSON and by NoSQL -> data stores. The Super JSON spec has [a few examples](zson.md#3-examples). +> data stores. The Super JSON spec has [a few examples](jsup.md#3-examples). ## 1. Background @@ -266,7 +266,7 @@ A set of companion documents define a family of tightly integrated serialization formats that all adhere to the same super data model, providing a unified approach to row, columnar, and human-readable formats: -* [Super JSON](zson.md) is a human-readable format for super-structured data. All JSON +* [Super JSON](jsup.md) is a human-readable format for super-structured data. All JSON documents are Super JSON values as the Super JSON format is a strict superset of the JSON syntax. 
* [Super Binary](zng.md) is a row-based, binary representation somewhat like Avro but leveraging the super data model to represent a sequence of arbitrarily-typed diff --git a/docs/formats/zson.md b/docs/formats/jsup.md similarity index 90% rename from docs/formats/zson.md rename to docs/formats/jsup.md index 424527713e..9bc28b5baa 100644 --- a/docs/formats/zson.md +++ b/docs/formats/jsup.md @@ -1,25 +1,26 @@ --- sidebar_position: 3 -sidebar_label: ZSON +sidebar_label: Super JSON --- -# ZSON Specification +# Super JSON Specification ## 1. Introduction -ZSON is the human-readable, text-based serialization format of -the super-structured [Zed data model](zed.md). +Super JSON is the human-readable, text-based serialization format of +the [super data model](zed.md). -ZSON builds upon the elegant simplicity of JSON with "type decorators". +Super JSON builds upon the elegant simplicity of JSON with "type decorators". Where the type of a value is not implied by its syntax, a parenthesized type decorator is appended to the value thus establishing a well-defined -type for every value expressed in ZSON text. +type for every value expressed in source text. -ZSON is also a superset of JSON in that all JSON documents are valid ZSON values. +Super JSON is also a superset of JSON in that all JSON documents are valid +Super JSON values. -## 2. The ZSON Format +## 2. The Super JSON Format -A ZSON text is a sequence of UTF-8 characters organized either as a bounded input +A Super JSON text is a sequence of UTF-8 characters organized either as a bounded input or an unbounded stream. The input text is organized as a sequence of one or more Zed values optionally @@ -29,11 +30,11 @@ treated as whitespace and ignored. All subsequent references to characters and strings in this section refer to the Unicode code points that result when the stream is decoded. -If a ZSON input includes data that is not valid UTF-8, the input is invalid.
+If an input text includes data that is not valid UTF-8, the input is invalid. ### 2.1 Names -ZSON _names_ encode record fields, enum symbols, and named types. +Super JSON _names_ encode record fields, enum symbols, and named types. A name is either an _identifier_ or a [quoted string](#231-strings). Names are referred to as `` below. @@ -81,7 +82,7 @@ A decorator may also defined a temporary numeric reference of the form: Once defined, this numeric reference may then be used anywhere a named type is used but a named type is not created. -It is an error for the decorator to be type incompatible with its referenced value. +It is an error for the decorator to be type incompatible with its referenced value. Note that the `=` sigil here disambiguates between the case that a new type is defined, which may override a previous definition of a different type with the @@ -90,7 +91,7 @@ same name, from the case that an existing named type is merely decorating the va ### 2.3 Primitive Values The type names and format for -[Zed primitive](zed.md#1-primitive-types) values is as follows: +[primitive values](zed.md#1-primitive-types) is as follows: | Type | Value Format | |------------|---------------------------------------------------------------| @@ -224,7 +225,7 @@ record types as well as enum symbols. ### 2.4 Complex Values Complex values are built from primitive values and/or other complex values -and conform to the Zed data model's complex types: +and conform to the super data model's complex types: [record](zed.md#21-record), [array](zed.md#22-array), [set](zed.md#23-set), @@ -242,8 +243,8 @@ A record value has the form: ``` { : , : , ... } ``` -where `` is a [ZSON name](#21-names) and `` is -any optionally-decorated ZSON value inclusive of other records. +where `` is a [Super JSON name](#21-names) and `` is +any optionally-decorated value inclusive of other records. Each name/value pair is called a _field_. There may be zero or more fields. 
@@ -303,7 +304,7 @@ An enum value is indicated with the sigil `%` and has the form ``` % ``` -where the `` is [ZSON name](#21-names). +where the `` is [Super JSON name](#21-names). An enum value must appear in a context where the enum type is known, i.e., with an explicit enum type decorator or within a complex type where the @@ -322,7 +323,7 @@ An error value has the form: ``` error() ``` -where `` is any ZSON value. +where `` is any value. ### 2.5 Types @@ -335,7 +336,7 @@ A _record type_ has the form: ``` { : , : , ... } ``` -where `` is a [ZSON name](#21-names) and +where `` is a [Super JSON name](#21-names) and `` is any type. The order of the record fields is significant, @@ -378,7 +379,7 @@ An _enum type_ has the form: ``` enum( , , ... ) ``` -where `` is a [ZSON name](#21-names). +where `` is a [Super JSON name](#21-names). Each enum name must be unique and the order is not significant, e.g., enum type `enum(HEADS,TAILS)` is equal to type `enum(TAILS,HEADS)`. @@ -388,7 +389,7 @@ An _error type_ has the form: ``` error( ) ``` -where `` is the type of the underlying ZSON values wrapped as an error. +where `` is the type of the underlying values wrapped as an error. #### 2.5.8 Named Type @@ -422,14 +423,11 @@ The null value is represented by the string `null`. A value of any type can be null. It is up to an implementation to decide how external data structures map into and -out of null values of different types. Typically, a null value means either the -zero value or, in the case of record fields, an optional field whose -value is not present, though these semantics are not explicitly -defined by ZSON. +out of null values of different types. ## 3. 
Examples -The simplest ZSON value is a single value, perhaps a string like this: +The simplest Super JSON value is a single value, perhaps a string like this: ``` "hello, world" ``` @@ -441,17 +439,17 @@ A relational table might look like this: { city: "Broad Cove", state: "ME", population: 806 (uint32) } (=city_schema) { city: "Baton Rouge", state: "LA", population: 221599 (uint32) } (=city_schema) ``` -This ZSON text here depicts three record values. It defines a type called `city_schema` +The text here depicts three record values. It defines a type called `city_schema` and the inferred type of the `city_schema` has the signature: ``` { city:string, state:string, population:uint32 } ``` When all the values in a sequence have the same record type, the sequence -can be interpreted as a _table_, where the ZSON record values form the _rows_ +can be interpreted as a _table_, where record values form the _rows_ and the fields of the records form the _columns_. In this way, these three records form a relational table conforming to the schema `city_schema`. -In contrast, a ZSON text representing a semi-structured sequence of log lines +In contrast, text representing a semi-structured sequence of log lines might look like this: ``` { @@ -482,10 +480,10 @@ the value itself: The subsequent value defines a type called `access_list`. In this case, the `nets` field is an array of networks and illustrates the helpful range of -primitive types in ZSON. Note that the syntax here implies +primitive types. Note that the syntax here implies the type of the array, as it is inferred from the type of the elements. -Finally, there are four more values that show ZSON's efficacy for +Finally, there are four more values that show Super JSON's efficacy for representing metrics. Here, there are no type decorators as all of the field types are implied by their syntax, and hence, the top-level record type is implied. 
For instance, the `ts` field is an RFC 3339 date and time string, @@ -496,13 +494,13 @@ record type implied by each of the three variations of type of the `value` field ## 4. Grammar -Here is a left-recursive pseudo-grammar of ZSON. Note that not all +Here is a left-recursive pseudo-grammar of Super JSON. Note that not all acceptable inputs are semantically valid as type mismatches may arise. For example, union and enum values must both appear in a context that defines their type. ``` - = | | + = | | = . diff --git a/docs/formats/vng.md b/docs/formats/vng.md index 4c68fe9fe3..113b31f1a9 100644 --- a/docs/formats/vng.md +++ b/docs/formats/vng.md @@ -373,7 +373,7 @@ using the same tag within the union value. ### Hello, world -Start with this ZNG data (shown as human-readable [ZSON](zson.md)): +Start with this [Super JSON](jsup.md) data: ``` {a:"hello",b:"world"} {a:"goodnight",b:"gracie"} @@ -381,7 +381,7 @@ Start with this ZNG data (shown as human-readable [ZSON](zson.md)): To convert to VNG format: ``` -super -f vng hello.zson > hello.vng +super -f vng hello.jsup > hello.vng ``` Segments in the VNG format would be laid out like this: diff --git a/docs/formats/zjson.md b/docs/formats/zjson.md index edb3f3c281..824ce13a0d 100644 --- a/docs/formats/zjson.md +++ b/docs/formats/zjson.md @@ -7,14 +7,14 @@ sidebar_label: ZJSON ## 1. Introduction -The [Zed data model](zed.md) +The [super data model](zed.md) is based on richly typed records with a deterministic field order, -as is implemented by the [ZSON](zson.md), [ZNG](zng.md), and [VNG](vng.md) formats. +as is implemented by the [Super JSON](jsup.md), [Super Binary](zng.md), and [Super Columnar](vng.md) formats. Given the ubiquity of JSON, it is desirable to also be able to serialize -Zed data into the JSON format. However, encoding Zed data values +super data into the JSON format. However, encoding super data values directly as JSON values would not work without loss of information.
-For example, consider this Zed data as [ZSON](zson.md): +For example, consider this [Super JSON](jsup.md) data: ``` { ts: 2018-03-24T17:15:21.926018012Z, @@ -61,28 +61,28 @@ Also, it is at the whim of a JSON implementation whether or not the order of object keys is preserved. While JSON is well suited for data exchange of generic information, it is not -so appropriate for a [super-structured data model](./README.md#2-a-super-structured-pattern) -like Zed. That said, JSON can be used as an encoding format for Zed by mapping Zed data -onto a JSON-based protocol. This allows clients like web apps or -Electron apps to receive and understand Zed and, with the help of client +sufficient for the [super-structured data model](./README.md#2-zed-a-super-structured-pattern). +That said, JSON can be used as an encoding format for super data with another layer +of encoding on top of a JSON-based protocol. This allows clients like web apps or +Electron apps to receive and understand Super JSON and, with the help of client libraries like [zed-js](https://github.com/brimdata/zui/tree/main/packages/zed-js), -to manipulate the rich, structured Zed types that are implemented on top of +to manipulate the rich, structured Super JSON types that are implemented on top of the basic JavaScript types. In other words, because JSON objects do not have a deterministic field order nor does JSON in general have typing beyond the basics (i.e., strings, floating point numbers, -objects, arrays, and booleans), we decided to encode Zed data with -its embedded type model all in a layer above regular JSON. +objects, arrays, and booleans), Super JSON and +its embedded type model are layered on top of regular JSON. ## 2. The Format -The format for representing Zed in JSON is called ZJSON. -Converting ZSON, ZNG, or VNG to ZJSON and back results in a complete and -accurate restoration of the original Zed data. +The format for representing Super JSON data in JSON is called ZJSON.
+Converting Super JSON, Super Binary, or Super Columnar to ZJSON and back results in a complete and +accurate restoration of the original super data. A ZJSON stream is defined as a sequence of JSON objects where each object -represents a Zed value and has the form: +represents a value and has the form: ``` { "type": , @@ -93,7 +93,7 @@ The type and value fields are encoded as defined below. ### 2.1 Type Encoding -The type encoding for a primitive type is simply its [Zed type name](zed.md#1-primitive-types) +The type encoding for a primitive type is simply its [type name](zed.md#1-primitive-types) e.g., "int32" or "string". Complex types are encoded with small-integer identifiers. @@ -101,7 +101,7 @@ The first instance of a unique type defines the binding between the integer identifier and its definition, where the definition may recursively refer to earlier complex types by their identifiers. -For example, the Zed type `{s:string,x:int32}` has this ZJSON format: +For example, the type `{s:string,x:int32}` has this ZJSON format: ``` { "id": 123, @@ -229,7 +229,7 @@ where `` is a recursively encoded type. #### 2.1.8 Named Type -A named type is encoded as a binding between a name and a Zed type +A named type is encoded as a binding between a name and a type and represents a new type so named. A type definition type has the form ``` { @@ -244,7 +244,7 @@ and `` is a recursively encoded type. 
### 2.2 Value Encoding -The primitive values comprising an arbitrarily complex Zed data value are encoded +The primitive values comprising an arbitrarily complex data value are encoded as a JSON array of strings mixed with nested JSON arrays whose structure conforms to the nested structure of the value's schema as follows: * each record, array, and set is encoded as a JSON array of its composite values, @@ -256,8 +256,8 @@ as described recursively herein, `[ , ]` where `key` and `value` are recursively encoded, * a type value is encoded [as above](#21-type-encoding), * each primitive that is not a type value -is encoded as a string conforming to its ZSON representation, as described in the -[corresponding section of the ZSON specification](zson.md#23-primitive-values). +is encoded as a string conforming to its Super JSON representation, as described in the +[corresponding section of the Super JSON specification](jsup.md#23-primitive-values). For example, a record with three fields --- a string, an array of integers, and an array of union of string, and float64 --- might have a value that looks like this: @@ -275,9 +275,9 @@ writes its ZJSON output as lines of NDJSON. ## 4. Example Here is an example that illustrates values of a repeated type, -nesting, records, array, and union. Consider the file `input.zson`: +nesting, records, array, and union. Consider the file `input.jsup`: -```mdtest-input input.zson +```mdtest-input input.jsup {s:"hello",r:{a:1,b:2}} {s:"world",r:{a:3,b:4}} {s:"hello",r:{a:[1,2,3]}} @@ -288,7 +288,7 @@ nesting, records, array, and union. Consider the file `input.zson`: This data is represented in ZJSON as follows: ```mdtest-command -super -f zjson input.zson | jq . +super -f zjson input.jsup | jq . ``` ```mdtest-output diff --git a/docs/formats/zng.md b/docs/formats/zng.md index 1c2a3b464d..aa19f260fb 100644 --- a/docs/formats/zng.md +++ b/docs/formats/zng.md @@ -8,12 +8,12 @@ sidebar_label: ZNG ## 1. 
Introduction ZNG (pronounced "zing") is an efficient, sequence-oriented serialization format for any data -conforming to the [Zed data model](zed.md). +conforming to the [super data model](zed.md). ZNG is "row oriented" and analogous to [Apache Avro](https://avro.apache.org) but does not require schema definitions as it instead utilizes the fine-grained type system -of the Zed data model. +of the super data model. This binary format is based on machine-readable data types with an encoding methodology inspired by Avro, [Parquet](https://en.wikipedia.org/wiki/Apache_Parquet), and @@ -23,7 +23,7 @@ To this end, ZNG embeds all type information in the stream itself while having a binary serialization format that allows "lazy parsing" of fields such that only the fields of interest in a stream need to be deserialized and interpreted. -Unlike Avro, ZNG embeds its "schemas" in the data stream as Zed types and thereby admits +Unlike Avro, ZNG embeds its "schemas" in the data stream as types and thereby admits an efficient multiplexing of heterogeneous data types by prepending to each data value a simple integer identifier to reference its type. @@ -152,7 +152,7 @@ then interpreted according to the `T` bits of the frame code as a ### 2.1 Types Frame -A _types frame_ encodes a sequence of type definitions for complex Zed types +A _types frame_ encodes a sequence of type definitions for complex types and establishes a "type ID" for each such definition. Type IDs for the "primitive types" are predefined with the IDs listed in the [Primitive Types](#3-primitive-types) table. @@ -210,7 +210,7 @@ is further encoded as a "counted string", which is the `uvarint` encoding of the length of the string followed by that many bytes of UTF-8 encoded string data. 
-N.B.: As defined by [ZSON](zson.md), a field name can be any valid UTF-8 string much like JSON +N.B.: As defined by [Super JSON](jsup.md), a field name can be any valid UTF-8 string much like JSON objects can be indexed with arbitrary string keys (via index operator) even if the field names available to the dot operator are restricted by language syntax for identifiers. @@ -313,7 +313,7 @@ type that differs from the previous definition. ### 2.2 Values Frame -A _values frame_ is a sequence of Zed values each encoded as the value's type ID, +A _values frame_ is a sequence of values each encoded as the value's type ID, encoded as a `uvarint`, followed by its tag-encoded serialization as described below. Since a single type ID encodes the entire value's structure, no additional @@ -420,11 +420,11 @@ this specification to decode all of the data as described herein even if such frames provide additional semantics on top of the base ZNG format. The body of a control frame is a control message and may be JSON, -ZSON, ZNG, binary, or UTF-8 text. The serialization of the control -frame body is independent of the ZNG stream containing the control +Super JSON, Super Binary, arbitrary binary, or UTF-8 text. The serialization of the control +frame body is independent of the Super Binary stream containing the control frame. -Any control message not known by a ZNG data receiver shall be ignored. +Any control message not known by a Super Binary data receiver shall be ignored.
The delivery order of control messages with respect to the delivery order of values of the ZNG stream should be preserved by an API implementing @@ -442,7 +442,7 @@ A control frame has the following form: ``` where * `` is a single byte indicating whether the body is encoded -as ZNG (0), JSON (1), ZSON (2), an arbitrary UTF-8 string (3), or arbitrary binary data (4), +as ZNG (0), JSON (1), Super JSON (2), an arbitrary UTF-8 string (3), or arbitrary binary data (4), * `` is a `uvarint` encoding the length in bytes of the body (exclusive of the length 1 encoding byte), and * `` is a control message whose semantics are outside the scope of @@ -531,7 +531,7 @@ are serialized in little-endian format. ## 4. Type Values -As the ZSON data model supports first-class types and because the ZNG design goals +As the super data model supports first-class types and because the ZNG design goals require that value serializations cannot change across type contexts, type values must be encoded in a fashion that is independent of the type context. Thus, a serialized type value encodes the entire type in a canonical form diff --git a/docs/integrations/fluentd.md b/docs/integrations/fluentd.md index 66adaf8504..7637d15b02 100644 --- a/docs/integrations/fluentd.md +++ b/docs/integrations/fluentd.md @@ -393,7 +393,7 @@ options. Varying these may impact how quickly events appear in the pool and the size of the commit objects to which they're initially stored. 2. **ZNG format** - In the [shaping example](#shaping-example) shown above, we -used Zed's [ZSON](../formats/zson.md) format for the shaped data output from +used the [Super JSON](../formats/jsup.md) format for the shaped data output from [`zq`](../commands/zq.md). This text format is typically used in contexts where human readability is required.
Due to its compact nature, [ZNG](../formats/zng.md) format would have been preferred, but in our research diff --git a/docs/integrations/zeek/data-type-compatibility.md b/docs/integrations/zeek/data-type-compatibility.md index fae6486539..616de24a67 100644 --- a/docs/integrations/zeek/data-type-compatibility.md +++ b/docs/integrations/zeek/data-type-compatibility.md @@ -5,9 +5,9 @@ sidebar_label: Zed/Zeek Data Type Compatibility # Zed/Zeek Data Type Compatibility -As the [Zed data model](../../formats/zed.md) was in many ways inspired by the +As the [super data model](../../formats/zed.md) was in many ways inspired by the [Zeek TSV log format](https://docs.zeek.org/en/master/log-formats.html#zeek-tsv-format-logs), -the rich Zed storage formats ([ZSON](../../formats/zson.md), +SuperDB's rich storage formats ([Super JSON](../../formats/jsup.md), [ZNG](../../formats/zng.md), etc.) maintain comprehensive interoperability with Zeek. When Zeek is configured to output its logs in JSON format, much of the rich type information is lost in translation, but @@ -23,7 +23,7 @@ representation of any Zeek data that is read or imported. Therefore, knowing the equivalent types will prove useful when performing operations in the [Zed language](../../language/README.md) such as [type casting](../../language/shaping.md#cast) or looking at the data -when output as ZSON. +when output as Super JSON. ## Equivalent Types @@ -64,7 +64,7 @@ there is no authoritative specification of the Zeek TSV log format. ## Example The following example shows a TSV log that includes each Zeek data type, how -it's output as ZSON by [`zq`](../../commands/zq.md), and then how it's written back out again as a Zeek +it's output as Super JSON by [`zq`](../../commands/zq.md), and then how it's written back out again as a Zeek log. You may find it helpful to refer to this example when reading the [type-specific details](#type-specific-details). 
@@ -86,10 +86,10 @@ cat zeek_types.log T 123 456 123.4560 1592502151.123456 123.456 smilešŸ˜smile \x09\x07\x04 80 127.0.0.1 10.0.0.0/8 tcp things,in,a,set order,is,important Jeanne 122 ``` -#### Reading the TSV log, outputting as ZSON, and saving a copy: +#### Reading the TSV log, outputting as Super JSON, and saving a copy: ```mdtest-command -super -Z zeek_types.log | tee zeek_types.zson +super -Z zeek_types.log | tee zeek_types.jsup ``` #### Output: @@ -126,10 +126,10 @@ super -Z zeek_types.log | tee zeek_types.zson } ``` -#### Reading the saved ZSON output and outputting as Zeek TSV: +#### Reading the saved Super JSON output and outputting as Zeek TSV: ```mdtest-command -super -f zeek zeek_types.zson +super -f zeek zeek_types.jsup ``` #### Output: @@ -145,14 +145,14 @@ T 123 456 123.456 1592502151.123456 123.456000 smilešŸ˜smile \x09\x07\x04 80 12 ## Type-Specific Details -As `zq` acts as a reference implementation for Zed storage formats such as -ZSON and ZNG, it's helpful to understand how it reads the following Zeek data -types into readable text equivalents in the ZSON format, then writes them back +As `zq` acts as a reference implementation for SuperDB storage formats such as +Super JSON and ZNG, it's helpful to understand how it reads the following Zeek data +types into readable text equivalents in the Super JSON format, then writes them back out again in the Zeek TSV log format. Other implementations of the Zed storage formats (should they exist) may handle these differently. Multiple Zeek types discussed below are represented via a -[type definition](../../formats/zson.md#22-type-decorators) to one of Zed's +[type definition](../../formats/jsup.md#22-type-decorators) to one of Zed's [primitive types](../../formats/zed.md#1-primitive-types). 
The Zed type definitions maintain the history of the field's original Zeek type name such that `zq` may restore it if the field is later output in @@ -165,9 +165,9 @@ specific Zeek type, though no such operations are currently implemented in As they do not affect accuracy, "trailing zero" decimal digits on Zeek `double` values will _not_ be preserved when they are formatted into a string, such as -via the ZSON/Zeek/table output options in `zq` (e.g., `123.4560` becomes +via the `-f jsup|zeek|table` output options in `zq` (e.g., `123.4560` becomes `123.456`). - + ### `enum` As they're encountered in common programming languages, enum variables @@ -255,7 +255,7 @@ For instance, revisiting the data from our example, we can output all fields wit #### Command: ```mdtest-command -super -f zeek -c 'cut my_record' zeek_types.zson +super -f zeek -c 'cut my_record' zeek_types.jsup ``` #### Output: diff --git a/docs/integrations/zeek/reading-zeek-log-formats.md b/docs/integrations/zeek/reading-zeek-log-formats.md index bb9408194f..06677cc941 100644 --- a/docs/integrations/zeek/reading-zeek-log-formats.md +++ b/docs/integrations/zeek/reading-zeek-log-formats.md @@ -17,7 +17,7 @@ is Zeek's default output format for logs. This format can be read automatically with the Zed tools such as [`zq`](../../commands/zq.md). The following example shows a TSV [`conn.log`](https://docs.zeek.org/en/master/logs/conn.html) being read via `zq` and -output as [ZSON](../../formats/zson.md). +output as [Super JSON](../../formats/jsup.md). #### conn.log diff --git a/docs/integrations/zeek/shaping-zeek-json.md b/docs/integrations/zeek/shaping-zeek-json.md index 8d9bbd8857..f5065fb3e9 100644 --- a/docs/integrations/zeek/shaping-zeek-json.md +++ b/docs/integrations/zeek/shaping-zeek-json.md @@ -185,7 +185,7 @@ The bulk of this Zed shaper consists of detailed per-field data type definitions for each record in the default set of JSON logs output by Zeek.
These type definitions reference the types we defined above, such as `port` and `conn_id`. The syntax for defining primitive and complex types follows the -relevant sections of the [ZSON Format](../../formats/zson.md#2-the-zson-format) +relevant sections of the [Super JSON Format](../../formats/jsup.md#2-the-super-json-format) specification. ``` @@ -331,7 +331,7 @@ operations on the richly-typed records, the Zed query on the command line should begin with a `|`, as this appends it to the pipeline at the bottom of the shaper from the included file. -For example, to see a ZSON representation of just the errors that may have +For example, to see a Super JSON representation of just the errors that may have come from attempting to shape all the logs in the current directory: ``` diff --git a/docs/language/conventions.md b/docs/language/conventions.md index 93b8b7d83b..39d5743eca 100644 --- a/docs/language/conventions.md +++ b/docs/language/conventions.md @@ -13,7 +13,7 @@ in the Zed documentation include several type classes as follows: * _float_ - any floating point Zed type * _int_ - any signed or unsigned Zed integer type * _number_ - either float or int -* _record_ - any [record](../formats/zson.md#251-record-type) type +* _record_ - any [record](../formats/jsup.md#251-record-type) type Note that there is no "any" type in Zed as all super-structured data is comprehensively typed; "any" here simply refers to a value that is allowed diff --git a/docs/language/data-types.md b/docs/language/data-types.md index f5ec978df7..5eb4a6bbca 100644 --- a/docs/language/data-types.md +++ b/docs/language/data-types.md @@ -5,37 +5,37 @@ sidebar_label: Data Types # Data Types -The Zed language includes most data types of a typical programming language -as defined in the [Zed data model](../formats/zed.md). +The SuperPipe language includes most data types of a typical programming language +as defined in the [super data model](../formats/zed.md). 
The syntax of individual literal values generally follows -the [ZSON syntax](../formats/zson.md) with the exception that -[type decorators](../formats/zson.md#22-type-decorators) +the [Super JSON syntax](../formats/jsup.md) with the exception that +[type decorators](../formats/jsup.md#22-type-decorators) are not included in the language. Instead, a [type cast](expressions.md#casts) may be used in any expression for explicit type conversion. In particular, the syntax of primitive types follows the -[primitive-value definitions](../formats/zson.md#23-primitive-values) in ZSON -as well as the various [complex value definitions](../formats/zson.md#24-complex-values) +[primitive-value definitions](../formats/jsup.md#23-primitive-values) in Super JSON +as well as the various [complex value definitions](../formats/jsup.md#24-complex-values) like records, arrays, sets, and so forth. However, complex values are not limited to -constant values like ZSON and can be composed from [literal expressions](expressions.md#literals). +constant values like Super JSON and can be composed from [literal expressions](expressions.md#literals). ## First-class Types -Like the Zed data model, the Zed language has first-class types: -any Zed type may be used as a value. +As in the super data model, the SuperPipe language has first-class types: +any type may be used as a value. The primitive types are listed in the [data model specification](../formats/zed.md#1-primitive-types) -and have the same syntax in the Zed language. Complex types also follow -the ZSON syntax. Note that the type of a type value is simply `type`. +and have the same syntax in SuperPipe. Complex types also follow +the Super JSON syntax. Note that the type of a type value is simply `type`. -As in ZSON, _when types are used as values_, e.g., in a Zed expression, +As in Super JSON, _when types are used as values_, e.g., in an expression, they must be referenced within angle brackets. 
That is, the integer type `int64` is expressed as a type value using the syntax ``. -Complex types in the Zed language follow the ZSON syntax as well. Here are +Complex types in the SuperPipe language follow the Super JSON syntax as well. Here are a few examples: * a simple record type - `{x:int64,y:int64}` * an array of integers - `[int64]` @@ -69,7 +69,7 @@ produces {typeof:,count:1(uint64)} ``` When running such a query over complex, semi-structured data, the results can -be quite illuminating and can inform the design of "data shaping" Zed queries +be quite illuminating and can inform the design of "data shaping" queries to transform raw, messy data into clean data for downstream tooling. Note the somewhat subtle difference between a record value with a field `t` of @@ -105,11 +105,11 @@ type socket = {addr:ip,port:port=uint16} defines a named type `socket` that is a record with field `addr` of type `ip` and field `port` of type "port", where type "port" is a named type for type `uint16` . -Named types may also be defined by the input data itself, as Zed data is +Named types may also be defined by the input data itself, as super data is comprehensively self describing. When named types are defined in the input data, there is no need to declare their type in a query. -In this case, a Zed expression may refer to the type by the name that simply +In this case, a SuperPipe expression may refer to the type by the name that simply appears to the runtime as a side effect of operating upon the data. If the type name referred to in this way does not exist, then the type value reference results in `error("missing")`. For example, @@ -156,16 +156,16 @@ results in Here, the two versions of type "foo" are retained in the group-by results. In general, it is bad practice to define multiple versions of a single named type, -though the Zed system and Zed data model accommodate such dynamic bindings. 
+though the SuperDB system and super data model accommodate such dynamic bindings. Managing and enforcing the relationship between type names and their type definitions -on a global basis (e.g., across many different data pools in a Zed lake) is outside +on a global basis (e.g., across many different data pools in a data lake) is outside the scope of the Zed data model and language. That said, Zed provides flexible building blocks so systems can define their own schema versioning and schema management policies on top of these Zed primitives. -Zed's [super-structured data model](../formats/README.md#2-a-super-structured-pattern) +The [super-structured data model](../formats/README.md#2-a-super-structured-pattern) is a superset of relational tables and -the Zed language's type system can easily make this connection. +SuperPipe's type system can easily make this connection. As an example, consider this type definition for "employee": ``` type employee = {id:int64,first:string,last:string,job:string,salary:float64} @@ -177,7 +177,7 @@ FROM employee ORDER BY salary LIMIT 5 ``` -In Zed, you would say +In SuperPipe, you would say ``` from anywhere | typeof(this)== @@ -194,7 +194,7 @@ from anywhere | sort salary | head 5 ``` -The power of Zed is that you can interpret data on the fly as belonging to +The power of SuperPipe is that you can interpret data on the fly as belonging to a certain schema, in this case "employee", and those records can be intermixed with other relevant data. There is no need to create a table called "employee" and put the data into the table before that data can be queried as an "employee". @@ -203,8 +203,8 @@ to work. ## First-class Errors -As with types, errors in Zed are first-class: any value can be transformed -into an error by wrapping it in the Zed [`error` type](../formats/zed.md#27-error). 
+As with types, errors in SuperPipe are first-class: any value can be transformed +into an error by wrapping it in an [`error` type](../formats/zed.md#27-error). In general, expressions and functions that result in errors simply return a value of type `error` as a result. This encourages a powerful flow-style @@ -245,7 +245,7 @@ produces ``` First-class errors are particularly useful for creating structured errors. -When a Zed query encounters a problematic condition, +When a SuperPipe query encounters a problematic condition, instead of silently dropping the problematic error and logging an error obscurely into some hard-to-find system log as so many ETL pipelines do, the Zed logic can @@ -255,9 +255,9 @@ For example, suppose a bad value shows up: ``` {kind:"bad", stuff:{foo:1,bar:2}} ``` -A Zed [shaper](shaping.md) could catch the bad value (e.g., as a default +A [shaper](shaping.md) could catch the bad value (e.g., as a default case in a [`switch`](operators/switch.md) topology) and propagate it as -an error using the Zed expression: +an error using the expression: ``` yield error({message:"unrecognized input",input:this}) ``` @@ -286,7 +286,7 @@ useful approach that Zed enables. ### Missing and Quiet -Zed's heterogeneous data model allows for queries +SuperDB's heterogeneous data model allows for queries that operate over different types of data whose structure and type may not be known ahead of time, e.g., different types of records with different field names and varying structure. @@ -314,7 +314,7 @@ To solve this problem, the `MISSING` value was proposed to represent the value t results from accessing a field that is not present. Thus, `x==NULL` and `x==MISSING` could disambiguate the two cases above. -Zed, instead, recognizes that the SQL value `MISSING` is a paradox: +SuperPipe, instead, recognizes that the SQL value `MISSING` is a paradox: I'm here but I'm not. In reality, a `MISSING` value is not a value. 
It's an error condition @@ -323,7 +323,7 @@ that resulted from trying to reference something that didn't exist. So why should we pretend that this is a bona fide value? SQL adopted this approach because it lacks first-class errors. -But Zed has first-class errors so +But SuperPipe has first-class errors so a reference to something that does not exist is an error of type `error(string)` whose value is `error("missing")`. For example, ```mdtest-command diff --git a/docs/language/functions/typename.md b/docs/language/functions/typename.md index 7ac73f3a71..8e68df0fab 100644 --- a/docs/language/functions/typename.md +++ b/docs/language/functions/typename.md @@ -10,8 +10,8 @@ typename(name: string) -> type ### Description -The _typename_ function returns the [type](../../formats/zson.md#25-types) of the -[named type](../../formats/zson.md#258-named-type) given by `name` if it exists. Otherwise, `error("missing")` is returned. +The _typename_ function returns the [type](../../formats/jsup.md#25-types) of the +[named type](../../formats/jsup.md#258-named-type) given by `name` if it exists. Otherwise, `error("missing")` is returned. ### Examples diff --git a/docs/language/functions/typeof.md b/docs/language/functions/typeof.md index 5addd1f227..c8499c6456 100644 --- a/docs/language/functions/typeof.md +++ b/docs/language/functions/typeof.md @@ -10,7 +10,7 @@ typeof(val: any) -> type ### Description -The _typeof_ function returns the [type](../../formats/zson.md#25-types) of +The _typeof_ function returns the [type](../../formats/jsup.md#25-types) of its argument `val`. Types are first class so the returned type is also a value. The type of a type is type `type`. 
diff --git a/docs/language/pipeline-model.md b/docs/language/pipeline-model.md index f07cf3ebb5..96dcdcc362 100644 --- a/docs/language/pipeline-model.md +++ b/docs/language/pipeline-model.md @@ -5,7 +5,7 @@ sidebar_label: Pipeline Model # The Pipeline Model -In Zed, each operator takes its input from the output of its upstream operator beginning +In SuperPipe, each operator takes its input from the output of its upstream operator beginning either with a data source or with an implied source. All available operators are listed on the [reference page](operators/README.md). @@ -16,30 +16,30 @@ In addition to the data sources specified as files on the `zq` command line, a source may also be specified with the [`from` operator](operators/from.md). When running on the command-line, `from` may refer to a file, an HTTP -endpoint, or an [S3](../integrations/amazon-s3.md) URI. When running in a [Zed lake](../commands/zed.md), `from` typically +endpoint, or an [S3](../integrations/amazon-s3.md) URI. When running in a [SuperDB data lake](../commands/zed.md), `from` typically refers to a collection of data called a "data pool" and is referenced using the pool's name much as SQL references database tables by their name. For more detail, see the reference page of the [`from` operator](operators/from.md), but as an example, you might use the `get` form of `from` to fetch data from an -HTTP endpoint and process it with Zed, in this case, to extract the description +HTTP endpoint and process it with `super`, in this case, to extract the description and license of a GitHub repository: ``` super -f text -c 'get https://api.github.com/repos/brimdata/super | yield description,license.name' ``` -When a Zed query is run on the command-line with `zq`, the `from` source is +When a query is run on the command-line with `super`, the `from` source is typically omitted and implied instead by the command-line file arguments. 
The input may be stdin via `-` as in ``` echo '"hello, world"' | super - ``` The examples throughout the language documentation use this "echo pattern" -to standard input of `zq -` to illustrate language semantics. -Note that in these examples, the input values are expressed as Zed values serialized -in the [ZSON text format](../formats/zson.md) -and the `zq` query text expressed as the first argument of the `zq` command -is expressed in the syntax of the Zed language described here. +to standard input of `super -` to illustrate language semantics. +Note that in these examples, the input values are expressed as a sequence of values serialized +in the [Super JSON format](../formats/jsup.md) +and the `super` query text expressed as the `-c` argument of the `super` command +is expressed in the syntax of the SuperPipe language described here. ## Pipeline Operators @@ -81,18 +81,18 @@ A pipeline can also be split to multiple branches using the corresponding branch (or dropped) based on the switch clauses. Switch operators typically -involve multiline Zed programs, which are easiest to edit in a file. For example, -suppose this text is in a file called `switch.zed`: -```mdtest-input switch.zed +involve multiline SuperPipe programs, which are easiest to edit in a file. For example, +suppose this text is in a file called `switch.spq`: +```mdtest-input switch.spq switch this ( case 1 => yield {val:this,message:"one"} case 2 => yield {val:this,message:"two"} default => yield {val:this,message:"many"} ) | merge val ``` -Then, running `zq` with `-I switch.zed` like so: +Then, running `super` with `-I switch.spq` like so: ```mdtest-command -echo '1 2 3 4' | super -z -I switch.zed - +echo '1 2 3 4' | super -z -I switch.spq - ``` produces ```mdtest-output @@ -107,7 +107,7 @@ a [`merge` operator](operators/merge.md) may be applied at the output of the switch specifying a sort key upon which to order the upstream data. 
Often such order does not matter (e.g., when the output of the switch hits an [aggregator](aggregates/README.md)), in which case it is typically more performant -to omit the merge (though the Zed system will often delete such unnecessary +to omit the merge (though the SuperDB runtime will often delete such unnecessary operations automatically as part optimizing queries when they are compiled). If no `merge` or `join` is indicated downstream of a `fork` or `switch`, @@ -116,14 +116,14 @@ forwarded from the switch to the downstream operator in an undefined order. ## The Special Value `this` -In Zed, there are no looping constructs and variables are limited to binding +In SuperPipe, there are no looping constructs and variables are limited to binding values between [lateral scopes](lateral-subqueries.md#lateral-scope). Instead, the input sequence to an operator is produced continuously and any output values are derived from input values. In contrast to SQL, where a query may refer to input tables by name, -there are no explicit tables and a Zed operator instead refers +there are no explicit tables and an operator instead refers to its input values using the special identifier `this`. For example, sorting the following input @@ -152,10 +152,10 @@ produces ## Implied Field References -A common use case for Zed is to process sequences of record-oriented data +A common SuperPipe use case is to process sequences of record-oriented data (e.g., arising from formats like JSON or Avro) in the form of events or structured logs. In this case, the input values to the operators -are Zed [records](../formats/zed.md#21-record) and the fields of a record are referenced with the dot operator. +are [records](../formats/zed.md#21-record) and the fields of a record are referenced with the dot operator. 
For example, if the input above were a sequence of records instead of strings and perhaps contained a second field, e.g., @@ -172,7 +172,7 @@ as above with `sort this.s`, which would give {s:"foo",x:1} ``` This pattern is so common that field references to `this` may be shortened -by simply referring to the field by name wherever a Zed expression is expected, +by simply referring to the field by name wherever an expression is expected, e.g., ``` sort s @@ -187,7 +187,7 @@ or extracting a subset of fields using the [`cut` operator](operators/cut.md). Also, when aggregating data using group-by keys, the group-by assignments create new named record fields. -In all of these cases, the Zed language uses the token `:=` to denote +In all of these cases, the SuperPipe language uses the token `:=` to denote field assignment. For example, ``` put x:=y+1 @@ -202,18 +202,18 @@ in later expressions. ## Implied Operators -When Zed is run in an application like [Zui](https://zui.brimdata.io), +When SuperPipe is utilized in an application like [SuperDB Desktop](https://zui.brimdata.io), queries are often composed interactively in a "search bar" experience. The language design here attempts to support both this "lean forward" pattern of usage along with a "coding style" of query writing where the queries might be large and complex, e.g., to perform transformations in a data pipeline, where -the Zed queries are stored under source-code control perhaps in GitHub or +the SuperPipe queries are stored under source-code control perhaps in GitHub or in Zui's query library. To facilitate both a programming-like model as well as an ad hoc search -experience, Zed has a canonical, long form that can be abbreviated +experience, SuperPipe has a canonical, long form that can be abbreviated using syntax that supports an agile, interactive query workflow. 
-To this end, Zed allows certain operator names to be optionally omitted when +To this end, SuperPipe allows certain operator names to be optionally omitted when they can be inferred from context. For example, the expression following the [`summarize` operator](operators/summarize.md) ``` @@ -233,7 +233,7 @@ is abbreviated foo bar or x > 100 ``` Furthermore, if an operator-free expression is not valid syntax for -a search expression but is a valid [Zed expression](expressions.md), +a search expression but is a valid [expression](expressions.md), then the abbreviation is treated as having an implied `yield` operator, e.g., ``` {s:lower(s)} @@ -262,8 +262,8 @@ can be expressed simply as ``` y:=2*x+1 ``` -When composing long-form queries that are shared via Zui or managed in GitHub, -it is best practice to include all operator names in the Zed source text. +When composing long-form queries that are shared via SuperDB Desktop or managed in GitHub, +it is best practice to include all operator names in the source text. In summary, if no operator name is given, the implied operator is determined from the operator-less source text, in the order given, as follows: diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index e161c9a470..bd2647b46e 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -3,35 +3,35 @@ sidebar_position: 4 sidebar_label: Schools Data --- -# Zed and Schools Data +# SuperPipe and Schools Data -> This document provides a beginner's overview of the Zed language -using the [zq command](../commands/zq.md) and +> This document provides a beginner's overview of the SuperPipe language +using the [super command](../commands/zq.md) and [real-world data](https://github.com/brimdata/super/blob/main/testdata/edu/README.md) relating to California schools and test scores. ## 1.
Getting Started If you want to follow along by running the examples, simply -[install zq](../install.md) and copy the +[install super](../install.md) and copy the data files used here into your working directory: ``` -curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/schools.zson > schools.zson -curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/testscores.zson > testscores.zson -curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/webaddrs.zson > webaddrs.zson +curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/schools.jsup > schools.jsup +curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/testscores.jsup > testscores.jsup +curl https://raw.githubusercontent.com/brimdata/super/main/testdata/edu/webaddrs.jsup > webaddrs.jsup ``` -These files are all encoded in the human-readable [ZSON format](../formats/zson.md) -so you can easily have a look at them. ZSON is not optimized for speed but these +These files are all encoded in the human-readable [Super JSON format](../formats/jsup.md) +so you can easily have a look at them. Super JSON is not optimized for speed but these files are small enough that the example queries here will all run fast enough. ## 2. Exploring the Data It's always a good idea to get a feel for any new data, which is easy to do -with Zed. Zed's [sample operator](../language/operators/sample.md) is just the ticket --- +with SuperPipe. 
The [sample operator](../language/operators/sample.md) is just the ticket --- `sample` will select one representative value from each "shape" of data present in the input, e.g., ```mdtest-command dir=testdata/edu -super -Z -c 'sample | sort this' schools.zson testscores.zson webaddrs.zson +super -Z -c 'sample | sort this' schools.jsup testscores.jsup webaddrs.jsup ``` displays ```mdtest-output @@ -63,10 +63,10 @@ displays Website: null (string) } ``` ->Note that the `-Z` option tells `zq` to "pretty print" the output in -the [ZSON](../formats/zson.md) format. +>Note that the `-Z` option tells `super` to "pretty print" the output in +the [Super JSON](../formats/jsup.md) format. Furthermore, you will notice these examples often include a `-z` to indicate -line-oriented ZSON, which is the default when `zq` is writing to standard output. +line-oriented Super JSON, which is the default when `super` is writing to standard output. You can omit `-z` when running these commands on the terminal but we include them here for clarity and because all of the examples are tied to automated testing, which does not utilize a terminal for standard output. @@ -79,7 +79,7 @@ super -Z -c ' | by typeof(value) | yield typeof | sort -' schools.zson testscores.zson webaddrs.zson +' schools.jsup testscores.jsup webaddrs.jsup ``` which emits ```mdtest-output @@ -97,18 +97,18 @@ since we obtained the original data from ## 3. Searching -Searching with Zed is easy but powerful because it blends together the +Searching with SuperPipe is easy but powerful because it blends together the keyword search patterns of Web or email search with the more precise predicate matching patterns of query languages like SQL. -With this in mind, you can simply start typing keyword search phrases in Zed -and they will usually do the right thing. +With this in mind, you can simply start typing keyword search phrases +and they will usually do the right thing.
### 3.1 Keyword Search With keyword search, you can just type a keyword that you want to look for, e.g., ```mdtest-command dir=testdata/edu -super -z -c Ygnacio schools.zson +super -z -c Ygnacio schools.jsup ``` which gives the one matching record: ```mdtest-output @@ -120,18 +120,18 @@ As with keyword search, you can simply concantenate keywords to require both of them to match (i.e., a "logical AND" of the two search predicates), e.g. we can whittle down the two records above by adding the keyword _Delano_ ```mdtest-command dir=testdata/edu -super -z -c 'Ygnacio Delano' schools.zson +super -z -c 'Ygnacio Delano' schools.jsup ``` and we get just the one record that matches: ```mdtest-output {School:"Valencia (Ygnacio) High (Alternative)",District:"Delano Joint Union High",City:"Delano",County:"Kern",Zip:"93215-1526",Latitude:null(float64),Longitude:null(float64),Magnet:null(bool),OpenDate:1980-07-01T00:00:00Z,ClosedDate:2009-08-01T00:00:00Z,Phone:null(string),StatusType:"Closed",Website:null(string)} ``` -Under the covers, a keyword search translates to Zed's [grep function](../language/functions/grep.md), +Under the covers, a keyword search translates to the [grep function](../language/functions/grep.md), which lets you search specific fields instead of the entire input value, e.g., we can search for the string "bar" in the `City` field and list all the unique cities that match with a [group-by](#52-grouping): ```mdtest-command dir=testdata/edu -super -f text -c 'grep("bar", City) | by City | yield City | sort' schools.zson +super -f text -c 'grep("bar", City) | by City | yield City | sort' schools.jsup ``` produces ```mdtest-output @@ -145,7 +145,7 @@ Somes Bar ``` In this example, we use the [yield operator](#8-value-construction) here to pull the `City` field out of the record result and we used `-f text` to output the -results in "text" format instead of ZSON so the strings are printed +results in "text" format instead of Super JSON so the strings are printed 
without quotes. The text format is often useful for piping the output to other Unix tools that might not expect quotes. @@ -163,7 +163,7 @@ used. For example, the following search finds records that contain school names that have some additional text between `ACE` and `Academy`: ```mdtest-command dir=testdata/edu -super -z -c 'ACE*Academy' schools.zson +super -z -c 'ACE*Academy' schools.jsup ``` produces ```mdtest-output head @@ -185,7 +185,7 @@ regexp. For example, since there are many high schools in our sample data, to find only records containing strings that _begin_ with the word `High`: ```mdtest-command dir=testdata/edu -super -z -c '/^High /' schools.zson +super -z -c '/^High /' schools.jsup ``` produces ```mdtest-output head @@ -196,12 +196,12 @@ produces ... ``` Further details for regular expressions are available in -the [Zed language documention](../language/search-expressions.md#regular-expressions). +the [SuperPipe language documentation](../language/search-expressions.md#regular-expressions). ### 3.4 Literal Search Sometimes you want to search for values that aren't strings, e.g., numbers -or IP addresses. Zed can search for any +or IP addresses. SuperPipe can search for any [primitive-type](../formats/zed.md#1-primitive-types) value just typing that value like a keyword. In this case, the search looks for both fields of the value's type for an exact match as well as a substring @@ -212,7 +212,7 @@ the number `596` matches records that contain numeric fields of this precise val (such as from the test scores) and also records that contain string fields (such as the ZIP code and phone number fields in the school data), e.g., ```mdtest-command dir=testdata/edu -super -z -c '596' testscores.zson schools.zson +super -z -c '596' testscores.jsup schools.jsup ``` finds these records ```mdtest-output head @@ -231,7 +231,7 @@ Let's say we've noticed that a couple of the school names in our sample data include the string `Defunct=`.
An attempt to enter this as a [keyword](#31-keyword-search) search causes a parse error, e.g., ```mdtest-command dir=testdata/edu fails -super -z -c 'Defunct=' *.zson +super -z -c 'Defunct=' *.jsup ``` produces ```mdtest-output @@ -242,7 +242,7 @@ Defunct= However, wrapping in quotes to performa a string-literal search gives the desired result: ```mdtest-command dir=testdata/edu -super -z -c '"Defunct="' schools.zson +super -z -c '"Defunct="' schools.jsup ``` produces ```mdtest-output @@ -255,7 +255,7 @@ say we're looking for information on the Union Hill Elementary district. Entered without quotes, we end up matching far more records than we intended since each space character between words is treated as a [Boolean `and`](#541-and), e.g., ```mdtest-command dir=testdata/edu -super -z -c 'Union Hill Elementary' schools.zson +super -z -c 'Union Hill Elementary' schools.jsup ``` produces ```mdtest-output head @@ -268,7 +268,7 @@ produces However, wrapping the entire search term in quotes allows us to search for the complete string, including the spaces, e.g., ```mdtest-command dir=testdata/edu -super -z -c '"Union Hill Elementary"' schools.zson +super -z -c '"Union Hill Elementary"' schools.jsup ``` produces ```mdtest-output @@ -283,7 +283,7 @@ produces ### 3.5 Predicate Search Search terms can also be include Boolean predicates adhering -to Zed's [expression syntax](../language/expressions.md). +to SuperPipe's [expression syntax](../language/expressions.md). In particular, a search result can be narrowed down to include only records that contain a @@ -291,7 +291,7 @@ certain value in a particular named field. 
For example, the following search will only match records containing the field called `District` where it is set to the precise string value `Winton`: ```mdtest-command dir=testdata/edu -super -z -c 'District=="Winton"' schools.zson +super -z -c 'District=="Winton"' schools.jsup ``` produces ```mdtest-output @@ -304,13 +304,13 @@ produces Because the right-hand-side value to which we were comparing was a string, it was necessary to wrap it in quotes. If this string were written as a keyword, it would have been interpreted as a field name as -Zed [field references](../language/pipeline-model.md#implied-field-references) +[field references](../language/pipeline-model.md#implied-field-references) look like keywords in the context of an expression. For example, to see the records in which the school and district name are the same: ```mdtest-command dir=testdata/edu -super -z -c 'District==School' schools.zson +super -z -c 'District==School' schools.jsup ``` produces ```mdtest-output head @@ -329,7 +329,7 @@ For instance, the "Zip" field in the schools data is a `string` rather than a number because of the extended ZIP+4 format that includes a hyphen and four additional digits and hence could not be represented in a numeric type, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'cut Zip' schools.zson +super -z -c 'cut Zip' schools.jsup ``` produces ```mdtest-output head @@ -338,11 +338,11 @@ produces {Zip:"92395-3360"} ... ``` -Because Zed does not coerce strings to numbers in expressions, +Because SuperPipe does not coerce strings to numbers in expressions, the predicate `Zip==95959` would _not_ -match the top record shown, since Zed recognizes the bare value `95959` as a +match the top record shown, since SuperPipe recognizes the bare value `95959` as a number before comparing it to all the fields named `Zip`. 
-However, `Zip=="95959"` _would_ match, since the quotes cause Zed +However, `Zip=="95959"` _would_ match, since the quotes cause SuperPipe to treat the value as a string. When confronted with messy data like this, you can usually cleaned it up @@ -350,7 +350,7 @@ to achieve the intent of your searches. For example, the dash suffix of the ZIP codes could be dropped, the string converted to an integer, then integer comparisons performed, i.e., ```mdtest-command dir=testdata/edu -super -z -c 'cut Zip | int64(Zip[0:5])==94607' schools.zson +super -z -c 'cut Zip | int64(Zip[0:5])==94607' schools.jsup ``` produces ```mdtest-output head @@ -368,7 +368,7 @@ For example, let's say we know there are several school names that start with as a _substring_ of the district names in our sample data, the following example produces no output, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'District=="Luther"' schools.zson +super -z -c 'District=="Luther"' schools.jsup ``` produces an empty output ```mdtest-output @@ -378,7 +378,7 @@ To perform string searches inside of nested values, we can utilize the [grep function](../language/functions/grep.md) with a [glob](#32-globs), e.g., ```mdtest-command dir=testdata/edu -super -z -c 'grep(Luther*, District)' schools.zson +super -z -c 'grep(Luther*, District)' schools.jsup ``` produces ```mdtest-output head @@ -388,7 +388,7 @@ produces [Regular expressions](#33-regular-expressions) can also be used with `grep`, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'grep(/^Sunset (Ranch|Ridge) Elementary/, School)' schools.zson +super -z -c 'grep(/^Sunset (Ranch|Ridge) Elementary/, School)' schools.jsup ``` produces ```mdtest-output @@ -404,7 +404,7 @@ This is performed with `in`. 
Since our sample data doesn't contain complex fields, we'll make one by using the [`union`](../language/aggregates/union.md) aggregate function to -create a [`set`](../formats/zson.md#243-set-value)-typed +create a [`set`](../formats/jsup.md#243-set-value)-typed field called `Schools` that contains all unique school names per district. From these we'll find each set that contains a school named `Lincoln Elementary`, e.g., ```mdtest-command dir=testdata/edu @@ -412,7 +412,7 @@ super -Z -c ' Schools:=union(School) by District | "Lincoln Elementary" in Schools | sort this -' schools.zson +' schools.jsup ``` produces ```mdtest-output head @@ -450,7 +450,7 @@ In addition to testing for equality via `==` and testing containment via For example, the following search finds the schools that reported the highest math test scores, ```mdtest-command dir=testdata/edu -super -z -c 'AvgScrMath > 690' testscores.zson +super -z -c 'AvgScrMath > 690' testscores.jsup ``` produces ```mdtest-output @@ -462,7 +462,7 @@ produces The same approach can be used to compare characters in `string`-type values, such as this search that finds school names at the end of the alphabet, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'School > "Z"' schools.zson +super -z -c 'School > "Z"' schools.jsup ``` produces ```mdtest-output head @@ -475,7 +475,7 @@ produces ### 3.6 Boolean Logic Search terms can be combined with Boolean logic as detailed in -the [Zed language documentation](../language/search-expressions.md#boolean-logic). +the [SuperPipe language documentation](../language/search-expressions.md#boolean-logic). In particular, search terms separated by blank space implies Boolean `and` between the concatenated terms. @@ -484,7 +484,7 @@ Let's say we're earching for information about academies that are flagged as being in a `Pending` status. 
We can simply concatenate the predicate for "Pending" and the keyword search for `academy`, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'StatusType=="Pending" academy' schools.zson +super -z -c 'StatusType=="Pending" academy' schools.jsup ``` produces ```mdtest-output @@ -506,7 +506,7 @@ Let'a revisit two of our previous example searches that each only returned a couple records, searching now with `or` to see them all at once, e.g., ```mdtest-command dir=testdata/edu -super -z -c '"Defunct=" or ACE*Academy' schools.zson +super -z -c '"Defunct=" or ACE*Academy' schools.jsup ``` produces ```mdtest-output @@ -522,7 +522,7 @@ it in your search. For example, to find schools in the `Dixon Unified` district _other than_ elementary schools, we invert the logic of a search term: ```mdtest-command dir=testdata/edu -super -z -c 'not elementary District=="Dixon Unified"' schools.zson +super -z -c 'not elementary District=="Dixon Unified"' schools.jsup ``` produces ```mdtest-output head @@ -550,7 +550,7 @@ left-to-right evaluation. For example, we've noticed there are some test score records that have `null` values for all three test scores: ```mdtest-command dir=testdata/edu -super -z -c 'AvgScrMath==null AvgScrRead==null AvgScrWrite==null' testscores.zson +super -z -c 'AvgScrMath==null AvgScrRead==null AvgScrWrite==null' testscores.jsup ``` produces ```mdtest-output head @@ -562,7 +562,7 @@ We can easily filter these out by negating the search for these records, e.g., ```mdtest-command dir=testdata/edu super -z -c ' not (AvgScrMath==null AvgScrRead==null AvgScrWrite==null) -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output head @@ -577,7 +577,7 @@ super -z -c ' grep(*High*, sname) and (not (AvgScrMath==null AvgScrRead==null AvgScrWrite==null) and dname=="San Francisco Unified") -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output head @@ -593,9 +593,9 @@ logic. ## 4. 
Record Operators -As with the data sets explored here, a very typical use case for Zed is -to operate over structured logs or events that are all represented as Zed records. -While Zed queries may operate over any sequence of values, the following operators +As with the data sets explored here, a very typical use case for SuperPipe is +to operate over structured logs or events that are all represented as records. +While SuperPipe queries may operate over any sequence of values, the following operators are designed specifically to work on sequences of records: * [cut](../language/operators/cut.md) - extract subsets of record fields into new records * [drop](../language/operators/drop.md) - drop fields from record values @@ -610,7 +610,7 @@ the specified named fields. This example returns only the name and opening date from our school records: ```mdtest-command dir=testdata/edu -super -Z -c 'cut School,OpenDate' schools.zson +super -Z -c 'cut School,OpenDate' schools.jsup ``` produces ```mdtest-output head @@ -631,7 +631,7 @@ school data that includes fields for both `School` and `Website`, values from our web address data that have the `Website` and `addr` fields, and the missing value from the test score data since it has none of these fields: ```mdtest-command dir=testdata/edu -super -z -c 'yosemiteuhsd | cut School,Website,addr' *.zson +super -z -c 'yosemiteuhsd | cut School,Website,addr' *.jsup ``` produces ```mdtest-output @@ -641,7 +641,7 @@ produces Here, we return only the `sname` and `dname` fields of the test scores while also renaming the fields: ```mdtest-command dir=testdata/edu -super -z -c 'cut School:=sname,District:=dname' testscores.zson +super -z -c 'cut School:=sname,District:=dname' testscores.jsup ``` produces ```mdtest-output head @@ -657,7 +657,7 @@ fields dropped from the output. 
This example return all the fields _other than_ the score values in our test score data: ```mdtest-command dir=testdata/edu -super -z -c 'drop AvgScrMath,AvgScrRead,AvgScrWrite' testscores.zson +super -z -c 'drop AvgScrMath,AvgScrRead,AvgScrWrite' testscores.jsup ``` produces ```mdtest-output head @@ -679,7 +679,7 @@ Let's say you'd started with table-formatted output of all records in our data that reference the town of Geyserville, e.g., ```mdtest-command dir=testdata/edu -super -f table -c 'Geyserville' *.zson +super -f table -c 'Geyserville' *.jsup ``` produces ```mdtest-output @@ -703,7 +703,7 @@ accurately conveys the heterogeneous nature of the data, but changing schemas mid-stream is not allowed in formats such as CSV or other downstream tooling such as SQL. Indeed, `zq` halts its output in this case, e.g., ```mdtest-command dir=testdata/edu fails -super -f csv -c 'Geyserville' *.zson +super -f csv -c 'Geyserville' *.jsup ``` produces ```mdtest-output @@ -722,7 +722,7 @@ is assembled in a first pass through the data stream, which enables the presentation of the results under a single, wider header row with no further interruptions between the subsequent data rows, e.g., ```mdtest-command dir=testdata/edu -super -f csv -c 'Geyserville | fuse' *.zson +super -f csv -c 'Geyserville | fuse' *.jsup ``` produces ```mdtest-output @@ -764,7 +764,7 @@ reading, and writing scores for each school that reported them, we could say: super -Z -c ' AvgScrMath!=null | put AvgAll:=(AvgScrMath+AvgScrRead+AvgScrWrite)/3.0 -' testscores.zson +' testscores.jsup ``` which produces ```mdtest-output head @@ -787,7 +787,7 @@ super -f table -c ' | put combined_scores:=AvgScrMath+AvgScrRead+AvgScrWrite | cut sname,combined_scores,AvgScrMath,AvgScrRead,AvgScrWrite | head 5 -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output @@ -802,7 +802,7 @@ As noted above the `put` keyword is entirely optional. 
Here we omit it and create a new field to hold the lowercase representation of the school `District` field: ```mdtest-command dir=testdata/edu -super -Z -c 'cut District | lower_district:=lower(District)' schools.zson +super -Z -c 'cut District | lower_district:=lower(District)' schools.jsup ``` produces ```mdtest-output head @@ -824,7 +824,7 @@ The rename steps are applied left-to-right. Here is a simple example that renames some fields in our test score data to match the field names from our school data: ```mdtest-command dir=testdata/edu -super -Z -c 'rename School:=sname,District:=dname,City:=cname' testscores.zson +super -Z -c 'rename School:=sname,District:=dname,City:=cname' testscores.jsup ``` produces ```mdtest-output head @@ -841,8 +841,8 @@ produces As mentioned above, a field can only be renamed within its own record. In other words, a field cannot move between nested levels when being renamed. -For example, consider this sample input data `nested.zson`: -```mdtest-input nested.zson +For example, consider this sample input data `nested.jsup`: +```mdtest-input nested.jsup { outer: { inner: "MyValue" @@ -851,7 +851,7 @@ For example, consider this sample input data `nested.zson`: ``` The field `inner` can be renamed within that nested record, e.g., ```mdtest-command -super -Z -c 'rename outer.renamed:=outer.inner' nested.zson +super -Z -c 'rename outer.renamed:=outer.inner' nested.jsup ``` produces ```mdtest-output @@ -863,7 +863,7 @@ produces ``` However, an attempt to rename it to a top-level field will fail, e.g., ```mdtest-command fails -super -Z -c 'rename toplevel:=outer.inner' nested.zson +super -Z -c 'rename toplevel:=outer.inner' nested.jsup ``` produces this compile-time error message and the query is not run: ```mdtest-output @@ -874,7 +874,7 @@ rename toplevel:=outer.inner This goal could instead be achieved by combining [`put`](#44-put) and [`drop`](#42-drop), e.g., ```mdtest-command -super -Z -c 'put toplevel:=outer.inner | drop outer.inner' 
nested.zson +super -Z -c 'put toplevel:=outer.inner | drop outer.inner' nested.jsup ``` produces ```mdtest-output @@ -899,7 +899,7 @@ the math test scores: ```mdtest-command dir=testdata/edu super -f table -c ' min(AvgScrMath),max(AvgScrMath),avg(AvgScrMath) -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output @@ -922,7 +922,7 @@ explicit name for the generated field, e.g., ```mdtest-command dir=testdata/edu super -f table -c ' lowest:=min(AvgScrMath),highest:=max(AvgScrMath),typical:=avg(AvgScrMath) -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output @@ -950,7 +950,7 @@ and San Francisco: super -Z -c ' LA_Math:=avg(AvgScrMath) where cname=="Los Angeles", SF_Math:=avg(AvgScrMath) where cname=="San Francisco" -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output @@ -977,7 +977,7 @@ not. The following query shows the cities in which all schools have a website. e super -Z -c ' all_schools_have_website:=and(Website!=null) by City | sort City -' schools.zson +' schools.jsup ``` produces ```mdtest-output head @@ -1003,7 +1003,7 @@ an undefined manner. This query gives the name of one of the schools in our sample data: ```mdtest-command dir=testdata/edu -super -z -c 'any(School)' schools.zson +super -z -c 'any(School)' schools.jsup ``` For small inputs that fit in memory, this will typically be the first such field in the stream, but in general you should not rely upon this. In this @@ -1018,7 +1018,7 @@ The `avg` function computes an arithmetic mean over all of all of its input. 
This query calculates the average of the math test scores: ```mdtest-command dir=testdata/edu -super -f table -c 'avg:=avg(AvgScrMath)' testscores.zson +super -f table -c 'avg:=avg(AvgScrMath)' testscores.jsup ``` and produces ```mdtest-output @@ -1038,7 +1038,7 @@ super -Z -c ' County=="Fresno" Website!=null | Websites:=collect(Website),Schools:=collect(School) by City | sort City -' schools.zson +' schools.jsup ``` and produces ```mdtest-output head @@ -1075,9 +1075,9 @@ The `count` function produces a count of all of its input values. This query counts the number of records in each of our example data sources: ```mdtest-command dir=testdata/edu -super -z -c 'count()' schools.zson -super -z -c 'count()' testscores.zson -super -z -c 'count()' webaddrs.zson +super -z -c 'count()' schools.jsup +super -z -c 'count()' testscores.jsup +super -z -c 'count()' webaddrs.jsup ``` and produces ```mdtest-output @@ -1089,7 +1089,7 @@ The `Website` field is known to be in our school and website address data sources, but not in the test score data. To confirm this, we can count across all data sources and specify the named field, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'count(Website)' *.zson +super -z -c 'count(Website)' *.jsup ``` produces ```mdtest-output @@ -1109,7 +1109,7 @@ from the [HyperLogLog repository](https://github.com/axiomhq/hyperloglog). 
This query generates an approcimate count the number of unique school names in our sample data set: ```mdtest-command dir=testdata/edu -super -z -c 'dcount(School)' schools.zson +super -z -c 'dcount(School)' schools.jsup ``` and produces ```mdtest-output @@ -1117,7 +1117,7 @@ and produces ``` To see the precise value, which may take longer to execute, this query ```mdtest-command dir=testdata/edu -super -z -c 'count() by School | count()' schools.zson +super -z -c 'count() by School | count()' schools.jsup ``` produces ```mdtest-output @@ -1131,7 +1131,7 @@ The `max` function computes the maximum numeric value over all of its input. To see the highest reported math test score, this query: ```mdtest-command dir=testdata/edu -super -f table -c 'max:=max(AvgScrMath)' testscores.zson +super -f table -c 'max:=max(AvgScrMath)' testscores.jsup ``` produces ```mdtest-output @@ -1145,7 +1145,7 @@ The `min` function computes the minimum numeric value over all of its input. To see the lowest reported math test score, this query ```mdtest-command dir=testdata/edu -super -f table -c 'min:=min(AvgScrMath)' testscores.zson +super -f table -c 'min:=min(AvgScrMath)' testscores.jsup ``` produces ```mdtest-output @@ -1165,7 +1165,7 @@ a listed website: super -Z -c ' has_at_least_one_school_website:=or(Website!=null) by City | sort City -' schools.zson +' schools.jsup ``` and produces ```mdtest-output head @@ -1203,7 +1203,7 @@ super -Z -c ' AllMath:=sum(AvgScrMath), AllRead:=sum(AvgScrRead), AllWrite:=sum(AvgScrWrite) -' testscores.zson +' testscores.jsup ``` and produces ```mdtest-output @@ -1226,7 +1226,7 @@ super -Z -c ' County=="Fresno" Website!=null | Websites:=union(Website) by City | sort City -' schools.zson +' schools.jsup ``` and produces ```mdtest-output head @@ -1269,7 +1269,7 @@ For example, to see the different categories of status for the schools in our example data, this query: ```mdtest-command dir=testdata/edu -super -z -c 'by StatusType | sort' schools.zson +super 
-z -c 'by StatusType | sort' schools.jsup ``` produces ```mdtest-output @@ -1279,10 +1279,10 @@ produces {StatusType:"Pending"} ``` If you work a lot at the UNIX/Linux shell, you might have sought to accomplish -the same via a familiar idiom: `sort | uniq`. This works in Zed, but the `by` +the same via a familiar idiom: `sort | uniq`. This works in SuperPipe, but the `by` shorthand is preferable, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'cut StatusType | sort | uniq' schools.zson +super -z -c 'cut StatusType | sort | uniq' schools.jsup ``` produces ```mdtest-output @@ -1299,7 +1299,7 @@ test scores and school count for each county/district pairing, this query: super -f table -c ' avg(AvgScrRead),count() by cname,dname | sort count desc -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output head @@ -1311,13 +1311,13 @@ San Francisco San Francisco Unified 454.368421052 ... ``` Instead of a simple field name, any of the comma-separated group-by elements -can be any [Zed expression](../language/expressions.md), which may +can be any [expression](../language/expressions.md), which may appear in the form of a field assignment `field:=expr` To see a count of how many school names of a particular character length appear in our example data, this query: ```mdtest-command dir=testdata/edu -super -f table -c 'count() by Name_Length:=len(School) | sort -r' schools.zson +super -f table -c 'count() by Name_Length:=len(School) | sort -r' schools.jsup ``` produces ```mdtest-output head @@ -1340,7 +1340,7 @@ the misspelled field would appear as embedded missing errors, e.g., super -Z -c ' avg(AvgScrRead),count() by cname,dnmae | sort count desc -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output head @@ -1361,15 +1361,15 @@ produces ## 6. Sorting -Zed provides a convenient way to sort data using the +SuperPipe provides a convenient way to sort data using the [sort operator](../language/operators/sort.md). 
-All values in Zed have a well-defined sort order, even complex values +All values in the super data model have a well-defined sort order, even complex values and values of different data types, so you can easily sort heterogenous sequences of values. This query sorts our test score records by average reading score: ```mdtest-command dir=testdata/edu -super -z -c 'sort AvgScrRead' testscores.zson +super -z -c 'sort AvgScrRead' testscores.jsup ``` and produces ```mdtest-output head @@ -1384,7 +1384,7 @@ Now we'll sort the test score records first by average reading score and then by average math score. Note how this changed the order of the bottom two records in the result, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'sort AvgScrRead,AvgScrMath' testscores.zson +super -z -c 'sort AvgScrRead,AvgScrMath' testscores.jsup ``` produces ```mdtest-output head @@ -1402,7 +1402,7 @@ field name as an explicit argument, the `sort` operator did what we wanted because it found a field of the `uint64` [data type](../language/data-types.md), e.g., ```mdtest-command dir=testdata/edu -super -z -c 'count() by County | sort -r' schools.zson +super -z -c 'count() by County | sort -r' schools.jsup ``` produces ```mdtest-output head @@ -1416,7 +1416,7 @@ records. Since we know some of the records don't include a website, we'll deliberately put the null values at the front of the list so we can see how many there are, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'count() by Website | sort -nulls first Website' schools.zson +super -z -c 'count() by Website | sort -nulls first Website' schools.jsup ``` produces ```mdtest-output head @@ -1429,7 +1429,7 @@ produces ## 7.
Sequence Filters -Several Zed operators manipulate a sequence of values based on the order +Several operators manipulate a sequence of values based on the order in which they appear in the input: * [head](../language/operators/head.md) - copy leading values of input sequence * [tail](../language/operators/tail.md) - copy trailing values of input sequence @@ -1442,7 +1442,7 @@ of its input to its output. For example, this query selects the first school record: ```mdtest-command dir=testdata/edu -super -Z -c 'head' schools.zson +super -Z -c 'head' schools.jsup ``` and produces ```mdtest-output @@ -1464,7 +1464,7 @@ and produces ``` To see the first five school records in Los Angeles county, this query ```mdtest-command dir=testdata/edu -super -z -c 'County=="Los Angeles" | head 5' schools.zson +super -z -c 'County=="Los Angeles" | head 5' schools.jsup ``` produces ```mdtest-output @@ -1481,7 +1481,7 @@ of its input to its output. For example, this query selects the last school record: ```mdtest-command dir=testdata/edu -super -Z -c 'tail' schools.zson +super -Z -c 'tail' schools.jsup ``` and produces ```mdtest-output @@ -1503,7 +1503,7 @@ and produces ``` To see the last five school records in Los Angeles county, this query ```mdtest-command dir=testdata/edu -super -z -c 'County=="Los Angeles" | tail 5' schools.zson +super -z -c 'County=="Los Angeles" | tail 5' schools.jsup ``` produces ```mdtest-output @@ -1522,7 +1522,7 @@ input to the output. 
Let's say you'd been looking at the contents of just the `District` and `County` fields in the order they appear in the school data, e.g., ```mdtest-command dir=testdata/edu -super -z -c 'cut District,County' schools.zson +super -z -c 'cut District,County' schools.jsup ``` produces ```mdtest-output head @@ -1541,7 +1541,7 @@ produces To eliminate the adjacent lines that share the same field/value pairs, this query ```mdtest-command dir=testdata/edu -super -z -c 'cut District,County | uniq' schools.zson +super -z -c 'cut District,County | uniq' schools.jsup ``` produces ```mdtest-output head @@ -1568,7 +1568,7 @@ the average math score with the school name and the county name: super -Z -c ' AvgScrMath!=null | yield {school:sname,avg:AvgScrMath}, {county:cname,zvg:AvgScrMath} -' testscores.zson +' testscores.jsup ``` which produces ```mdtest-output head 4 @@ -1597,7 +1597,7 @@ super -f table -c ' | put combined_scores:=AvgScrMath+AvgScrRead+AvgScrWrite | cut sname,combined_scores,AvgScrMath,AvgScrRead,AvgScrWrite | head 5 -' testscores.zson +' testscores.jsup ``` produces ```mdtest-output @@ -1621,7 +1621,7 @@ AvgScrMath != null AvgScrRead, AvgScrWrite } -| head 5' testscores.zson +| head 5' testscores.jsup ``` produces ```mdtest-output diff --git a/docs/tutorials/zq.md b/docs/tutorials/zq.md index 35336926bb..75294ac63d 100644 --- a/docs/tutorials/zq.md +++ b/docs/tutorials/zq.md @@ -76,7 +76,7 @@ which also gives 4 ``` > Note that we are using the `-z` option with `zq` in all of the examples, -> which causes `zq` to format the output as [ZSON](../formats/zson.md). +> which causes `zq` to format the output as [ZSON](../formats/jsup.md). > When running `zq` on the terminal, you do not need `-z` as it is the default, > but we include it here for clarity and because all of these examples are > run through automated testing, which is not attached to a terminal. @@ -158,7 +158,7 @@ trying to do high-precision stuff with data. 
When using `zq`, it's handy to operate in the domain of Zed data and only output to JSON when needed. -The human-readable format of Zed is called [ZSON](../formats/zson.md) +The human-readable format of Zed is called [ZSON](../formats/jsup.md) (and yes, that's a play on the acronym JSON). ZSON is nice because it has a comprehensive type system and you can @@ -249,10 +249,10 @@ Here, `v1` is a 64-bit IEEE floating-point value just like JSON. Unlike JSON, `v2` is a 64-bit integer. And there are other integer types as with `v3`, -which utilizes a [ZSON type decorator](../formats/zson.md#22-type-decorators), +which utilizes a [ZSON type decorator](../formats/jsup.md#22-type-decorators), in this case, to clarify its specific type of integer as unsigned 8 bits. - + `v4` has type `time` and `v5` type `duration`. `v6` is type `ip` and `v7` type `net`. diff --git a/testdata/edu/README.md b/testdata/edu/README.md index 4228fb2cd8..ca5e3b584e 100644 --- a/testdata/edu/README.md +++ b/testdata/edu/README.md @@ -2,7 +2,7 @@ This directory contains a small sample data set regarding California schools and their average SAT scores. It is used in query examples in -the [Zed language documentation](../../docs/language/README.md). +the [SuperPipe language documentation](../../docs/language/README.md). # Acknowledgement @@ -14,7 +14,7 @@ this data. # Creation -[`schools.zson`](schools.zson) and [`testscores.zson`](testscores.zson) +[`schools.jsup`](schools.jsup) and [`testscores.jsup`](testscores.jsup) are created by downloading an SQLite database, extracting two tables as JSON, and shaping and sorting the resulting records.
@@ -38,7 +38,7 @@ sqlite3 -json cdeschools.sqlite "select * from schools;" | super -z -c ' Website:string }; this := crop(shape(school), school) | sort School -' - > schools.zson +' - > schools.jsup sqlite3 -json cdeschools.sqlite "select * from satscores;" | super -z -c ' type testscore = { @@ -50,17 +50,17 @@ sqlite3 -json cdeschools.sqlite "select * from satscores;" | super -z -c ' sname: string }; this := crop(shape(testscore), testscore) | sort sname -' - > testscores.zson +' - > testscores.jsup ``` -Some Zed language examples require IP address data, so the data set is -augmented with [`webaddrs.zson`](webaddrs.zson), which captures an IP +Some SuperPipe language examples require IP address data, so the data set is +augmented with [`webaddrs.jsup`](webaddrs.jsup), which captures an IP address at which each school website was once hosted. ```sh -for host in $(zq -f text 'Website != null | by Website' schools.zson | sed -e 's|http://||' -e 's|/.*||' | sort -u); do +for host in $(zq -f text 'Website != null | by Website' schools.jsup | sed -e 's|http://||' -e 's|/.*||' | sort -u); do addr=$(dig +short $host | egrep '\d{1,3}(.\d{1,3}){3}' | tail -1) [ "$addr" ] && echo "{Website:\"$host\",addr:$addr}" -done > webaddrs.zson +done > webaddrs.jsup ``` diff --git a/testdata/edu/schools.zson b/testdata/edu/schools.jsup similarity index 100% rename from testdata/edu/schools.zson rename to testdata/edu/schools.jsup diff --git a/testdata/edu/testscores.zson b/testdata/edu/testscores.jsup similarity index 100% rename from testdata/edu/testscores.zson rename to testdata/edu/testscores.jsup diff --git a/testdata/edu/webaddrs.zson b/testdata/edu/webaddrs.jsup similarity index 100% rename from testdata/edu/webaddrs.zson rename to testdata/edu/webaddrs.jsup