From 433196452f302632f627417ee8c0987d16d31cd3 Mon Sep 17 00:00:00 2001 From: Michael Vorburger Date: Sun, 29 Dec 2024 04:20:10 +0100 Subject: [PATCH] feat (cli): Add fetch CLI command (with documentation) --- .markdown-link-check.json | 2 +- docs/use/fetch/index.md | 85 +++++++++++++++++++ docs/use/get/index.md | 2 + docs/use/help/index.md | 9 ++ docs/use/info/index.md | 18 +++- java/dev/enola/cli/DetectCommand.java | 7 +- java/dev/enola/cli/EnolaCLI.java | 3 +- java/dev/enola/cli/FetchCommand.java | 52 ++++++++++++ java/dev/enola/common/io/iri/URIs.java | 1 + .../common/io/resource/EmptyResource.java | 5 +- mkdocs.yaml | 1 + 11 files changed, 177 insertions(+), 8 deletions(-) create mode 100644 docs/use/fetch/index.md create mode 100644 java/dev/enola/cli/FetchCommand.java diff --git a/.markdown-link-check.json b/.markdown-link-check.json index 1e13af567..d4594543b 100644 --- a/.markdown-link-check.json +++ b/.markdown-link-check.json @@ -14,5 +14,5 @@ { "pattern": "^https://kohesio.ec.europa.eu" }, { "pattern": "^https://billiam.itch.io/deepdwn" } ], - "aliveStatusCodes": [504, 503, 502, 405, 200, 0] + "aliveStatusCodes": [504, 503, 502, 405, 202, 200, 0] } diff --git a/docs/use/fetch/index.md b/docs/use/fetch/index.md new file mode 100644 index 000000000..d8b3163bf --- /dev/null +++ b/docs/use/fetch/index.md @@ -0,0 +1,85 @@ + + +# Fetch + +`fetch` fetches a _Resource_ from a _URL_ and outputs its content. You can therefore use this similarly to [curl](https://curl.se/) or [httpie](https://httpie.io/cli) or [wget](https://en.wikipedia.org/wiki/Wget). (If you want to see the _Media Type,_ use [`info detect`](../info/index.md#detect).) + +This is different from [`get`](../get/index.md), which shows _Things_ given an _IRI_. +(However the `--load` option of `get` internally does a `fetch`, and supports the same schemes.) + +Enola supports the [URI schemes](https://en.wikipedia.org/wiki/List_of_URI_schemes) which are documented below.
+These are supported everywhere; including in `fetch`, `--load`, and elsewhere. + +## HTTP + +This will fetch <https://www.vorburger.ch/hello.md>: _(Note how for security reasons we have to explicitly permit it.)_ + +```bash cd ../.././.. +$ ./enola fetch --http-scheme https://www.vorburger.ch/hello.md +... +``` + +Enola locally caches HTTP responses on the filesystem. + +## Files + +We can do a `cat`-like equivalent of local files using [the `file:` scheme](https://en.wikipedia.org/wiki/File_URI_scheme): + +```bash cd ../.././.. +$ echo "hello" >/tmp/hi.txt && ./enola fetch file:///tmp/hi.txt +... +``` + +We can omit the `file:` scheme and use absolute or relative paths, +because (in the CLI) the current working directory is implicitly [the _base URI_ +used to resolve URI references](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#URI_references): + +```bash cd ../.././.. +$ ./enola fetch /tmp/hi.txt +... +``` + +## Classpath + +```bash cd ../.././.. +$ ./enola fetch classpath:/VERSION +... +``` + +## Data + +Enola [supports (RFC 2397) `data:` URLs](https://en.m.wikipedia.org/wiki/Data_URI_scheme): + +```bash cd ../.././.. +$ ./enola fetch "data:application/json;charset=UTF-8,%7B%22key%22%3A+%22value%22%7D" +... +``` + +## Empty + +`empty:` is a (non-standard) URL scheme in Enola for "no content" (as an alternative to `data:,`): + +```bash cd ../.././.. +$ ./enola fetch empty:/ +... +``` + +## Screencast + +![Demo](script.svg) diff --git a/docs/use/get/index.md b/docs/use/get/index.md index 3fb0510ff..dca0884b1 100644 --- a/docs/use/get/index.md +++ b/docs/use/get/index.md @@ -41,3 +41,5 @@ $ ./enola get --load models/enola.dev/enola.ttl https://enola.dev/emoji | head - ``` Note that `get` [supports various formats](../help/index.md#get). + +PS: The [`fetch`](../fetch/index.md) command does something related.
diff --git a/docs/use/help/index.md b/docs/use/help/index.md index a85c8b994..2ded48eec 100644 --- a/docs/use/help/index.md +++ b/docs/use/help/index.md @@ -94,6 +94,15 @@ $ ./enola help validate ... ``` +## Fetch + +[Fetch](../fetch/index.md) has the following options: + +```bash cd ../.././.. +$ ./enola help fetch +... +``` + ## Info [Info](../info/index.md) has the following options: diff --git a/docs/use/info/index.md b/docs/use/info/index.md index 6b4874fc1..5371d0a0a 100644 --- a/docs/use/info/index.md +++ b/docs/use/info/index.md @@ -50,7 +50,23 @@ $ ./enola info detect --file-scheme picasso.thing.yaml ... ``` -Note that this file does not exist, this is fine; the type of its content is determine by the extension (in this case). +Note that this file does not exist; that's fine, +as the type of its content is determined by the extension (in this case). +In some cases the media type may even be part of the URL itself, like for [`data:` URLs](../fetch/index.md#data): + +```bash cd ../.././.. +$ ./enola info detect "data:application/json;charset=UTF-8,%7B%22key%22%3A+%22value%22%7D" +... +``` + +It's also possible to "override" the Media Type, like this: + +```bash cd ../.././.. +./enola info detect "picasso.thing.yaml?mediaType=application/json" +... +``` + +[`fetch`](../fetch/index.md) is another command to "get the bytes at" a URL.
## Metadata diff --git a/java/dev/enola/cli/DetectCommand.java b/java/dev/enola/cli/DetectCommand.java index aa28bbe89..e883f8e5c 100644 --- a/java/dev/enola/cli/DetectCommand.java +++ b/java/dev/enola/cli/DetectCommand.java @@ -32,7 +32,8 @@ description = "Provides information about the media type detected for a given URL.\n" + "This works both for local files (based on extension), and remote HTTP (based" - + " on headers).") + + " on headers).\n" + + "See also the related 'fetch' command.") public class DetectCommand extends CommandWithResourceProvider { @Spec CommandSpec spec; @@ -45,9 +46,9 @@ public void run() throws Exception { super.run(); try (var ctx = TLC.open().push(URIs.ContextKeys.BASE, Paths.get("").toUri())) { var resource = rp.getResource(URIs.parse(iri)); - var mediaType = resource.mediaType(); var pw = spec.commandLine().getOut(); - pw.println(mediaType); + pw.println(resource.mediaType()); + resource.lastModifiedIfKnown().ifPresent(lastModified -> pw.println(lastModified)); } } } diff --git a/java/dev/enola/cli/EnolaCLI.java b/java/dev/enola/cli/EnolaCLI.java index bc5505353..b4d91f5b8 100644 --- a/java/dev/enola/cli/EnolaCLI.java +++ b/java/dev/enola/cli/EnolaCLI.java @@ -44,7 +44,8 @@ LoggingTestCommand.class, InfoCommand.class, ValidateCommand.class, - CanonicalizeCommand.class + CanonicalizeCommand.class, + FetchCommand.class }) public class EnolaCLI { diff --git a/java/dev/enola/cli/FetchCommand.java b/java/dev/enola/cli/FetchCommand.java new file mode 100644 index 000000000..0a2f62420 --- /dev/null +++ b/java/dev/enola/cli/FetchCommand.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright 2024 The Enola Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.enola.cli; + +import dev.enola.common.context.TLC; +import dev.enola.common.io.iri.URIs; + +import picocli.CommandLine; + +import java.net.URI; +import java.nio.file.Paths; + +@CommandLine.Command( + name = "fetch", + description = {"Fetches (I/O) a Resource", "See also the related 'info detect' command."}) +public class FetchCommand extends CommandWithResourceProvider { + + @CommandLine.Parameters(index = "0", paramLabel = "URL", description = "URL to fetch") + String url; + + @Override + public Integer call() throws Exception { + super.run(); + var uri = URI.create(url); + try (var ctx = TLC.open().push(URIs.ContextKeys.BASE, Paths.get("").toUri())) { + var resource = rp.getResource(uri); + if (resource == null) { + System.err.println(uri.getScheme() + " scheme unknown; try: enola fetch --help"); + return 1; + + } else { + resource.byteSource().copyTo(System.out); + return 0; + } + } + } +} diff --git a/java/dev/enola/common/io/iri/URIs.java b/java/dev/enola/common/io/iri/URIs.java index 3cf055dec..f64afe511 100644 --- a/java/dev/enola/common/io/iri/URIs.java +++ b/java/dev/enola/common/io/iri/URIs.java @@ -446,6 +446,7 @@ private static String getFragment(String uri) { // TODO Review if getScheme(), getPath(), getQueryString(), getFragment() are *REALLY* needed?! + @Deprecated // TODO This automagic " " => %20 is fishy... who needs this, why?! 
public static String encode(String uri) { return uri.replace(" ", "%20"); } diff --git a/java/dev/enola/common/io/resource/EmptyResource.java b/java/dev/enola/common/io/resource/EmptyResource.java index 40a15da5f..b395f722c 100644 --- a/java/dev/enola/common/io/resource/EmptyResource.java +++ b/java/dev/enola/common/io/resource/EmptyResource.java @@ -26,8 +26,9 @@ import java.net.URI; /** - * Read-only resources which when read are always immediately EOF. This is a bit like /dev/null on - * *NIX OS for reading, but not for writing (because /dev/null ignores writes, whereas this fails). + * Read-only resources which when read are always immediately EOF (like "data:,"). This is a bit + * like /dev/null on *NIX OS for reading, but not for writing (because /dev/null ignores writes, + * whereas this fails). * * @see NullResource for an alternative that returns infinite 0s instead of EOF. */ diff --git a/mkdocs.yaml b/mkdocs.yaml index cda6bf573..8f665551b 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -52,6 +52,7 @@ nav: - Canonicalize: use/canonicalize/index.md - ExecMD: use/execmd/index.md - Info: use/info/index.md + - Fetch: use/fetch/index.md - Models: - By Type: models/index.md - By Parent: models/hierarchy.md