Skip to content

Commit

Permalink
clean(core): Simplify BaseResource with MediaType to only adjustChars…
Browse files Browse the repository at this point in the history
…et, not detect
  • Loading branch information
vorburger committed Dec 14, 2024
1 parent 494a7ff commit 09c8cc8
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 54 deletions.
29 changes: 22 additions & 7 deletions java/dev/enola/common/io/iri/URIs.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Path;
Expand Down Expand Up @@ -72,10 +73,25 @@ public static MediaTypeAndOrCharset getMediaTypeAndCharset(URI uri) {
return new MediaTypeAndOrCharset(mediaTypeParameter, charsetParameter);
}

public static @Nullable String getCharset(URI uri) {
var mediaTypeAndCharset = getMediaTypeAndCharset(uri);
if (mediaTypeAndCharset.charset != null) return mediaTypeAndCharset.charset;
if (mediaTypeAndCharset.mediaType != null) {
var optMediaTypeCharset = MediaType.parse(mediaTypeAndCharset.mediaType);
if (optMediaTypeCharset.charset().isPresent())
return optMediaTypeCharset.charset().get().toString();
}
return null;
}

public static URI addMediaType(URI uri, MediaType mediaType) {
return addQueryParameter(uri, MEDIA_TYPE, mediaType.toString().replace(" ", ""));
}

public static URI addCharset(URI uri, Charset charset) {
return addQueryParameter(uri, CHARSET, charset.toString());
}

public static URI addQuery(URI uri, Map<String, String> parameters) {
for (var parameter : parameters.entrySet()) {
uri = addQueryParameter(uri, parameter.getKey(), parameter.getValue());
Expand Down Expand Up @@ -105,13 +121,12 @@ private static URI addQueryParameter(URI uri, String key, String value) {
String connector;
if (uri.getQuery() != null && uri.getQuery().contains(key)) {
return uri;
} else if (uri.getQuery() == null) {
if (!uri.getSchemeSpecificPart().contains("?")) {
connector = "?";
} else {
// Special case of "scheme:?"-like URIs with "empty" query
connector = "";
}
} else if (uri.getQuery() == null && !uri.getSchemeSpecificPart().contains("?")) {
connector = "?";
} else if (uri.getQuery() == null && uri.getSchemeSpecificPart().equals("?")) {
// Handle special (but technically invalid cases) of "scheme:?" URIs with "empty" query
// This is currently used by EmptyResource.EMPTY_URI (but may be changed later)
connector = "";
} else {
connector = "&";
}
Expand Down
15 changes: 15 additions & 0 deletions java/dev/enola/common/io/iri/URIsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.junit.Assert.assertThrows;

import static java.net.URI.create;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
Expand Down Expand Up @@ -112,6 +113,20 @@ public void testAddQuery() {
.isEqualTo("http://host/path?arg1=a&arg2=b&arg3=c");
}

@Test
public void testAddCharsetQueryParameter() {
assertThat(URIs.addCharset(URI.create("fd:1?mediaType=application/yaml"), UTF_8))
.isEqualTo(URI.create("fd:1?mediaType=application/yaml&charset=UTF-8"));
}

@Test
public void testGetCharsetFromMediaTypeOrCharsetQueryParameter() {
assertThat(URIs.getCharset(URI.create("fd:1?mediaType=application/yaml&charset=UTF-8")))
.isEqualTo("UTF-8");
assertThat(URIs.getCharset(URI.create("fd:1?mediaType=application/yaml;charset=UTF-8")))
.isEqualTo("UTF-8");
}

@Test
public void testAddQueryGivenOriginalUriWithQuery() {
var uri1 = URI.create("http://host/pathX");
Expand Down
13 changes: 3 additions & 10 deletions java/dev/enola/common/io/mediatype/MarkdownMediaTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import com.google.common.collect.Multimap;
import com.google.common.net.MediaType;

import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Set;

Expand All @@ -40,26 +39,20 @@ public class MarkdownMediaTypes implements MediaTypeProvider {

private static final String VARIANT = "variant";

public static final MediaType MARKDOWN_UTF_8 =
create("text", "markdown").withCharset(StandardCharsets.UTF_8);
public static final MediaType MARKDOWN_UTF_8 = MediaType.MD_UTF_8;

/**
* <a href="https://www.iana.org/assignments/markdown-variants/CommonMark">CommonMark</a>
* variant, see <a href="https://commonmark.org">CommonMark.org</a>.
*/
public static final MediaType COMMON_MARKDOWN_UTF_8 =
create("text", "markdown")
.withCharset(StandardCharsets.UTF_8)
.withParameter(VARIANT, "CommonMark");
MARKDOWN_UTF_8.withParameter(VARIANT, "CommonMark");

/**
* <a href="https://www.iana.org/assignments/markdown-variants/GFM">GitHub Flavored Markdown
* (GFM)</a>.
*/
public static final MediaType GFM_MARKDOWN_UTF_8 =
create("text", "markdown")
.withCharset(StandardCharsets.UTF_8)
.withParameter(VARIANT, "GFM");
public static final MediaType GFM_MARKDOWN_UTF_8 = MARKDOWN_UTF_8.withParameter(VARIANT, "GFM");

@Override
public Multimap<String, MediaType> extensionsToTypes() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.nio.charset.Charset;
import java.util.Optional;

// TODO Increase test coverage... (this currently only used by YamlMediaType)
public abstract class ResourceCharsetDetectorSPI implements ResourceCharsetDetector {

private static final Logger LOG = LoggerFactory.getLogger(ResourceCharsetDetectorSPI.class);
Expand Down
6 changes: 4 additions & 2 deletions java/dev/enola/common/io/resource/BaseResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ protected BaseResource(URI uri) {
}

protected BaseResource(URI uri, ByteSource byteSource) {
this(uri, mtd.detect(uri, byteSource));
this.uri = requireNonNull(uri, "uri");
this.mediaType = mtd.detect(uri, byteSource);
}

protected BaseResource(URI uri, MediaType mediaType) {
this.uri = requireNonNull(uri, "uri");
this.mediaType = mtd.overwrite(uri, requireNonNull(mediaType, "mediaType"));
this.mediaType = mtd.adjustCharset(uri, requireNonNull(mediaType, "mediaType"));
}

// TODO Remove this weird "hacked" constructor with "fake" fixedMediaType parameter...
protected BaseResource(URI uri, MediaType mediaType, boolean fixedMediaType) {
this.uri = requireNonNull(uri, "uri");
this.mediaType = requireNonNull(mediaType, "mediaType");
Expand Down
2 changes: 1 addition & 1 deletion java/dev/enola/common/io/resource/EmptyResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public Resource getResource(URI uri) {
}

static final String SCHEME = "empty";
public static final URI EMPTY_URI = URI.create(SCHEME + ":?"); // Maybe :/ is better?
public static final URI EMPTY_URI = URI.create(SCHEME + ":?"); // TODO :/ is probably better?
public static final EmptyResource INSTANCE = new EmptyResource(EMPTY_URI);

public EmptyResource(URI uri) {
Expand Down
21 changes: 16 additions & 5 deletions java/dev/enola/common/io/resource/FileDescriptorResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;
import com.google.common.net.MediaType;

import dev.enola.common.io.iri.URIs;

Expand All @@ -33,8 +32,16 @@
import java.nio.charset.Charset;

/**
* Resource for URIs "fd:0?charset=ASCII" (STDIN), "fd:1?charset=UTF-8" (STDOUT),
* "fd:2?charset=UTF-16BE" STDERR. Implementation uses {@link FileDescriptor}.
* Resource for "fd:0" (STDIN), "fd:1" (STDOUT), "fd:2" (STDERR) URIs.
*
* <p>The {@link #mediaType()} will be {@link com.google.common.net.MediaType#OCTET_STREAM} (NOT
* {@link com.google.common.net.MediaType#APPLICATION_BINARY}!), unless there is a ?mediaType= query
* parameter in the URI argument.
*
* <p>The Charset of that Media Type will be the {@link Charset#defaultCharset()}, unless there is
* either (check first) a ?charset= query parameter in the URI argument (e.g. "fd:0?charset=ASCII",
* or "fd:1?charset=UTF-8", or "fd:2?charset=UTF-16BE") or the ?mediaType= query parameter includes
* a charset (e.g. "fd:1?mediaType=application/yaml;charset=utf-16be").
*/
public class FileDescriptorResource extends BaseResource implements Resource {

Expand All @@ -54,8 +61,7 @@ public Resource getResource(URI uri) {
private final FileDescriptor fileDescriptor;

public FileDescriptorResource(URI uri) {
// Default to Text in Platform's (Terminal) default charset
super(uri, MediaType.PLAIN_TEXT_UTF_8.withCharset(Charset.defaultCharset()));
super(addDefaultCharsetIfNone(uri));

if (!"fd".equals(uri.getScheme())) {
throw new IllegalArgumentException(uri.toString());
Expand All @@ -73,6 +79,11 @@ public FileDescriptorResource(URI uri) {
}
}

private static URI addDefaultCharsetIfNone(URI uri) {
if (URIs.getCharset(uri) == null) return URIs.addCharset(uri, Charset.defaultCharset());
else return uri;
}

@Override
public ByteSink byteSink() {
return new ByteSink() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package dev.enola.common.io.resource;

import static com.google.common.net.MediaType.OCTET_STREAM;
import static com.google.common.truth.Truth.assertThat;

import static dev.enola.common.context.testlib.SingletonRule.$;
Expand All @@ -40,11 +41,12 @@ public class FileDescriptorResourceTest {
public @Rule SingletonRule r = $(MediaTypeProviders.set());

@Test
public void testSTDOUTwithoutCharset() throws IOException {
public void testSTDOUTwithoutCharsetNorMediaType() throws IOException {
var FD1 = new FileDescriptorResource(URI.create("fd:1"));
FD1.byteSink().write(new byte[] {1, 2, 3});
FD1.charSink().write("hello");
assertThat(FD1.mediaType().charset()).hasValue(Charset.defaultCharset());
assertThat(FD1.mediaType().withoutParameters()).isEqualTo(OCTET_STREAM.withoutParameters());
FD1.charSink().write("hello");
}

@Test
Expand Down
32 changes: 25 additions & 7 deletions java/dev/enola/common/io/resource/MediaTypeDetector.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,32 +75,37 @@ MediaType detect(URI uri, ByteSource byteSource) {
var mediaTypeCharset = URIs.getMediaTypeAndCharset(uri);
var detected = detect(mediaTypeCharset.mediaType(), mediaTypeCharset.charset());
detected = detectCharsetAndMediaType(uri, byteSource, detected);

detected = overwrite(uri, detected);

return detected;
}

/**
* This is called by Resource* implementation constructors via {@link BaseResource}, if there is
* (only) an URI and a server proposed MediaType already. In this case, there is no point in
* also considering a ByteSource. This uses a number of hard-coded built-in heuristics to ignore
* certain server proposals and try to find a "better" one based on the URI.
* (only) an URI and already a MediaType proposed either by a server, or from {@link
* #detect(URI, ByteSource)}. In this case, there is no point in also considering a ByteSource.
* This uses a number of hard-coded built-in heuristics to ignore certain server proposals and
* try to find a "better" one based on the URI.
*/
MediaType overwrite(URI uri, final MediaType serverProposedMediaType) {
var mediaType = serverProposedMediaType;
MediaType overwrite(URI uri, final MediaType proposedMediaType) {
var mediaType = proposedMediaType;

// TODO Move this into UrlResource if "content/unknown" is hard-coded in URLConnection?
if (IGNORE.contains(mediaType.withoutParameters())) {
mediaType = MediaType.OCTET_STREAM;
}

// TODO Reuse #adjust() ?
var uriCharset = URIs.getMediaTypeAndCharset(uri);
var uriMediaType = uriCharset.mediaType();
if (uriMediaType != null) mediaType = MediaType.parse(uriMediaType);

var cs = uriCharset.charset();
if (cs != null) mediaType = mediaType.withCharset(Charset.forName(cs));
else {
if (!mediaType.charset().isPresent() && serverProposedMediaType.charset().isPresent()) {
mediaType = mediaType.withCharset(serverProposedMediaType.charset().get());
if (!mediaType.charset().isPresent() && proposedMediaType.charset().isPresent()) {
mediaType = mediaType.withCharset(proposedMediaType.charset().get());
} else {
mediaType = detectCharsetAndMediaType(uri, ByteSource.empty(), mediaType);
}
Expand All @@ -109,6 +114,19 @@ MediaType overwrite(URI uri, final MediaType serverProposedMediaType) {
return mediaType;
}

/**
* This is called by Resource* implementation constructors via {@link BaseResource}, if there is
* (only) an URI and a "client" (caller of API) requested MediaType. It generally "respects"
* (honors) the clients' wish, does not use any magic heuristics, and only adjusts (only) the it
* based on query parameters in the URI (if present).
*/
MediaType adjustCharset(URI uri, final MediaType clientRequestedMediaType) {
var charsetParameter = URIs.getCharset(uri);
if (charsetParameter != null)
return clientRequestedMediaType.withCharset(Charset.forName(charsetParameter));
else return clientRequestedMediaType;
}

private MediaType detectCharsetAndMediaType(
URI uri, ByteSource byteSource, MediaType detected) {
if (!detected.charset().isPresent()) {
Expand Down
29 changes: 9 additions & 20 deletions java/dev/enola/common/io/resource/MediaTypeDetectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;

Expand Down Expand Up @@ -115,29 +114,19 @@ public void testTextproto() {
}

@Test
public void testTextYAML() {
// A text/plain with *.yaml is still application/yaml
var r =
new EmptyResource(
create("http://server/hello.yaml"),
PLAIN_TEXT_UTF_8.withoutParameters()); // drop charset!

public void testYAML() {
var r = new EmptyResource(create("http://server/hello.yaml"));
assertThat(r.mediaType()).isEqualTo(YAML_UTF_8);
}

@Test
public void testEmptyYAML() {
// Empty .YAML is UTF-8
var r = new EmptyResource(YamlMediaType.YAML_UTF_8.withoutParameters()); // drop charset!
assertThat(r.mediaType()).isEqualTo(YAML_UTF_8);
}

@Test
public void testNoHeaderYAML() throws IOException {
// A .YAML without header and just some ASCII is still UTF-8
var text = "hello: world";
var r = new MemoryResource(YamlMediaType.YAML_UTF_8.withoutParameters()); // drop charset!
r.byteSink().write(text.getBytes(StandardCharsets.US_ASCII));
@Ignore // TODO This is an invalid test... rewrite it elsewhere.
// If a caller of an EmptyResource constructor says its TEXT, then it is that!
// What this meant to test is that if a HTTP server says something is TEXT, then
// that may be wrong, and we should detect if it may be YAML; but that goes elsewhere.
public void testTextYAML() {
// A text/plain with *.yaml is still application/yaml
var r = new EmptyResource(create("http://server/hello.yaml"), PLAIN_TEXT_UTF_8);
assertThat(r.mediaType()).isEqualTo(YAML_UTF_8);
}

Expand Down
1 change: 1 addition & 0 deletions java/dev/enola/common/io/resource/OkHttpResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ public OkHttpResource(URI uri) {
super(uri, mediaType(uri.toString()));
}

// See also UrlResource#mediaType(URL url)
private static MediaType mediaType(String url) {
Request request = new Request.Builder().url(url).build();

Expand Down
1 change: 1 addition & 0 deletions java/dev/enola/common/io/resource/UrlResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public UrlResource(URL url, MediaType mediaType) {
this.url = url;
}

// See also OkHttpResource#mediaType(String url)
private static MediaType mediaType(URL url) {
// This is slow - but more correct; see https://www.baeldung.com/java-file-mime-type
URLConnection c = null;
Expand Down

0 comments on commit 09c8cc8

Please sign in to comment.