Skip to content

Commit

Permalink
Refactor PercentCodec a bit
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Farr <[email protected]>
  • Loading branch information
Xtansia committed Jul 30, 2024
1 parent b5901c9 commit 7fcb24d
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ public static void setCodec(PercentCodec codec) {
PathEncoder.codec = codec;
}


/**
 * Encodes a single path segment by delegating to the codec returned by {@code getCodec()}.
 *
 * @param pathSegment the raw path segment to encode
 * @return whatever the current codec's {@code encode} call produces for {@code pathSegment}
 */
public static String encode(String pathSegment) {
return getCodec().encode(pathSegment);
}
Expand Down
157 changes: 72 additions & 85 deletions java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,96 +21,83 @@
* </p>
*/
public class PercentCodec {
private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256);
private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256);
private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256);
private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256);
private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256);
private static final BitSet RFC3986_URIC_CHARS = new BitSet(256);

static {
RFC3986_GEN_DELIMS_CHARS.set(':');
RFC3986_GEN_DELIMS_CHARS.set('/');
RFC3986_GEN_DELIMS_CHARS.set('?');
RFC3986_GEN_DELIMS_CHARS.set('#');
RFC3986_GEN_DELIMS_CHARS.set('[');
RFC3986_GEN_DELIMS_CHARS.set(']');
RFC3986_GEN_DELIMS_CHARS.set('@');

RFC3986_SUB_DELIMS_CHARS.set('!');
RFC3986_SUB_DELIMS_CHARS.set('$');
RFC3986_SUB_DELIMS_CHARS.set('&');
RFC3986_SUB_DELIMS_CHARS.set('\'');
RFC3986_SUB_DELIMS_CHARS.set('(');
RFC3986_SUB_DELIMS_CHARS.set(')');
RFC3986_SUB_DELIMS_CHARS.set('*');
RFC3986_SUB_DELIMS_CHARS.set('+');
RFC3986_SUB_DELIMS_CHARS.set(',');
RFC3986_SUB_DELIMS_CHARS.set(';');
RFC3986_SUB_DELIMS_CHARS.set('=');

for (int i = 'a'; i <= 'z'; i++) {
RFC3986_UNRESERVED_CHARS.set(i);
}
for (int i = 'A'; i <= 'Z'; i++) {
RFC3986_UNRESERVED_CHARS.set(i);
}
// numeric characters
for (int i = '0'; i <= '9'; i++) {
RFC3986_UNRESERVED_CHARS.set(i);
}
RFC3986_UNRESERVED_CHARS.set('-');
RFC3986_UNRESERVED_CHARS.set('.');
RFC3986_UNRESERVED_CHARS.set('_');
RFC3986_UNRESERVED_CHARS.set('~');

RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS);
RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
RFC3986_PATHSAFE_NC_CHARS.set('@');

RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS);
RFC3986_PATHSAFE_CHARS.set(':');

RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS);
}
private static class Chars {
private final BitSet set = new BitSet(256);

private static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256);

static {
// Alphanumeric characters
for (int i = 'a'; i <= 'z'; i++) {
RFC5987_UNRESERVED_CHARS.set(i);
}
for (int i = 'A'; i <= 'Z'; i++) {
RFC5987_UNRESERVED_CHARS.set(i);
}
for (int i = '0'; i <= '9'; i++) {
RFC5987_UNRESERVED_CHARS.set(i);
}

// Additional characters as per RFC 5987 attr-char
RFC5987_UNRESERVED_CHARS.set('!');
RFC5987_UNRESERVED_CHARS.set('#');
RFC5987_UNRESERVED_CHARS.set('$');
RFC5987_UNRESERVED_CHARS.set('&');
RFC5987_UNRESERVED_CHARS.set('+');
RFC5987_UNRESERVED_CHARS.set('-');
RFC5987_UNRESERVED_CHARS.set('.');
RFC5987_UNRESERVED_CHARS.set('^');
RFC5987_UNRESERVED_CHARS.set('_');
RFC5987_UNRESERVED_CHARS.set('`');
RFC5987_UNRESERVED_CHARS.set('|');
RFC5987_UNRESERVED_CHARS.set('~');
/**
 * Marks each of the given characters as a member of this set.
 *
 * @param chars the characters to add
 */
public void add(char... chars) {
    for (int i = 0; i < chars.length; i++) {
        set.set(chars[i]);
    }
}

/**
 * Marks every character from {@code start} to {@code end}, both inclusive, as a member.
 *
 * @param start the first character of the range
 * @param end the last character of the range (inclusive)
 */
public void addRange(char start, char end) {
    // BitSet#set(from, to) treats its upper bound as exclusive, hence the + 1.
    final int endExclusive = end + 1;
    set.set(start, endExclusive);
}

/**
 * Merges all members of another character set into this one (set union).
 *
 * @param set the character set whose members are added to this set
 */
public void add(Chars set) {
    final BitSet other = set.set;
    this.set.or(other);
}

/**
 * Tests whether the given character code is a member of this set.
 *
 * @param c the character (or byte) value to look up
 * @return {@code true} if {@code c} was previously added
 * @throws IndexOutOfBoundsException if {@code c} is negative (propagated from {@link BitSet#get(int)})
 */
public boolean contains(int c) {
    final boolean member = set.get(c);
    return member;
}
}

/**
 * The {@code gen-delims} characters of RFC 3986 section 2.2.
 */
// NOTE(review): the instance-initializer form below makes this constant an anonymous subclass of Chars.
private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() {
{
add(':', '/', '?', '#', '[', ']', '@');
}
};
/**
 * The {@code sub-delims} characters of RFC 3986 section 2.2.
 */
private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() {
{
add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=');
}
};
/**
 * The {@code unreserved} characters of RFC 3986 section 2.3:
 * ASCII letters, ASCII digits, and {@code - . _ ~}.
 */
private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
add('-', '.', '_', '~');
}
};
/**
 * Union of the unreserved characters, the sub-delims and {@code '@'}; i.e. the literal
 * characters of RFC 3986 section 3.3 {@code segment-nz-nc} (path characters without the colon).
 */
// Static initializers run in textual order, so this must stay below the two sets it copies from.
private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() {
{
add(RFC3986_UNRESERVED_CHARS);
add(RFC3986_SUB_DELIMS_CHARS);
add('@');
}
};
/**
 * {@code RFC3986_PATH_NO_COLON_CHARS} plus {@code ':'}; i.e. the literal characters of
 * RFC 3986 section 3.3 {@code pchar}.
 */
// Static initializers run in textual order, so this must stay below RFC3986_PATH_NO_COLON_CHARS.
private static final Chars RFC3986_PATH_CHARS = new Chars() {
{
add(RFC3986_PATH_NO_COLON_CHARS);
add(':');
}
};
/**
 * Union of the RFC 3986 sub-delims and unreserved character sets.
 */
// Static initializers run in textual order, so this must stay below the two sets it copies from.
private static final Chars RFC3986_URIC_CHARS = new Chars() {
{
add(RFC3986_SUB_DELIMS_CHARS);
add(RFC3986_UNRESERVED_CHARS);
}
};

/**
 * The {@code attr-char} characters of RFC 5987 section 3.2.1: ASCII letters, ASCII digits,
 * and the punctuation listed below.
 */
private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
// Additional characters as per RFC 5987 attr-char
add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~');
}
};

private static final int RADIX = 16;

private static void encode(
final StringBuilder buf,
final CharSequence content,
final Charset charset,
final BitSet safeChars,
final Chars safeChars,
final boolean blankAsPlus
) {
if (content == null) {
Expand All @@ -120,7 +107,7 @@ private static void encode(
final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
while (bb.hasRemaining()) {
final int b = bb.get() & 0xff;
if (safeChars.get(b)) {
if (safeChars.contains(b)) {
buf.append((char) b);
} else if (blankAsPlus && b == ' ') {
buf.append("+");
Expand Down Expand Up @@ -165,12 +152,12 @@ private static String decode(final CharSequence content, final Charset charset,
}

public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS);
public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATH_CHARS);
public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);

private final BitSet unreserved;
private final Chars unreserved;

private PercentCodec(final BitSet unreserved) {
private PercentCodec(final Chars unreserved) {
this.unreserved = unreserved;
}

Expand Down

0 comments on commit 7fcb24d

Please sign in to comment.