Skip to content

Commit

Permalink
Config Doc - Add basic support for HTML tables -> AsciiDoc tables
Browse files Browse the repository at this point in the history
It will only handle very simple cases, but it makes the doc for
quarkus.native.resources.includes a lot better.
  • Loading branch information
gsmet committed Nov 25, 2024
1 parent 916d194 commit 10ddc12
Showing 1 changed file with 109 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public final class JavadocToAsciidocTransformer {
private static final String UN_ORDERED_LIST_NODE = "ul";
private static final String PREFORMATED_NODE = "pre";
private static final String BLOCKQUOTE_NODE = "blockquote";
private static final String TABLE_NODE = "table";
private static final String THEAD_NODE = "thead";
private static final String TBODY_NODE = "tbody";
private static final String TR_NODE = "tr";
private static final String TH_NODE = "th";
private static final String TD_NODE = "td";

private static final String BIG_ASCIDOC_STYLE = "[.big]";
private static final String LINK_ATTRIBUTE_FORMAT = "[%s]";
Expand All @@ -64,6 +70,9 @@ public final class JavadocToAsciidocTransformer {
private static final String CODE_BLOCK_ASCIDOC_STYLE = "```";
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____";
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____";
private static final String TABLE_MARKER = "!===";
private static final String COLUMN_HEADER_MARKER = "h!";
private static final String COLUMN_MARKER = "!";

private static final Pattern INLINE_TAG_MARKER_PATTERN = Pattern.compile("§§([0-9]+)§§");

Expand Down Expand Up @@ -106,7 +115,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
case SYSTEM_PROPERTY:
sb.setLength(0);
sb.append('`');
appendEscapedAsciiDoc(sb, content, inlineMacroMode);
appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context());
sb.append('`');
htmlJavadoc.append("§§" + markerCounter + "§§");
inlineTagsReplacements.put(markerCounter, sb.toString());
Expand All @@ -119,7 +128,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
}
sb.setLength(0);
sb.append('`');
appendEscapedAsciiDoc(sb, content, inlineMacroMode);
appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context());
sb.append('`');
htmlJavadoc.append("§§" + markerCounter + "§§");
inlineTagsReplacements.put(markerCounter, sb.toString());
Expand All @@ -135,7 +144,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
}

StringBuilder asciidocSb = new StringBuilder();
htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode);
htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode, new Context());
String asciidoc = trim(asciidocSb);

// not very optimal and could be included in htmlToAsciidoc() but simpler so let's go for it
Expand All @@ -147,13 +156,13 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
return asciidoc.isBlank() ? null : asciidoc;
}

private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode) {
private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode, Context context) {
for (Node childNode : node.childNodes()) {
switch (childNode.nodeName()) {
case PARAGRAPH_NODE:
newLine(sb);
newLine(sb);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case PREFORMATED_NODE:
newLine(sb);
Expand All @@ -173,7 +182,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa
newLine(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
newLineIfNeeded(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END);
newLine(sb);
Expand All @@ -182,7 +191,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa
case ORDERED_LIST_NODE:
case UN_ORDERED_LIST_NODE:
newLine(sb);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
newLine(sb);
break;
case LIST_ITEM_NODE:
Expand All @@ -191,67 +200,67 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa
: UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
newLine(sb);
sb.append(marker);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case LINK_NODE:
final String link = childNode.attr(HREF_ATTRIBUTE);
sb.append("link:");
sb.append(link);
final StringBuilder caption = new StringBuilder();
htmlToAsciidoc(caption, childNode, inlineMacroMode);
htmlToAsciidoc(caption, childNode, inlineMacroMode, context);
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption)));
break;
case CODE_NODE:
sb.append(BACKTICK);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(BACKTICK);
break;
case BOLD_NODE:
case STRONG_NODE:
sb.append(STAR);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(STAR);
break;
case EMPHASIS_NODE:
case ITALICS_NODE:
sb.append(UNDERSCORE);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(UNDERSCORE);
break;
case UNDERLINE_NODE:
sb.append(UNDERLINE_ASCIDOC_STYLE);
sb.append(HASH);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case SMALL_NODE:
sb.append(SMALL_ASCIDOC_STYLE);
sb.append(HASH);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case BIG_NODE:
sb.append(BIG_ASCIDOC_STYLE);
sb.append(HASH);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case SUB_SCRIPT_NODE:
sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
break;
case SUPER_SCRIPT_NODE:
sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
break;
case DEL_NODE:
case S_NODE:
case STRIKE_NODE:
sb.append(LINE_THROUGH_ASCIDOC_STYLE);
sb.append(HASH);
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case NEW_LINE_NODE:
Expand All @@ -272,10 +281,58 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa
text = startingSpaceMatcher.replaceFirst("");
}

appendEscapedAsciiDoc(sb, text, inlineMacroMode);
appendEscapedAsciiDoc(sb, text, inlineMacroMode, context);
break;
case TABLE_NODE:
newLine(sb);
newLine(sb);
sb.append(TABLE_MARKER);
newLine(sb);
context.inTable = true;
context.firstTableRow = true;
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
context.inTable = false;
context.firstTableRow = false;
sb.append(TABLE_MARKER);
newLine(sb);
break;
case THEAD_NODE:
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case TBODY_NODE:
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case TR_NODE:
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
newLine(sb);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
context.firstTableRow = false;
break;
case TH_NODE:
if (!context.firstTableRow) {
sb.append(COLUMN_HEADER_MARKER);
} else {
sb.append(COLUMN_MARKER);
}
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
break;
case TD_NODE:
sb.append(COLUMN_MARKER);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
break;
default:
htmlToAsciidoc(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
}
}
Expand Down Expand Up @@ -351,6 +408,20 @@ private static StringBuilder trimText(StringBuilder sb, String charsToTrim) {
return sb;
}

/**
 * Strips every trailing whitespace character from the given builder, in place.
 * <p>
 * Whitespace is determined by {@link Character#isWhitespace(char)}. The builder is left
 * untouched when it is empty or when its last character is not whitespace.
 *
 * @param sb the builder to truncate; modified directly
 */
private static void trimTrailingWhitespaces(StringBuilder sb) {
    // Walk backwards to find the length the builder should have once trailing whitespace is dropped.
    int end = sb.length();
    while (end > 0 && Character.isWhitespace(sb.charAt(end - 1))) {
        end--;
    }
    // Only truncate when at least one trailing whitespace character was found.
    if (end < sb.length()) {
        sb.setLength(end);
    }
}

private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) {
int i = 0;
/* trim leading whitespace */
Expand Down Expand Up @@ -417,7 +488,8 @@ private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text)
return sb;
}

private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode) {
private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode,
Context context) {
boolean escaping = false;
for (int i = 0; i < text.length(); i++) {
final char ch = text.charAt(i);
Expand Down Expand Up @@ -453,6 +525,16 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text
}
sb.append("{plus}");
break;
case '!':
if (escaping) {
sb.append("++");
escaping = false;
}
if (context.inTable) {
sb.append('\\');
}
sb.append(ch);
break;
default:
if (escaping) {
sb.append("++");
Expand All @@ -466,4 +548,10 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text
}
return sb;
}

/**
 * Mutable state threaded through the recursive HTML-to-AsciiDoc conversion so that nested
 * calls know whether they are currently emitting table content.
 * <p>
 * A fresh instance is created for each top-level conversion call.
 */
private static class Context {

// Set while the children of a <table> element are being converted; when true,
// appendEscapedAsciiDoc() prefixes '!' with a backslash because '!' is used as the
// table cell separator.
boolean inTable;
// True from the opening of a table until its first <tr> has been processed; the row
// and cell handlers use it to decide on the blank line before a row, the header cell
// marker, and the newline after each cell.
boolean firstTableRow;
}
}

0 comments on commit 10ddc12

Please sign in to comment.