Skip to content

Commit

Permalink
Fix shortcode/continue-reading parsing with inline HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
clarfonthey committed Jul 21, 2024
1 parent c666ee1 commit 5a81666
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 30 deletions.
9 changes: 1 addition & 8 deletions components/content/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ static RFC3339_DATE: Lazy<Regex> = Lazy::new(|| {
).unwrap()
});

static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Page {
/// All info about the actual file
Expand Down Expand Up @@ -232,10 +228,7 @@ impl Page {
let res = render_content(&self.raw_content, &context)
.with_context(|| format!("Failed to render content of {}", self.file.path.display()))?;

self.summary = res
.summary_len
.map(|l| &res.body[0..l])
.map(|s| FOOTNOTES_RE.replace_all(s, "").into_owned());
self.summary = res.summary;
self.content = res.body;
self.toc = res.toc;
self.external_links = res.external_links;
Expand Down
83 changes: 65 additions & 18 deletions components/markdown/src/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::codeblock::{CodeBlock, FenceSettings};
use crate::shortcode::{Shortcode, SHORTCODE_PLACEHOLDER};

/// Anchor emitted into the rendered HTML in place of the "more" divider matched by
/// `MORE_DIVIDER_RE`; its position in the event stream marks where the summary ends.
const CONTINUE_READING: &str = "<span id=\"continue-reading\"></span>";
/// Placeholder appended to a summary that was cut off while elements were still open; it is
/// inserted just before the closing tags that are added for those elements.
const SUMMARY_CUTOFF: &str = "<span class=\"summary-cutoff\"></span>";
const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
static EMOJI_REPLACER: Lazy<EmojiReplacer> = Lazy::new(EmojiReplacer::new);

Expand All @@ -36,6 +37,10 @@ static MORE_DIVIDER_RE: Lazy<Regex> = Lazy::new(|| {
.unwrap()
});

/// Matches a rendered footnote reference
/// (`<sup class="footnote-reference"><a href=…>…</a></sup>`).
/// Used to strip footnote references out of the summary HTML.
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});

/// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
/// private schemes. This regex checks if the given string starts with something that just looks
/// like a scheme, i.e., a case-insensitive identifier followed by a colon.
Expand Down Expand Up @@ -78,7 +83,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
#[derive(Debug)]
pub struct Rendered {
pub body: String,
pub summary_len: Option<usize>,
pub summary: Option<String>,
pub toc: Vec<Heading>,
/// Links to site-local pages: relative path plus optional anchor target.
pub internal_links: Vec<(String, Option<String>)>,
Expand Down Expand Up @@ -405,6 +410,7 @@ pub fn markdown_to_html(
.map(|x| x.as_object().unwrap().get("relative_path").unwrap().as_str().unwrap());
// the rendered html
let mut html = String::with_capacity(content.len());
let mut summary = None;
// Set while parsing
let mut error = None;

Expand Down Expand Up @@ -679,17 +685,15 @@ pub fn markdown_to_html(
event
});
}
Event::Html(text) => {
if !has_summary && MORE_DIVIDER_RE.is_match(&text) {
has_summary = true;
events.push(Event::Html(CONTINUE_READING.into()));
continue;
}
if !contains_shortcode(text.as_ref()) {
events.push(Event::Html(text));
continue;
}

Event::Html(text) | Event::InlineHtml(text)
if !has_summary && MORE_DIVIDER_RE.is_match(text.as_ref()) =>
{
has_summary = true;
events.push(Event::Html(CONTINUE_READING.into()));
}
Event::Html(text) | Event::InlineHtml(text)
if contains_shortcode(text.as_ref()) =>
{
render_shortcodes!(false, text, range);
}
_ => events.push(event),
Expand Down Expand Up @@ -781,14 +785,57 @@ pub fn markdown_to_html(
convert_footnotes_to_github_style(&mut events);
}

cmark::html::push_html(&mut html, events.into_iter());
// Index of the continue-reading marker event, or `events.len()` when no marker is present
// (in which case the "summary" slice below covers every event).
// NOTE(review): only a `CowStr::Borrowed` payload matches this pattern; this presumably relies
// on the marker being pushed via `CONTINUE_READING.into()` producing a borrowed `CowStr` — confirm.
let continue_reading = events
.iter()
.position(|e| matches!(e, Event::Html(CowStr::Borrowed(CONTINUE_READING))))
.unwrap_or(events.len());

// determine closing tags missing from summary:
// `tags` is a stack of end tags for elements opened before the cutoff and not yet closed.
let mut tags = Vec::new();
for event in &events[..continue_reading] {
match event {
// HTML blocks are ignored in both directions, so they never enter the stack.
Event::Start(Tag::HtmlBlock) | Event::End(TagEnd::HtmlBlock) => (),
// Remember the end tag that will eventually be needed for this start tag.
Event::Start(tag) => tags.push(tag.to_end()),
Event::End(tag) => {
// Cut the stack back to the most recent matching entry, dropping it and anything
// pushed after it.
// NOTE(review): when no matching entry exists, `unwrap_or(0)` empties the whole
// stack — confirm this is the intended fallback.
tags.truncate(tags.iter().rposition(|t| *t == *tag).unwrap_or(0));
}
_ => (),
}
}

let mut events = events.into_iter();

// emit everything up to summary
cmark::html::push_html(&mut html, events.by_ref().take(continue_reading));

if has_summary {
    // At this point `html` holds only what was rendered before the continue-reading marker;
    // the summary is that HTML with footnote references removed.
    let mut summary_html = FOOTNOTES_RE.replace_all(&html, "").into_owned();

    // Truncate trailing whitespace so the cutoff placeholder and the closing tags follow
    // the summary text directly.
    summary_html.truncate(summary_html.trim_end().len());

    // Elements still open at the cutoff mean the summary ends mid-element: add the
    // cutoff placeholder so templates/styles can detect the truncation point.
    if !tags.is_empty() {
        summary_html.push_str(SUMMARY_CUTOFF);
    }

    // Close the remaining open elements, innermost first, so the summary is well-formed.
    cmark::html::push_html(&mut summary_html, tags.into_iter().rev().map(Event::End));

    summary = Some(summary_html);
}

// emit everything after summary
cmark::html::push_html(&mut html, events);
}

if let Some(e) = error {
Err(e)
} else {
Ok(Rendered {
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
summary,
body: html,
toc: make_table_of_contents(headings),
internal_links,
Expand Down Expand Up @@ -861,10 +908,10 @@ mod tests {
for more in mores {
let content = format!("{top}\n\n{more}\n\n{bottom}");
let rendered = markdown_to_html(&content, &context, vec![]).unwrap();
assert!(rendered.summary_len.is_some(), "no summary when splitting on {more}");
let summary_len = rendered.summary_len.unwrap();
let summary = &rendered.body[..summary_len].trim();
let body = &rendered.body[summary_len..].trim();
assert!(rendered.summary.is_some(), "no summary when splitting on {more}");
let summary = rendered.summary.unwrap();
let summary = summary.trim();
let body = rendered.body[summary.len()..].trim();
let continue_reading = &body[..CONTINUE_READING.len()];
let body = &body[CONTINUE_READING.len()..].trim();
assert_eq!(summary, &top_rendered);
Expand Down
12 changes: 12 additions & 0 deletions components/markdown/tests/shortcodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,15 @@ fn can_use_shortcodes_in_quotes() {
.body;
insta::assert_snapshot!(body);
}

/// A shortcode call wrapped in inline HTML (`<span>…</span>`) inside a paragraph must still be
/// rendered; the result is compared against the stored snapshot.
#[test]
fn can_render_with_inline_html() {
let body = common::render(
r#"
Here is <span>{{ ex1(page="") }}</span> example.
"#,
)
.unwrap()
.body;
insta::assert_snapshot!(body);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: components/markdown/tests/shortcodes.rs
expression: body
---
<p>Here is <span>1</span> example.</p>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: components/markdown/tests/summary.rs
expression: body
---
<p>Hello world.</p>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
source: components/markdown/tests/summary.rs
expression: body
---
<p>Things to do:</p>
<ul>
<li>Program<span class="summary-cutoff"></span></li>
</ul>
34 changes: 30 additions & 4 deletions components/markdown/tests/summary.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
mod common;

/// Renders `content` and returns its summary as an owned `String`.
///
/// Panics if rendering fails or if the rendered content has no summary.
fn get_summary(content: &str) -> String {
let rendered = common::render(content).unwrap();
assert!(rendered.summary_len.is_some());
let summary_len = rendered.summary_len.unwrap();
rendered.body[..summary_len].to_owned()
common::render(content).expect("couldn't render").summary.expect("had no summary")
}

#[test]
Expand Down Expand Up @@ -45,3 +42,32 @@ And some content after
);
insta::assert_snapshot!(body);
}

/// Snapshot test for a summary whose `<!-- more -->` marker sits in the middle of a list item,
/// i.e. the cutoff happens while the list and its item are still open.
#[test]
fn truncated_summary() {
let body = get_summary(
r#"
Things to do:
* Program <!-- more --> something
* Eat
* Sleep
"#,
);
insta::assert_snapshot!(body);
}

/// Snapshot test for a summary whose text contains a footnote reference (`[^1]`) before the
/// `<!-- more -->` marker.
// NOTE(review): the function name is misspelled ("foontnotes" vs "footnotes"); the stored
// snapshot is presumably keyed on the test name, so a rename would need the snapshot file
// renamed as well — confirm before changing.
#[test]
fn foontnotes_summary() {
let body = get_summary(
r#"
Hello world[^1].
<!-- more -->
Good bye.
[^1]: "World" is a placeholder.
"#,
);
insta::assert_snapshot!(body);
}

0 comments on commit 5a81666

Please sign in to comment.