Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pass args to fast html #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions lib/floki/html_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@ defmodule Floki.HTMLParser do

@default_parser Floki.HTMLParser.Mochiweb

@callback parse_document(binary()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_fragment(binary()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_document(binary(), list()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_fragment(binary(), list()) :: {:ok, Floki.html_tree()} | {:error, String.t()}

def parse_document(html, opts \\ []) do
parser(opts).parse_document(html)
parser_args = opts[:parser_args] || []

parser(opts).parse_document(html, parser_args)
end

def parse_fragment(html, opts \\ []) do
parser(opts).parse_fragment(html)
parser_args = opts[:parser_args] || []

parser(opts).parse_fragment(html, parser_args)
end

defp parser(opts) do
Expand Down
8 changes: 4 additions & 4 deletions lib/floki/html_parser/fast_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ defmodule Floki.HTMLParser.FastHtml do
@moduledoc false

@impl true
def parse_document(html) do
execute_with_module(fn module -> module.decode(html) end)
def parse_document(html, args) do
execute_with_module(fn module -> module.decode(html, args) end)
end

@impl true
def parse_fragment(html) do
execute_with_module(fn module -> module.decode_fragment(html) end)
def parse_fragment(html, args) do
execute_with_module(fn module -> module.decode_fragment(html, args) end)
end

defp execute_with_module(fun) do
Expand Down
4 changes: 2 additions & 2 deletions lib/floki/html_parser/html5ever.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Floki.HTMLParser.Html5ever do
@moduledoc false

@impl true
def parse_document(html) do
def parse_document(html, _args) do
case Code.ensure_loaded(Html5ever) do
{:module, module} ->
case module.parse(html) do
Expand All @@ -22,5 +22,5 @@ defmodule Floki.HTMLParser.Html5ever do

# NOTE: html5ever does not implement parse_fragment yet.
@impl true
def parse_fragment(html), do: parse_document(html)
def parse_fragment(html, args), do: parse_document(html, args)
end
4 changes: 2 additions & 2 deletions lib/floki/html_parser/mochiweb.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule Floki.HTMLParser.Mochiweb do
@root_node "floki"

@impl true
def parse_document(html) do
def parse_document(html, _args) do
html = "<#{@root_node}>#{html}</#{@root_node}>"
{@root_node, [], parsed} = :floki_mochi_html.parse(html)

Expand All @@ -14,5 +14,5 @@ defmodule Floki.HTMLParser.Mochiweb do

# NOTE: mochi_html cannot make a distinction of a fragment and document.
@impl true
def parse_fragment(html), do: parse_document(html)
def parse_fragment(html, args), do: parse_document(html, args)
end
74 changes: 74 additions & 0 deletions test/floki_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,80 @@ defmodule FlokiTest do

Enum.each(@plain_text_tags, validate_html)
end

@tag only_parser: FastHtml
test "parses all elements as strings by default" do
  # No :parser_args are passed here, so this pins the default output shape:
  # every tag name, including the non-standard `custom` tag, is a binary.
  html = html_body(~s(<div><p>Content</p><custom>Custom</custom></div>))

  {:ok, parsed} = Floki.parse_document(html)

  # Compared with `==` like the other :parser_args tests in this file. The
  # expected tree holds only literals, so this is as strict as a pattern match.
  assert [
           {
             "html",
             [],
             [
               {"head", [], []},
               {
                 "body",
                 [],
                 [
                   {"div", [], [{"p", [], ["Content"]}, {"custom", [], ["Custom"]}]}
                 ]
               }
             ]
           }
         ] == parsed
end

@tag only_parser: FastHtml
test "parses known elements as atoms when :html_atoms format argument is given" do
  markup = html_body(~s(<div><p>Content</p><custom>Custom</custom></div>))

  # Expected tree, built inside-out. The assertion requires the standard tags
  # to come back as atoms while the non-standard `custom` tag stays a binary.
  div_node = {:div, [], [{:p, [], ["Content"]}, {"custom", [], ["Custom"]}]}
  body_node = {:body, [], [div_node]}
  expected = [{:html, [], [{:head, [], []}, body_node]}]

  # :parser_args is the option under test; its value is handed to the parser.
  {:ok, tree} = Floki.parse_document(markup, parser_args: [format: [:html_atoms]])

  assert expected == tree
end
end

describe "parse_fragment/2" do
  @tag only_parser: FastHtml
  test "does not parse a table row with missing parent table tag by default" do
    row_markup = "<tr><td>Column 1</td><td>Column 2</td></tr>"

    {:ok, fragment} = Floki.parse_fragment(row_markup)

    # With no context supplied, the expected result keeps only the text of
    # the cells; the row and cell tags are absent.
    assert ["Column 1Column 2"] == fragment
  end

  @tag only_parser: FastHtml
  test "parses a table row with missing parent table tag when table context is given" do
    row_markup = "<tr><td>1</td><td>2</td></tr>"

    # Expected tree: the row is kept and appears wrapped in a `tbody` node.
    cells = [{"td", [], ["1"]}, {"td", [], ["2"]}]
    expected = [{"tbody", [], [{"tr", [], cells}]}]

    {:ok, fragment} = Floki.parse_fragment(row_markup, parser_args: [context: "table"])

    assert expected == fragment
  end
end

# Floki.raw_html/2
Expand Down