# File: lib/earmark/restructure.ex
defmodule Earmark.Restructure do
  @moduledoc """
  Helpers for walking an AST, collecting information from it and/or
  restructuring it while it is walked.
  """

  @typedoc "A single AST item: either plain text or a 4-tuple element."
  @type ast_node :: String.t() | {String.t(), list(), list(), map()}

  @typedoc "A list of AST items."
  @type ast :: [ast_node()]

  @doc """
  Walks an AST and allows you to process it (storing details in `acc`) and/or
  modify it as it is walked.

  `items` is the AST you got from `EarmarkParser.as_ast()`.

  `acc` is the initial value of an accumulator that is passed to both
  `process_item_fn` and `process_list_fn` and accumulated. If your functions
  do not need to use or store any state, you can pass `nil`.

  The `process_item_fn` function is required. It takes two parameters, the
  single item to process (which will either be a string or a 4-tuple) and
  the accumulator, and returns a tuple `{processed_item, updated_acc}`.
  Returning the empty list for `processed_item` will remove the processed
  item from the AST. Returning a list of items splices those items into the
  parent in place of the processed item; items returned that way are not
  walked again. Any other return shape raises a `CaseClauseError`.

  The `process_list_fn` function is optional and defaults to no modification
  of items or accumulator. It takes two parameters, the list of items that
  are the sub-items of a given element in the AST (or the top-level list of
  items), and the accumulator, and returns a tuple
  `{processed_items_list, updated_acc}`. It is invoked on a list before the
  individual items of that list are processed.

  This function ends up returning `{ast, acc}`.
  """
  @spec walk_and_modify_ast(ast(), acc, (ast_node(), acc -> {ast_node() | list(), acc})) ::
          {ast(), acc}
        when acc: term()
  @spec walk_and_modify_ast(
          ast(),
          acc,
          (ast_node(), acc -> {ast_node() | list(), acc}),
          (ast(), acc -> {ast(), acc})
        ) :: {ast(), acc}
        when acc: term()
  def walk_and_modify_ast(items, acc, process_item_fn, process_list_fn \\ &{&1, &2})
      when is_list(items) and is_function(process_item_fn, 2) and
             is_function(process_list_fn, 2) do
    {items, acc} = process_list_fn.(items, acc)

    {ast, acc} =
      Enum.map_reduce(items, acc, fn item, acc ->
        walk_and_modify_ast_item(item, acc, process_item_fn, process_list_fn)
      end)

    # process_item_fn may return a list (possibly empty) instead of a single
    # item; flattening splices such lists into the surrounding item list.
    {List.flatten(ast), acc}
  end

  # Processes one item with process_item_fn and, when the result is still an
  # element (4-tuple), recurses into its children.
  defp walk_and_modify_ast_item(item, acc, process_item_fn, process_list_fn) do
    case process_item_fn.(item, acc) do
      {{type, attribs, items, annotations}, acc}
      when is_binary(type) and is_list(attribs) and is_list(items) and is_map(annotations) ->
        # walk_and_modify_ast/4 already returns a flat list of children.
        {items, acc} = walk_and_modify_ast(items, acc, process_item_fn, process_list_fn)
        {{type, attribs, items, annotations}, acc}

      {item_or_items, acc} when is_binary(item_or_items) or is_list(item_or_items) ->
        {item_or_items, acc}
    end
  end

  @doc """
  Utility for creating a restructuring that parses text by splitting it into
  parts "of interest" vs. "other parts" using a regular expression.

  Returns a list in which the parts matching `regex` have been replaced by
  the result of invoking `map_captures_fn` on the captures of each match (as
  returned by `Regex.scan/2`), interleaved with the remaining text parts, in
  the same order as they appeared in the plain text `item`.

  The list always starts and ends with a text part; such a part is the empty
  string when a match sits at the very beginning or end of `item`, or when
  two matches are directly adjacent.
  """
  @spec text_to_ast_list_splitting_regex(String.t(), Regex.t(), ([String.t()] -> term())) ::
          list()
  def text_to_ast_list_splitting_regex(item, regex, map_captures_fn)
      when is_binary(item) and is_function(map_captures_fn, 1) do
    interest_parts =
      regex
      |> Regex.scan(item)
      |> Enum.map(map_captures_fn)

    # Without the :trim option Regex.split keeps empty strings, including the
    # one "before" a match at the front of 'item' and the one "after" a match
    # at its end. other_parts therefore always has exactly one element more
    # than interest_parts, which is the shape zigzag_lists/2 requires.
    other_parts = Regex.split(regex, item)

    zigzag_lists(other_parts, interest_parts)
  end

  @doc """
  Given two lists that are either of equal length, or with the first list
  exactly one element longer than the second, returns a list that begins with
  the first element from the first list, then the first element from the
  second list, and so forth, alternating, until both lists are empty.

  `acc` holds the elements emitted so far in reverse order; callers normally
  leave it at its default.

  Raises a `FunctionClauseError` when the lengths of the lists do not satisfy
  the requirement above.
  """
  @spec zigzag_lists(list(), list()) :: list()
  @spec zigzag_lists(list(), list(), list()) :: list()
  def zigzag_lists(first, second, acc \\ [])

  def zigzag_lists([], [], acc) do
    Enum.reverse(acc)
  end

  def zigzag_lists([first | first_rest], second, acc) do
    # Swapping the lists on every step makes the two lists take turns.
    # Note that there will be no match for an empty 'first' list if 'second' is not empty,
    # and this for our use case is on purpose - the lists should either be equal in
    # length, or the first list as initially passed into the function should be one longer.
    zigzag_lists(second, first_rest, [first | acc])
  end
end

# File: test/restructure_test.exs
defmodule RestructureTest do
  use ExUnit.Case, async: true

  alias Earmark.Restructure

  @doc """
  handle_italics is an example of a structure-changing function, that
  takes a non-standard markdown where / is used as an italics marker,
  parses for that within text items, and transforms a node containing
  such markdown into a new structure with an "em" node.
  """
  def handle_italics(ast) do
    ast
    |> Restructure.walk_and_modify_ast("", &handle_italics_impl/2)
    |> elem(0)
  end

  # The accumulator holds the name of the last seen element. When that name
  # is "a", the item is passed through untouched and the accumulator is reset.
  def handle_italics_impl(item, "a"), do: {item, ""}

  def handle_italics_impl(item, acc) when is_binary(item) do
    new_item =
      Restructure.text_to_ast_list_splitting_regex(
        item,
        ~r/\/([[:graph:]].*?[[:graph:]]|[[:graph:]])\//,
        fn [_, content] ->
          {"em", [], [content], %{}}
        end
      )

    {new_item, acc}
  end

  def handle_italics_impl({name, _, _, _} = item, _acc) do
    # Store the last seen element name so we can skip handling
    # italics right after an "a" element has been seen.
    {item, name}
  end

  @doc """
  handle_bold is an example of a mostly-structure-preserving function
  that simply changes the element type, again to deal with a non-standard
  markdown where a single * is used to indicate "strong" text.
  """
  def handle_bold(ast) do
    ast
    |> Restructure.walk_and_modify_ast(nil, &handle_bold_impl/2)
    |> elem(0)
  end

  def handle_bold_impl({"em", attribs, items, annotations}, acc) do
    {{"strong", attribs, items, annotations}, acc}
  end

  def handle_bold_impl(item, acc), do: {item, acc}

  @doc """
  An example of a structure-modifying function that operates on the
  list of items in an AST node, removing any italic ("em") items.
  """
  def delete_italicized_text(items, acc) do
    {
      Enum.flat_map(items, fn item ->
        case item do
          {"em", _, _, _} -> []
          _ -> [item]
        end
      end),
      acc
    }
  end

  test "handle_bold_and_italic_from_nonstandard_markdown" do
    markdown = "Hello *boldness* my /italic/ friend!"
    {:ok, ast, []} = markdown |> EarmarkParser.as_ast()

    processed_ast =
      ast
      |> handle_bold()
      |> handle_italics()

    assert processed_ast == [
             {
               "p",
               [],
               [
                 "Hello ",
                 {"strong", [], ["boldness"], %{}},
                 " my ",
                 {"em", [], ["italic"], %{}},
                 " friend!"
               ],
               %{}
             }
           ]
  end

  test "delete_italicized_text" do
    markdown = "Hello *there* my *good* friend!"
    {:ok, ast, []} = markdown |> EarmarkParser.as_ast()

    {processed_ast, :acc_unused} =
      Restructure.walk_and_modify_ast(
        ast,
        :acc_unused,
        &{&1, &2},
        &delete_italicized_text/2
      )

    assert processed_ast == [{"p", [], ["Hello ", " my ", " friend!"], %{}}]
  end
end

# SPDX-License-Identifier: Apache-2.0