Skip to content

Commit

Permalink
Fix: headers with tag
Browse files: browse the repository at this point in the history
  • Loading branch information
AlexanderDokuchaev committed Jun 30, 2024
1 parent 89e9e58 commit 4c6de7a
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 5 deletions.
11 changes: 6 additions & 5 deletions md_dead_link_check/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

RE_HEADER = r"^(?:\s*[-+*]\s+|)[#]{1,6}\s*(.*)"
RE_LINK = r"([!]{0,1})\[([^\]!]*)\]\(([^()\s]+(?:\([^()\s]*\))*)\s*(.*?)\)"
RE_HTML_A_TAG_ID = r"<\w+\s+(?:[^>]*?\s+)?(?:id|name)=([\"'])(.*?)\1"
RE_HTML_A_TAG_HREF = r"<\w+\s+(?:[^>]*?\s+)?href=([\"'])(.*?)\1"
RE_HTML_TAG = r"</?\w+[^>]*>"
RE_HTML_TAG_ID = r"<\w+\s+(?:[^>]*?\s+)?(?:id|name)=([\"'])(.*?)\1"
RE_HTML_TAG_HREF = r"<\w+\s+(?:[^>]*?\s+)?href=([\"'])(.*?)\1"
RE_SUB = r"[$`][^`]+?[$`]"


Expand Down Expand Up @@ -57,6 +58,7 @@ def process_header_to_fragment(header: str) -> str:
"""

fragment = header.strip()
fragment = re.sub(RE_HTML_TAG, "", fragment)
while True:
res = re.search(RE_LINK, fragment)
if not res:
Expand Down Expand Up @@ -103,7 +105,6 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
repeat += 1
fragment = f"{_fragment}-{repeat}"
fragments.append(fragment)
continue

# Remove inline spans delimited by $ or ` from the line
line = re.sub(RE_SUB, "", line)
Expand All @@ -121,12 +122,12 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
links.append(LinkInfo(link, path, line_num))

# Detect an id or name attribute on any HTML tag, e.g. <a id="introduction"></a>
matches = re.findall(RE_HTML_A_TAG_ID, line)
matches = re.findall(RE_HTML_TAG_ID, line)
for _, id in matches:
fragments.append(id.lower())

# Detect a link in the href attribute of any HTML tag, e.g. <a href="introduction"></a>
matches = re.findall(RE_HTML_A_TAG_HREF, line)
matches = re.findall(RE_HTML_TAG_HREF, line)
for _, link in matches:
links.append(LinkInfo(link, path, line_num))
return MarkdownInfo(path=path, fragments=fragments, links=links)
Expand Down
4 changes: 4 additions & 0 deletions tests/test_md_files/a.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ Some text
[0](#badge)
[1](#badge-1)
[1](#badge-2)

#### Some [link](b.md) [link2](b.md)

#### Some tag<a id="id"><a id="id2"></a> asd <a id="id3"></a>
16 changes: 16 additions & 0 deletions tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def test_find_all_markdowns_in_repo():
("🙀 header with icon", "-header-with-icon"),
("דוגמא", "דוגמא"),
("例子", "例子"),
("text (br)", "text"),
),
)
def test_process_header_to_fragment(header, fragment):
Expand All @@ -59,6 +60,11 @@ def test_process_md_file():
"badge",
"badge-1",
"badge-2",
"some-link-link2",
"some-tag-asd-",
"id",
"id2",
"id3",
]

ref_links = [
Expand Down Expand Up @@ -127,5 +133,15 @@ def test_process_md_file():
location=Path("tests/test_md_files/a.md"),
line_num=51,
),
LinkInfo(
link="b.md",
location=Path("tests/test_md_files/a.md"),
line_num=53,
),
LinkInfo(
link="b.md",
location=Path("tests/test_md_files/a.md"),
line_num=53,
),
]
assert md_info.links == ref_links

0 comments on commit 4c6de7a

Please sign in to comment.