From 3cee6bd2f0e7884527a40c0ba130f60459caf1cc Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Thu, 25 Jun 2026 15:14:37 +0200 Subject: [PATCH 1/3] Fix empty-title wikilinks being incorrectly parsed as Wikilink nodes MediaWiki does not treat [[]] or [[|...]] as wikilinks because their title is empty. Calls to filter_wikilinks() on text containing these sequences returned false positives. The fix is in builder.py (pure Python, runs for both the C and Python tokenizers): when a WikilinkClose token is encountered and the resulting title is an empty string, the node is demoted to a plain Text node containing the original bracket sequence instead of being wrapped in a Wikilink. Fixes #292. --- src/mwparserfromhell/parser/builder.py | 10 ++++++++-- tests/test_parser.py | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/mwparserfromhell/parser/builder.py b/src/mwparserfromhell/parser/builder.py index ab2cb9a4..db4aa22b 100644 --- a/src/mwparserfromhell/parser/builder.py +++ b/src/mwparserfromhell/parser/builder.py @@ -162,8 +162,14 @@ def _handle_wikilink(self, token): self._push() elif isinstance(token, tokens.WikilinkClose): if title is not None: - return Wikilink(title, self._pop()) - return Wikilink(self._pop()) + wikilink = Wikilink(title, self._pop()) + else: + wikilink = Wikilink(self._pop()) + # MediaWiki treats [[]] and [[|...]] (empty title) as plain + # text rather than actual wikilinks. + if not str(wikilink.title): + return Text(str(wikilink)) + return wikilink else: self._write(self._handle_token(token)) raise ParserError("_handle_wikilink() missed a close token") diff --git a/tests/test_parser.py b/tests/test_parser.py index 8ec4a40d..b8886e90 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -102,3 +102,24 @@ def test_skip_style_tags(pyparser): without_style = parser.Parser().parse(text, skip_style_tags=True) assert_wikicode_equal(a, with_style) assert_wikicode_equal(b, without_style) + + +@pytest.mark.parametrize( + "text", + [ + "[[]]", + "[[|]]", + "[[|foo]]", + "[[|||]]", + ], +) +def test_empty_title_wikilink_is_text(text): + """[[]] and [[|...]] have an empty title and should not be wikilinks. + + MediaWiki does not render these as hyperlinks; the parser must treat them + as plain text so that filter_wikilinks() does not return false positives. + Regression test for https://github.com/earwig/mwparserfromhell/issues/292. + """ + parsed = parser.Parser().parse(text) + assert parsed.filter_wikilinks() == [] + assert str(parsed) == text From d8f9d4acee530acb63a34697fee6540fce903ba6 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Thu, 25 Jun 2026 19:06:11 +0200 Subject: [PATCH 2/3] Preserve empty-title wikilink nodes --- src/mwparserfromhell/parser/builder.py | 10 ++-------- src/mwparserfromhell/wikicode.py | 12 ++++++++---- tests/test_parser.py | 10 ++++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mwparserfromhell/parser/builder.py b/src/mwparserfromhell/parser/builder.py index db4aa22b..ab2cb9a4 100644 --- a/src/mwparserfromhell/parser/builder.py +++ b/src/mwparserfromhell/parser/builder.py @@ -162,14 +162,8 @@ def _handle_wikilink(self, token): self._push() elif isinstance(token, tokens.WikilinkClose): if title is not None: - wikilink = Wikilink(title, self._pop()) - else: - wikilink = Wikilink(self._pop()) - # MediaWiki treats [[]] and [[|...]] (empty title) as plain - # text rather than actual wikilinks. - if not str(wikilink.title): - return Text(str(wikilink)) - return wikilink + return Wikilink(title, self._pop()) + return Wikilink(self._pop()) else: self._write(self._handle_token(token)) raise ParserError("_handle_wikilink() missed a close token") diff --git a/src/mwparserfromhell/wikicode.py b/src/mwparserfromhell/wikicode.py index 482d9375..7739562c 100644 --- a/src/mwparserfromhell/wikicode.py +++ b/src/mwparserfromhell/wikicode.py @@ -761,8 +761,12 @@ def ifilter_wikilinks( This is equivalent to :meth:`ifilter` with *forcetype* set to :class:`~wikilink.Wikilink`. """ - return self.ifilter( - recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink + return ( + node + for node in self.ifilter( + recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink + ) + if str(node.title) ) @overload @@ -934,8 +938,8 @@ def filter_wikilinks( This is equivalent to :meth:`filter` with *forcetype* set to :class:`~wikilink.Wikilink`. """ - return self.filter( - recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink + return list( + self.ifilter_wikilinks(recursive=recursive, matches=matches, flags=flags) ) def get_sections( diff --git a/tests/test_parser.py b/tests/test_parser.py index b8886e90..c2939b37 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -113,13 +113,15 @@ def test_skip_style_tags(pyparser): "[[|||]]", ], ) -def test_empty_title_wikilink_is_text(text): - """[[]] and [[|...]] have an empty title and should not be wikilinks. +def test_empty_title_wikilink_filter_wikilinks(text): + """[[]] and [[|...]] have an empty title and should not be links. - MediaWiki does not render these as hyperlinks; the parser must treat them - as plain text so that filter_wikilinks() does not return false positives. + MediaWiki does not render these as hyperlinks; filter_wikilinks() should + not return false positives, while the parsed Wikilink node remains + available to tools that want to detect and fix invalid wikilinks. Regression test for https://github.com/earwig/mwparserfromhell/issues/292. """ parsed = parser.Parser().parse(text) assert parsed.filter_wikilinks() == [] + assert isinstance(parsed.get(0), Wikilink) assert str(parsed) == text From 1be77275798705acb20983e4fcd737589405aaea Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Fri, 26 Jun 2026 11:46:33 +0200 Subject: [PATCH 3/3] Keep wikilink filters semantically aligned --- src/mwparserfromhell/wikicode.py | 18 ++++++++++-------- tests/test_parser.py | 3 +++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/mwparserfromhell/wikicode.py b/src/mwparserfromhell/wikicode.py index 7739562c..b6c379f4 100644 --- a/src/mwparserfromhell/wikicode.py +++ b/src/mwparserfromhell/wikicode.py @@ -160,6 +160,12 @@ def getter(i: int, node: Node) -> Generator[tuple[int, Node]]: else: inodes = enumerate(self.nodes) for i, node in inodes: + if ( + forcetype is Wikilink + and isinstance(node, Wikilink) + and not str(node.title) + ): + continue if (forcetype is None or isinstance(node, forcetype)) and match( cast(N, node) ): @@ -761,12 +767,8 @@ def ifilter_wikilinks( This is equivalent to :meth:`ifilter` with *forcetype* set to :class:`~wikilink.Wikilink`. """ - return ( - node - for node in self.ifilter( - recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink - ) - if str(node.title) + return self.ifilter( + recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink ) @overload @@ -938,8 +940,8 @@ def filter_wikilinks( This is equivalent to :meth:`filter` with *forcetype* set to :class:`~wikilink.Wikilink`. """ - return list( - self.ifilter_wikilinks(recursive=recursive, matches=matches, flags=flags) + return self.filter( + recursive=recursive, matches=matches, flags=flags, forcetype=Wikilink ) def get_sections( diff --git a/tests/test_parser.py b/tests/test_parser.py index c2939b37..2c921f42 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -123,5 +123,8 @@ def test_empty_title_wikilink_filter_wikilinks(text): """ parsed = parser.Parser().parse(text) assert parsed.filter_wikilinks() == [] + assert parsed.filter(forcetype=Wikilink) == [] + assert list(parsed.ifilter(forcetype=Wikilink)) == [] assert isinstance(parsed.get(0), Wikilink) + assert parsed.get(0) in parsed.filter() assert str(parsed) == text