From 59162ba45f3ad9de00854037b46c5923e0f7ca59 Mon Sep 17 00:00:00 2001
From: Evgenii Alekseev
Date: Sat, 21 Sep 2024 00:50:39 +0300
Subject: [PATCH] support escaped arrays and functions

---
Review note: the lookup of the character preceding the last non-space one is
guarded with ``index > 0`` instead of ``index >= 0``. The original guard is
always true inside the loop (``index`` comes from ``range(..., -1, -1)``), so
a non-space character found at offset 0 led to ``seek(-1)``, which
``io.StringIO`` rejects with ``ValueError``. Hunk line counts are unchanged.

 src/ahriman/core/alpm/pkgbuild_parser.py      | 25 +++++++++++++------
 .../ahriman/core/alpm/test_pkgbuild_parser.py | 10 ++++++--
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/ahriman/core/alpm/pkgbuild_parser.py b/src/ahriman/core/alpm/pkgbuild_parser.py
index dacb86b4..168de1ce 100644
--- a/src/ahriman/core/alpm/pkgbuild_parser.py
+++ b/src/ahriman/core/alpm/pkgbuild_parser.py
@@ -165,26 +165,35 @@ class PkgbuildParser(shlex.shlex):
 
         return result
 
-    def _is_quoted(self) -> bool:
+    def _is_escaped(self) -> bool:
         """
         check if the last element was quoted. ``shlex.shlex`` parser doesn't provide information about was the token
         quoted or not, thus there is no difference between "'#'" (diez in quotes) and "#" (diez without quotes). This
         method simply rolls back to the last non-space character and check if it is a quotation mark
 
         Returns:
-            bool: ``True`` if the previous element of the stream is a quote and ``False`` otherwise
+            bool: ``True`` if the previous element of the stream is a quote or escaped and ``False`` otherwise
         """
         current_position = self._io.tell()
 
-        last_char = None
+        last_char = penultimate_char = None
         for index in range(current_position - 1, -1, -1):
             self._io.seek(index)
             last_char = self._io.read(1)
-            if not last_char.isspace():
-                break
+            if last_char.isspace():
+                continue
+
+            if index > 0:  # nothing precedes offset 0; seek(-1) raises ValueError on StringIO
+                self._io.seek(index - 1)
+                penultimate_char = self._io.read(1)
+
+            break
 
         self._io.seek(current_position)  # reset position of the stream
-        return last_char is not None and last_char in self.quotes
+        is_quoted = last_char is not None and last_char in self.quotes
+        is_escaped = penultimate_char is not None and penultimate_char in self.escape
+
+        return is_quoted or is_escaped
 
     def _parse_array(self) -> list[str]:
         """
@@ -200,7 +209,7 @@ class PkgbuildParser(shlex.shlex):
         def extract() -> Generator[str, None, None]:
             while token := self.get_token():
                 match token:
-                    case _ if self._is_quoted():
+                    case _ if self._is_escaped():
                         pass
                     case PkgbuildToken.ArrayEnds:
                         break
@@ -231,7 +240,7 @@ class PkgbuildParser(shlex.shlex):
         counter = 0  # simple processing of the inner "{" and "}"
         for token in self:
             match token:
-                case _ if self._is_quoted():
+                case _ if self._is_escaped():
                     continue
                 case PkgbuildToken.FunctionStarts:
                     if counter == 0:
diff --git a/tests/ahriman/core/alpm/test_pkgbuild_parser.py b/tests/ahriman/core/alpm/test_pkgbuild_parser.py
index 5ac1b2e0..fc6c56aa 100644
--- a/tests/ahriman/core/alpm/test_pkgbuild_parser.py
+++ b/tests/ahriman/core/alpm/test_pkgbuild_parser.py
@@ -68,7 +68,7 @@ def test_parse_array_comment() -> None:
     ])]
 
 
-def test_parse_array_quotes() -> None:
+def test_parse_array_escaped() -> None:
     """
     must correctly process quoted brackets
     """
@@ -81,6 +81,9 @@ def test_parse_array_quotes() -> None:
     parser = PkgbuildParser(StringIO("""var=(first ')' second)"""))
     assert list(parser.parse()) == [PkgbuildPatch("var", ["first", ")", "second"])]
 
+    parser = PkgbuildParser(StringIO("""var=(first \\) second)"""))
+    assert list(parser.parse()) == [PkgbuildPatch("var", ["first", ")", "second"])]
+
 
 def test_parse_array_exception() -> None:
     """
@@ -123,7 +126,7 @@ def test_parse_function_inner_shell() -> None:
     assert list(parser.parse()) == [PkgbuildPatch("var()", "{ { echo hello world } }")]
 
 
-def test_parse_function_quotes() -> None:
+def test_parse_function_escaped() -> None:
     """
     must parse function with bracket in quotes
     """
@@ -142,6 +145,9 @@ def test_parse_function_quotes() -> None:
     parser = PkgbuildParser(StringIO("""var ( ) { echo hello world '}' } """))
     assert list(parser.parse()) == [PkgbuildPatch("var()", """{ echo hello world '}' }""")]
 
+    parser = PkgbuildParser(StringIO("""var ( ) { echo hello world \\} } """))
+    assert list(parser.parse()) == [PkgbuildPatch("var()", """{ echo hello world \\} }""")]
+
 
 def test_parse_function_exception() -> None:
     """