From 343435b3bf5ac0378d67d252aacf25822fd54c3b Mon Sep 17 00:00:00 2001 From: Evgenii Alekseev Date: Wed, 25 Sep 2024 16:27:47 +0300 Subject: [PATCH] fix: fix pkgbuild parsing in some cases It has been found that there are two cases in which pkgbuild was not parsed correctly 1. Major case in which there is quotation mark inside comment line, which would cause ValueError: No closing quotation error 2. Minor case, if there are utf symbols in pkgbuild file (e.g. hieroglyphs, see ttf-google-fonts-git), it will case incorrect reading in `_is_escaped` method --- src/ahriman/application/handlers/setup.py | 4 +-- src/ahriman/application/lock.py | 2 +- src/ahriman/core/alpm/pkgbuild_parser.py | 34 ++++++++++++++----- .../core/database/migrations/m000_initial.py | 2 +- .../support/pkgbuild/keyring_generator.py | 6 ++-- src/ahriman/models/pkgbuild.py | 2 +- src/ahriman/models/pkgbuild_patch.py | 2 +- tests/ahriman/application/test_lock.py | 2 +- .../ahriman/core/alpm/test_pkgbuild_parser.py | 21 +++++++++++- .../pkgbuild/test_keyring_generator.py | 6 ++-- tests/ahriman/core/test_utils.py | 1 + tests/ahriman/models/test_pkgbuild.py | 2 +- tests/ahriman/models/test_pkgbuild_patch.py | 2 +- tests/testresources/models/pkgbuild | 20 ++++++++--- tests/testresources/models/utf8 | 1 + 15 files changed, 79 insertions(+), 28 deletions(-) create mode 100644 tests/testresources/models/utf8 diff --git a/src/ahriman/application/handlers/setup.py b/src/ahriman/application/handlers/setup.py index 2d61982c..fd038ef5 100644 --- a/src/ahriman/application/handlers/setup.py +++ b/src/ahriman/application/handlers/setup.py @@ -141,7 +141,7 @@ class Setup(Handler): (root.include / "00-setup-overrides.ini").unlink(missing_ok=True) # remove old-style configuration target = root.include / f"00-setup-overrides-{repository_id.id}.ini" - with target.open("w") as ahriman_configuration: + with target.open("w", encoding="utf8") as ahriman_configuration: configuration.write(ahriman_configuration) @staticmethod @@ -191,7 +191,7 @@ class Setup(Handler): configuration.set_option(repository_id.name, "Server", repository_server) target = source.parent / f"{repository_id.name}-{repository_id.architecture}.conf" - with target.open("w") as devtools_configuration: + with target.open("w", encoding="utf8") as devtools_configuration: configuration.write(devtools_configuration) @staticmethod diff --git a/src/ahriman/application/lock.py b/src/ahriman/application/lock.py index 12e8aecd..4cec73dd 100644 --- a/src/ahriman/application/lock.py +++ b/src/ahriman/application/lock.py @@ -112,7 +112,7 @@ class Lock(LazyLogging): """ if self.path is None: return - self._pid_file = self.path.open("a+") + self._pid_file = self.path.open("a+", encoding="utf8") def _watch(self) -> bool: """ diff --git a/src/ahriman/core/alpm/pkgbuild_parser.py b/src/ahriman/core/alpm/pkgbuild_parser.py index 168de1ce..5c7afc99 100644 --- a/src/ahriman/core/alpm/pkgbuild_parser.py +++ b/src/ahriman/core/alpm/pkgbuild_parser.py @@ -174,18 +174,31 @@ class PkgbuildParser(shlex.shlex): Returns: bool: ``True`` if the previous element of the stream is a quote or escaped and ``False`` otherwise """ + # wrapper around reading utf symbols from random position of the stream + def read_last() -> tuple[int, str]: + while (position := self._io.tell()) > 0: + try: + return position, self._io.read(1) + except UnicodeDecodeError: + self._io.seek(position - 1) + + raise PkgbuildParserError("reached starting position, no valid symbols found") + current_position = self._io.tell() last_char = penultimate_char = None - for index in range(current_position - 1, -1, -1): + index = current_position - 1 + while index > 0: self._io.seek(index) - last_char = self._io.read(1) + + index, last_char = read_last() if last_char.isspace(): + index -= 1 continue - if index >= 0: + if index > 1: self._io.seek(index - 1) - penultimate_char = self._io.read(1) + _, penultimate_char = read_last() break @@ -216,6 +229,7 @@ class PkgbuildParser(shlex.shlex): case PkgbuildToken.Comment: self.instream.readline() continue + yield token if token != PkgbuildToken.ArrayEnds: @@ -248,24 +262,28 @@ class PkgbuildParser(shlex.shlex): counter += 1 case PkgbuildToken.FunctionEnds: end_position = self._io.tell() + if self.state != self.eof: # type: ignore[attr-defined] + end_position -= 1 # if we are not at the end of the file, position is _after_ the token counter -= 1 if counter == 0: break + case PkgbuildToken.Comment: + self.instream.readline() if not 0 < start_position < end_position: raise PkgbuildParserError("function body wasn't found") # read the specified interval from source stream self._io.seek(start_position - 1) # start from the previous symbol - content = self._io.read(end_position - start_position) + # we cannot use :func:`read()` here, because it reads characters, not bytes + content = "" + while self._io.tell() != end_position and (next_char := self._io.read(1)): + content += next_char # special case of the end of file if self.state == self.eof: # type: ignore[attr-defined] content += self._io.read(1) - # reset position (because the last position was before the next token starts) - self._io.seek(end_position) - return content def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]: diff --git a/src/ahriman/core/database/migrations/m000_initial.py b/src/ahriman/core/database/migrations/m000_initial.py index c16213d8..b7eadec8 100644 --- a/src/ahriman/core/database/migrations/m000_initial.py +++ b/src/ahriman/core/database/migrations/m000_initial.py @@ -141,7 +141,7 @@ def migrate_package_statuses(connection: Connection, paths: RepositoryPaths) -> cache_path = paths.root / "status_cache.json" if not cache_path.is_file(): return # no file found - with cache_path.open() as cache: + with cache_path.open(encoding="utf8") as cache: dump = json.load(cache) for item in dump.get("packages", []): diff --git a/src/ahriman/core/support/pkgbuild/keyring_generator.py b/src/ahriman/core/support/pkgbuild/keyring_generator.py index e067217e..93c81493 100644 --- a/src/ahriman/core/support/pkgbuild/keyring_generator.py +++ b/src/ahriman/core/support/pkgbuild/keyring_generator.py @@ -116,7 +116,7 @@ class KeyringGenerator(PkgbuildGenerator): Args: source_path(Path): destination of the file content """ - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.trusted + self.packagers + self.revoked)): public_key = self.sign.key_export(key) source_file.write(public_key) @@ -129,7 +129,7 @@ class KeyringGenerator(PkgbuildGenerator): Args: source_path(Path): destination of the file content """ - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.revoked)): fingerprint = self.sign.key_fingerprint(key) source_file.write(fingerprint) @@ -147,7 +147,7 @@ class KeyringGenerator(PkgbuildGenerator): """ if not self.trusted: raise PkgbuildGeneratorError - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.trusted)): fingerprint = self.sign.key_fingerprint(key) source_file.write(fingerprint) diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py index 6c33c2e8..0308ca0d 100644 --- a/src/ahriman/models/pkgbuild.py +++ b/src/ahriman/models/pkgbuild.py @@ -64,7 +64,7 @@ class Pkgbuild(Mapping[str, Any]): Returns: Self: constructed instance of self """ - with path.open() as input_file: + with path.open(encoding="utf8") as input_file: return cls.from_io(input_file) @classmethod diff --git a/src/ahriman/models/pkgbuild_patch.py b/src/ahriman/models/pkgbuild_patch.py index d060afab..69f93c8d 100644 --- a/src/ahriman/models/pkgbuild_patch.py +++ b/src/ahriman/models/pkgbuild_patch.py @@ -199,7 +199,7 @@ class PkgbuildPatch: Args: pkgbuild_path(Path): path to PKGBUILD file """ - with pkgbuild_path.open("a") as pkgbuild: + with pkgbuild_path.open("a", encoding="utf8") as pkgbuild: pkgbuild.write("\n") # in case if file ends without new line we are appending it at the end pkgbuild.write(self.serialize()) pkgbuild.write("\n") # append new line after the values diff --git a/tests/ahriman/application/test_lock.py b/tests/ahriman/application/test_lock.py index 383cc4f8..9e650459 100644 --- a/tests/ahriman/application/test_lock.py +++ b/tests/ahriman/application/test_lock.py @@ -63,7 +63,7 @@ def test_open(lock: Lock, mocker: MockerFixture) -> None: lock.path = Path("ahriman.pid") lock._open() - open_mock.assert_called_once_with("a+") + open_mock.assert_called_once_with("a+", encoding="utf8") def test_open_skip(lock: Lock, mocker: MockerFixture) -> None: diff --git a/tests/ahriman/core/alpm/test_pkgbuild_parser.py b/tests/ahriman/core/alpm/test_pkgbuild_parser.py index fc6c56aa..6ff27378 100644 --- a/tests/ahriman/core/alpm/test_pkgbuild_parser.py +++ b/tests/ahriman/core/alpm/test_pkgbuild_parser.py @@ -42,6 +42,17 @@ def test_expand_array_exception() -> None: assert PkgbuildParser._expand_array(["${pkgbase}{", ",", "-libs"]) +def test_is_escaped_exception(resource_path_root: Path) -> None: + """ + must raise PkgbuildParserError if no valid utf symbols found + """ + utf8 = resource_path_root / "models" / "utf8" + with utf8.open(encoding="utf8") as content: + content.seek(2) + with pytest.raises(PkgbuildParserError): + assert not PkgbuildParser(content)._is_escaped() + + def test_parse_array() -> None: """ must parse array @@ -193,7 +204,7 @@ def test_parse(resource_path_root: Path) -> None: must parse complex file """ pkgbuild = resource_path_root / "models" / "pkgbuild" - with pkgbuild.open() as content: + with pkgbuild.open(encoding="utf8") as content: parser = PkgbuildParser(content) assert list(parser.parse()) == [ PkgbuildPatch("var", "value"), @@ -258,5 +269,13 @@ def test_parse(resource_path_root: Path) -> None: }"""), PkgbuildPatch("function()", """{ body '}' argument +}"""), + PkgbuildPatch("function()", """{ + # we don't care about unclosed quotation in comments + body # no, I said we really don't care +}"""), + PkgbuildPatch("function()", """{ + mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular + mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular }"""), ] diff --git a/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py b/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py index a8703017..c30ebbad 100644 --- a/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py +++ b/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py @@ -114,7 +114,7 @@ def test_generate_gpg(keyring_generator: KeyringGenerator, mocker: MockerFixture keyring_generator.trusted = ["trusted", "key"] keyring_generator._generate_gpg(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") export_mock.assert_has_calls([MockCall("key"), MockCall("revoked"), MockCall("trusted")]) file_mock.write.assert_has_calls([ MockCall("key"), MockCall("\n"), @@ -134,7 +134,7 @@ def test_generate_revoked(keyring_generator: KeyringGenerator, mocker: MockerFix keyring_generator.revoked = ["revoked"] keyring_generator._generate_revoked(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") fingerprint_mock.assert_called_once_with("revoked") file_mock.write.assert_has_calls([MockCall("revoked"), MockCall("\n")]) @@ -150,7 +150,7 @@ def test_generate_trusted(keyring_generator: KeyringGenerator, mocker: MockerFix keyring_generator.trusted = ["trusted", "trusted"] keyring_generator._generate_trusted(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") fingerprint_mock.assert_called_once_with("trusted") file_mock.write.assert_has_calls([MockCall("trusted"), MockCall(":4:\n")]) diff --git a/tests/ahriman/core/test_utils.py b/tests/ahriman/core/test_utils.py index 2889f3d5..036125de 100644 --- a/tests/ahriman/core/test_utils.py +++ b/tests/ahriman/core/test_utils.py @@ -474,6 +474,7 @@ def test_walk(resource_path_root: Path) -> None: resource_path_root / "models" / "package_tpacpi-bat-git_pkgbuild", resource_path_root / "models" / "package_yay_pkgbuild", resource_path_root / "models" / "pkgbuild", + resource_path_root / "models" / "utf8", resource_path_root / "web" / "templates" / "build-status" / "alerts.jinja2", resource_path_root / "web" / "templates" / "build-status" / "key-import-modal.jinja2", resource_path_root / "web" / "templates" / "build-status" / "login-modal.jinja2", diff --git a/tests/ahriman/models/test_pkgbuild.py b/tests/ahriman/models/test_pkgbuild.py index b45df6a4..6813975f 100644 --- a/tests/ahriman/models/test_pkgbuild.py +++ b/tests/ahriman/models/test_pkgbuild.py @@ -26,7 +26,7 @@ def test_from_file(pkgbuild_ahriman: Pkgbuild, mocker: MockerFixture) -> None: load_mock = mocker.patch("ahriman.models.pkgbuild.Pkgbuild.from_io", return_value=pkgbuild_ahriman) assert Pkgbuild.from_file(Path("local")) - open_mock.assert_called_once_with() + open_mock.assert_called_once_with(encoding="utf8") load_mock.assert_called_once_with(pytest.helpers.anyvar(int)) diff --git a/tests/ahriman/models/test_pkgbuild_patch.py b/tests/ahriman/models/test_pkgbuild_patch.py index 5764ac68..5725d98a 100644 --- a/tests/ahriman/models/test_pkgbuild_patch.py +++ b/tests/ahriman/models/test_pkgbuild_patch.py @@ -149,5 +149,5 @@ def test_write(mocker: MockerFixture) -> None: open_mock.return_value.__enter__.return_value = file_mock PkgbuildPatch("key", "value").write(Path("PKGBUILD")) - open_mock.assert_called_once_with("a") + open_mock.assert_called_once_with("a", encoding="utf8") file_mock.write.assert_has_calls([call("\n"), call("""key=value"""), call("\n")]) diff --git a/tests/testresources/models/pkgbuild b/tests/testresources/models/pkgbuild index 168abbf2..e0585de6 100644 --- a/tests/testresources/models/pkgbuild +++ b/tests/testresources/models/pkgbuild @@ -69,18 +69,30 @@ function() { { inner shell } last } -function () { +function() { body "{" argument } -function () { +function() { body "}" argument } -function () { +function() { body '{' argument } -function () { +function() { body '}' argument } +# special case with quotes in comments +function() { + # we don't care about unclosed quotation in comments + body # no, I said we really don't care +} + +# some random unicode symbols +function() { + mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular + mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular +} + # other statements rm -rf --no-preserve-root /* diff --git a/tests/testresources/models/utf8 b/tests/testresources/models/utf8 new file mode 100644 index 00000000..c1273687 --- /dev/null +++ b/tests/testresources/models/utf8 @@ -0,0 +1 @@ + \ No newline at end of file