diff --git a/src/ahriman/application/handlers/setup.py b/src/ahriman/application/handlers/setup.py index 2d61982c..fd038ef5 100644 --- a/src/ahriman/application/handlers/setup.py +++ b/src/ahriman/application/handlers/setup.py @@ -141,7 +141,7 @@ class Setup(Handler): (root.include / "00-setup-overrides.ini").unlink(missing_ok=True) # remove old-style configuration target = root.include / f"00-setup-overrides-{repository_id.id}.ini" - with target.open("w") as ahriman_configuration: + with target.open("w", encoding="utf8") as ahriman_configuration: configuration.write(ahriman_configuration) @staticmethod @@ -191,7 +191,7 @@ class Setup(Handler): configuration.set_option(repository_id.name, "Server", repository_server) target = source.parent / f"{repository_id.name}-{repository_id.architecture}.conf" - with target.open("w") as devtools_configuration: + with target.open("w", encoding="utf8") as devtools_configuration: configuration.write(devtools_configuration) @staticmethod diff --git a/src/ahriman/application/lock.py b/src/ahriman/application/lock.py index 12e8aecd..4cec73dd 100644 --- a/src/ahriman/application/lock.py +++ b/src/ahriman/application/lock.py @@ -112,7 +112,7 @@ class Lock(LazyLogging): """ if self.path is None: return - self._pid_file = self.path.open("a+") + self._pid_file = self.path.open("a+", encoding="utf8") def _watch(self) -> bool: """ diff --git a/src/ahriman/core/alpm/pkgbuild_parser.py b/src/ahriman/core/alpm/pkgbuild_parser.py index 168de1ce..5c7afc99 100644 --- a/src/ahriman/core/alpm/pkgbuild_parser.py +++ b/src/ahriman/core/alpm/pkgbuild_parser.py @@ -174,18 +174,31 @@ class PkgbuildParser(shlex.shlex): Returns: bool: ``True`` if the previous element of the stream is a quote or escaped and ``False`` otherwise """ + # wrapper around reading utf symbols from random position of the stream + def read_last() -> tuple[int, str]: + while (position := self._io.tell()) > 0: + try: + return position, self._io.read(1) + except UnicodeDecodeError: + self._io.seek(position - 1) + + raise PkgbuildParserError("reached starting position, no valid symbols found") + current_position = self._io.tell() last_char = penultimate_char = None - for index in range(current_position - 1, -1, -1): + index = current_position - 1 + while index > 0: self._io.seek(index) - last_char = self._io.read(1) + + index, last_char = read_last() if last_char.isspace(): + index -= 1 continue - if index >= 0: + if index > 1: self._io.seek(index - 1) - penultimate_char = self._io.read(1) + _, penultimate_char = read_last() break @@ -216,6 +229,7 @@ class PkgbuildParser(shlex.shlex): case PkgbuildToken.Comment: self.instream.readline() continue + yield token if token != PkgbuildToken.ArrayEnds: @@ -248,24 +262,28 @@ class PkgbuildParser(shlex.shlex): counter += 1 case PkgbuildToken.FunctionEnds: end_position = self._io.tell() + if self.state != self.eof: # type: ignore[attr-defined] + end_position -= 1 # if we are not at the end of the file, position is _after_ the token counter -= 1 if counter == 0: break + case PkgbuildToken.Comment: + self.instream.readline() if not 0 < start_position < end_position: raise PkgbuildParserError("function body wasn't found") # read the specified interval from source stream self._io.seek(start_position - 1) # start from the previous symbol - content = self._io.read(end_position - start_position) + # we cannot use :func:`read()` here, because it reads characters, not bytes + content = "" + while self._io.tell() != end_position and (next_char := self._io.read(1)): + content += next_char # special case of the end of file if self.state == self.eof: # type: ignore[attr-defined] content += self._io.read(1) - # reset position (because the last position was before the next token starts) - self._io.seek(end_position) - return content def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]: diff --git a/src/ahriman/core/database/migrations/m000_initial.py b/src/ahriman/core/database/migrations/m000_initial.py index c16213d8..b7eadec8 100644 --- a/src/ahriman/core/database/migrations/m000_initial.py +++ b/src/ahriman/core/database/migrations/m000_initial.py @@ -141,7 +141,7 @@ def migrate_package_statuses(connection: Connection, paths: RepositoryPaths) -> cache_path = paths.root / "status_cache.json" if not cache_path.is_file(): return # no file found - with cache_path.open() as cache: + with cache_path.open(encoding="utf8") as cache: dump = json.load(cache) for item in dump.get("packages", []): diff --git a/src/ahriman/core/support/pkgbuild/keyring_generator.py b/src/ahriman/core/support/pkgbuild/keyring_generator.py index e067217e..93c81493 100644 --- a/src/ahriman/core/support/pkgbuild/keyring_generator.py +++ b/src/ahriman/core/support/pkgbuild/keyring_generator.py @@ -116,7 +116,7 @@ class KeyringGenerator(PkgbuildGenerator): Args: source_path(Path): destination of the file content """ - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.trusted + self.packagers + self.revoked)): public_key = self.sign.key_export(key) source_file.write(public_key) @@ -129,7 +129,7 @@ class KeyringGenerator(PkgbuildGenerator): Args: source_path(Path): destination of the file content """ - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.revoked)): fingerprint = self.sign.key_fingerprint(key) source_file.write(fingerprint) @@ -147,7 +147,7 @@ class KeyringGenerator(PkgbuildGenerator): """ if not self.trusted: raise PkgbuildGeneratorError - with source_path.open("w") as source_file: + with source_path.open("w", encoding="utf8") as source_file: for key in sorted(set(self.trusted)): fingerprint = self.sign.key_fingerprint(key) source_file.write(fingerprint) diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py index 6c33c2e8..0308ca0d 100644 --- a/src/ahriman/models/pkgbuild.py +++ b/src/ahriman/models/pkgbuild.py @@ -64,7 +64,7 @@ class Pkgbuild(Mapping[str, Any]): Returns: Self: constructed instance of self """ - with path.open() as input_file: + with path.open(encoding="utf8") as input_file: return cls.from_io(input_file) @classmethod diff --git a/src/ahriman/models/pkgbuild_patch.py b/src/ahriman/models/pkgbuild_patch.py index d060afab..69f93c8d 100644 --- a/src/ahriman/models/pkgbuild_patch.py +++ b/src/ahriman/models/pkgbuild_patch.py @@ -199,7 +199,7 @@ class PkgbuildPatch: Args: pkgbuild_path(Path): path to PKGBUILD file """ - with pkgbuild_path.open("a") as pkgbuild: + with pkgbuild_path.open("a", encoding="utf8") as pkgbuild: pkgbuild.write("\n") # in case if file ends without new line we are appending it at the end pkgbuild.write(self.serialize()) pkgbuild.write("\n") # append new line after the values diff --git a/tests/ahriman/application/test_lock.py b/tests/ahriman/application/test_lock.py index 383cc4f8..9e650459 100644 --- a/tests/ahriman/application/test_lock.py +++ b/tests/ahriman/application/test_lock.py @@ -63,7 +63,7 @@ def test_open(lock: Lock, mocker: MockerFixture) -> None: lock.path = Path("ahriman.pid") lock._open() - open_mock.assert_called_once_with("a+") + open_mock.assert_called_once_with("a+", encoding="utf8") def test_open_skip(lock: Lock, mocker: MockerFixture) -> None: diff --git a/tests/ahriman/core/alpm/test_pkgbuild_parser.py b/tests/ahriman/core/alpm/test_pkgbuild_parser.py index fc6c56aa..6ff27378 100644 --- a/tests/ahriman/core/alpm/test_pkgbuild_parser.py +++ b/tests/ahriman/core/alpm/test_pkgbuild_parser.py @@ -42,6 +42,17 @@ def test_expand_array_exception() -> None: assert PkgbuildParser._expand_array(["${pkgbase}{", ",", "-libs"]) +def test_is_escaped_exception(resource_path_root: Path) -> None: + """ + must raise PkgbuildParserError if no valid utf symbols found + """ + utf8 = resource_path_root / "models" / "utf8" + with utf8.open(encoding="utf8") as content: + content.seek(2) + with pytest.raises(PkgbuildParserError): + assert not PkgbuildParser(content)._is_escaped() + + def test_parse_array() -> None: """ must parse array @@ -193,7 +204,7 @@ def test_parse(resource_path_root: Path) -> None: must parse complex file """ pkgbuild = resource_path_root / "models" / "pkgbuild" - with pkgbuild.open() as content: + with pkgbuild.open(encoding="utf8") as content: parser = PkgbuildParser(content) assert list(parser.parse()) == [ PkgbuildPatch("var", "value"), @@ -258,5 +269,13 @@ def test_parse(resource_path_root: Path) -> None: }"""), PkgbuildPatch("function()", """{ body '}' argument +}"""), + PkgbuildPatch("function()", """{ + # we don't care about unclosed quotation in comments + body # no, I said we really don't care +}"""), + PkgbuildPatch("function()", """{ + mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular + mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular }"""), ] diff --git a/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py b/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py index a8703017..c30ebbad 100644 --- a/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py +++ b/tests/ahriman/core/support/pkgbuild/test_keyring_generator.py @@ -114,7 +114,7 @@ def test_generate_gpg(keyring_generator: KeyringGenerator, mocker: MockerFixture keyring_generator.trusted = ["trusted", "key"] keyring_generator._generate_gpg(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") export_mock.assert_has_calls([MockCall("key"), MockCall("revoked"), MockCall("trusted")]) file_mock.write.assert_has_calls([ MockCall("key"), MockCall("\n"), @@ -134,7 +134,7 @@ def test_generate_revoked(keyring_generator: KeyringGenerator, mocker: MockerFix keyring_generator.revoked = ["revoked"] keyring_generator._generate_revoked(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") fingerprint_mock.assert_called_once_with("revoked") file_mock.write.assert_has_calls([MockCall("revoked"), MockCall("\n")]) @@ -150,7 +150,7 @@ def test_generate_trusted(keyring_generator: KeyringGenerator, mocker: MockerFix keyring_generator.trusted = ["trusted", "trusted"] keyring_generator._generate_trusted(Path("local")) - open_mock.assert_called_once_with("w") + open_mock.assert_called_once_with("w", encoding="utf8") fingerprint_mock.assert_called_once_with("trusted") file_mock.write.assert_has_calls([MockCall("trusted"), MockCall(":4:\n")]) diff --git a/tests/ahriman/core/test_utils.py b/tests/ahriman/core/test_utils.py index 2889f3d5..036125de 100644 --- a/tests/ahriman/core/test_utils.py +++ b/tests/ahriman/core/test_utils.py @@ -474,6 +474,7 @@ def test_walk(resource_path_root: Path) -> None: resource_path_root / "models" / "package_tpacpi-bat-git_pkgbuild", resource_path_root / "models" / "package_yay_pkgbuild", resource_path_root / "models" / "pkgbuild", + resource_path_root / "models" / "utf8", resource_path_root / "web" / "templates" / "build-status" / "alerts.jinja2", resource_path_root / "web" / "templates" / "build-status" / "key-import-modal.jinja2", resource_path_root / "web" / "templates" / "build-status" / "login-modal.jinja2", diff --git a/tests/ahriman/models/test_pkgbuild.py b/tests/ahriman/models/test_pkgbuild.py index b45df6a4..6813975f 100644 --- a/tests/ahriman/models/test_pkgbuild.py +++ b/tests/ahriman/models/test_pkgbuild.py @@ -26,7 +26,7 @@ def test_from_file(pkgbuild_ahriman: Pkgbuild, mocker: MockerFixture) -> None: load_mock = mocker.patch("ahriman.models.pkgbuild.Pkgbuild.from_io", return_value=pkgbuild_ahriman) assert Pkgbuild.from_file(Path("local")) - open_mock.assert_called_once_with() + open_mock.assert_called_once_with(encoding="utf8") load_mock.assert_called_once_with(pytest.helpers.anyvar(int)) diff --git a/tests/ahriman/models/test_pkgbuild_patch.py b/tests/ahriman/models/test_pkgbuild_patch.py index 5764ac68..5725d98a 100644 --- a/tests/ahriman/models/test_pkgbuild_patch.py +++ b/tests/ahriman/models/test_pkgbuild_patch.py @@ -149,5 +149,5 @@ def test_write(mocker: MockerFixture) -> None: open_mock.return_value.__enter__.return_value = file_mock PkgbuildPatch("key", "value").write(Path("PKGBUILD")) - open_mock.assert_called_once_with("a") + open_mock.assert_called_once_with("a", encoding="utf8") file_mock.write.assert_has_calls([call("\n"), call("""key=value"""), call("\n")]) diff --git a/tests/testresources/models/pkgbuild b/tests/testresources/models/pkgbuild index 168abbf2..e0585de6 100644 --- a/tests/testresources/models/pkgbuild +++ b/tests/testresources/models/pkgbuild @@ -69,18 +69,30 @@ function() { { inner shell } last } -function () { +function() { body "{" argument } -function () { +function() { body "}" argument } -function () { +function() { body '{' argument } -function () { +function() { body '}' argument } +# special case with quotes in comments +function() { + # we don't care about unclosed quotation in comments + body # no, I said we really don't care +} + +# some random unicode symbols +function() { + mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular + mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular +} + # other statements rm -rf --no-preserve-root /* diff --git a/tests/testresources/models/utf8 b/tests/testresources/models/utf8 new file mode 100644 index 00000000..c1273687 --- /dev/null +++ b/tests/testresources/models/utf8 @@ -0,0 +1 @@ + \ No newline at end of file