fix: fix pkgbuild parsing in some cases

It has been found that there are two cases in which pkgbuild was not
parsed correctly

1. Major case in which there is quotation mark inside comment line,
   which would cause ValueError: No closing quotation error
2. Minor case, if there are utf symbols in pkgbuild file (e.g.
   hieroglyphs, see ttf-google-fonts-git), it will case incorrect
   reading in `_is_escaped` method
This commit is contained in:
Evgenii Alekseev 2024-09-25 16:27:47 +03:00
parent f0930be238
commit 343435b3bf
15 changed files with 79 additions and 28 deletions

View File

@ -141,7 +141,7 @@ class Setup(Handler):
(root.include / "00-setup-overrides.ini").unlink(missing_ok=True) # remove old-style configuration
target = root.include / f"00-setup-overrides-{repository_id.id}.ini"
with target.open("w") as ahriman_configuration:
with target.open("w", encoding="utf8") as ahriman_configuration:
configuration.write(ahriman_configuration)
@staticmethod
@ -191,7 +191,7 @@ class Setup(Handler):
configuration.set_option(repository_id.name, "Server", repository_server)
target = source.parent / f"{repository_id.name}-{repository_id.architecture}.conf"
with target.open("w") as devtools_configuration:
with target.open("w", encoding="utf8") as devtools_configuration:
configuration.write(devtools_configuration)
@staticmethod

View File

@ -112,7 +112,7 @@ class Lock(LazyLogging):
"""
if self.path is None:
return
self._pid_file = self.path.open("a+")
self._pid_file = self.path.open("a+", encoding="utf8")
def _watch(self) -> bool:
"""

View File

@ -174,18 +174,31 @@ class PkgbuildParser(shlex.shlex):
Returns:
bool: ``True`` if the previous element of the stream is a quote or escaped and ``False`` otherwise
"""
# wrapper around reading utf symbols from random position of the stream
def read_last() -> tuple[int, str]:
while (position := self._io.tell()) > 0:
try:
return position, self._io.read(1)
except UnicodeDecodeError:
self._io.seek(position - 1)
raise PkgbuildParserError("reached starting position, no valid symbols found")
current_position = self._io.tell()
last_char = penultimate_char = None
for index in range(current_position - 1, -1, -1):
index = current_position - 1
while index > 0:
self._io.seek(index)
last_char = self._io.read(1)
index, last_char = read_last()
if last_char.isspace():
index -= 1
continue
if index >= 0:
if index > 1:
self._io.seek(index - 1)
penultimate_char = self._io.read(1)
_, penultimate_char = read_last()
break
@ -216,6 +229,7 @@ class PkgbuildParser(shlex.shlex):
case PkgbuildToken.Comment:
self.instream.readline()
continue
yield token
if token != PkgbuildToken.ArrayEnds:
@ -248,24 +262,28 @@ class PkgbuildParser(shlex.shlex):
counter += 1
case PkgbuildToken.FunctionEnds:
end_position = self._io.tell()
if self.state != self.eof: # type: ignore[attr-defined]
end_position -= 1 # if we are not at the end of the file, position is _after_ the token
counter -= 1
if counter == 0:
break
case PkgbuildToken.Comment:
self.instream.readline()
if not 0 < start_position < end_position:
raise PkgbuildParserError("function body wasn't found")
# read the specified interval from source stream
self._io.seek(start_position - 1) # start from the previous symbol
content = self._io.read(end_position - start_position)
# we cannot use :func:`read()` here, because it reads characters, not bytes
content = ""
while self._io.tell() != end_position and (next_char := self._io.read(1)):
content += next_char
# special case of the end of file
if self.state == self.eof: # type: ignore[attr-defined]
content += self._io.read(1)
# reset position (because the last position was before the next token starts)
self._io.seek(end_position)
return content
def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]:

View File

@ -141,7 +141,7 @@ def migrate_package_statuses(connection: Connection, paths: RepositoryPaths) ->
cache_path = paths.root / "status_cache.json"
if not cache_path.is_file():
return # no file found
with cache_path.open() as cache:
with cache_path.open(encoding="utf8") as cache:
dump = json.load(cache)
for item in dump.get("packages", []):

View File

@ -116,7 +116,7 @@ class KeyringGenerator(PkgbuildGenerator):
Args:
source_path(Path): destination of the file content
"""
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.trusted + self.packagers + self.revoked)):
public_key = self.sign.key_export(key)
source_file.write(public_key)
@ -129,7 +129,7 @@ class KeyringGenerator(PkgbuildGenerator):
Args:
source_path(Path): destination of the file content
"""
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.revoked)):
fingerprint = self.sign.key_fingerprint(key)
source_file.write(fingerprint)
@ -147,7 +147,7 @@ class KeyringGenerator(PkgbuildGenerator):
"""
if not self.trusted:
raise PkgbuildGeneratorError
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.trusted)):
fingerprint = self.sign.key_fingerprint(key)
source_file.write(fingerprint)

View File

@ -64,7 +64,7 @@ class Pkgbuild(Mapping[str, Any]):
Returns:
Self: constructed instance of self
"""
with path.open() as input_file:
with path.open(encoding="utf8") as input_file:
return cls.from_io(input_file)
@classmethod

View File

@ -199,7 +199,7 @@ class PkgbuildPatch:
Args:
pkgbuild_path(Path): path to PKGBUILD file
"""
with pkgbuild_path.open("a") as pkgbuild:
with pkgbuild_path.open("a", encoding="utf8") as pkgbuild:
pkgbuild.write("\n") # in case if file ends without new line we are appending it at the end
pkgbuild.write(self.serialize())
pkgbuild.write("\n") # append new line after the values

View File

@ -63,7 +63,7 @@ def test_open(lock: Lock, mocker: MockerFixture) -> None:
lock.path = Path("ahriman.pid")
lock._open()
open_mock.assert_called_once_with("a+")
open_mock.assert_called_once_with("a+", encoding="utf8")
def test_open_skip(lock: Lock, mocker: MockerFixture) -> None:

View File

@ -42,6 +42,17 @@ def test_expand_array_exception() -> None:
assert PkgbuildParser._expand_array(["${pkgbase}{", ",", "-libs"])
def test_is_escaped_exception(resource_path_root: Path) -> None:
"""
must raise PkgbuildParserError if no valid utf symbols found
"""
utf8 = resource_path_root / "models" / "utf8"
with utf8.open(encoding="utf8") as content:
content.seek(2)
with pytest.raises(PkgbuildParserError):
assert not PkgbuildParser(content)._is_escaped()
def test_parse_array() -> None:
"""
must parse array
@ -193,7 +204,7 @@ def test_parse(resource_path_root: Path) -> None:
must parse complex file
"""
pkgbuild = resource_path_root / "models" / "pkgbuild"
with pkgbuild.open() as content:
with pkgbuild.open(encoding="utf8") as content:
parser = PkgbuildParser(content)
assert list(parser.parse()) == [
PkgbuildPatch("var", "value"),
@ -258,5 +269,13 @@ def test_parse(resource_path_root: Path) -> None:
}"""),
PkgbuildPatch("function()", """{
body '}' argument
}"""),
PkgbuildPatch("function()", """{
# we don't care about unclosed quotation in comments
body # no, I said we really don't care
}"""),
PkgbuildPatch("function()", """{
mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular
mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular
}"""),
]

View File

@ -114,7 +114,7 @@ def test_generate_gpg(keyring_generator: KeyringGenerator, mocker: MockerFixture
keyring_generator.trusted = ["trusted", "key"]
keyring_generator._generate_gpg(Path("local"))
open_mock.assert_called_once_with("w")
open_mock.assert_called_once_with("w", encoding="utf8")
export_mock.assert_has_calls([MockCall("key"), MockCall("revoked"), MockCall("trusted")])
file_mock.write.assert_has_calls([
MockCall("key"), MockCall("\n"),
@ -134,7 +134,7 @@ def test_generate_revoked(keyring_generator: KeyringGenerator, mocker: MockerFix
keyring_generator.revoked = ["revoked"]
keyring_generator._generate_revoked(Path("local"))
open_mock.assert_called_once_with("w")
open_mock.assert_called_once_with("w", encoding="utf8")
fingerprint_mock.assert_called_once_with("revoked")
file_mock.write.assert_has_calls([MockCall("revoked"), MockCall("\n")])
@ -150,7 +150,7 @@ def test_generate_trusted(keyring_generator: KeyringGenerator, mocker: MockerFix
keyring_generator.trusted = ["trusted", "trusted"]
keyring_generator._generate_trusted(Path("local"))
open_mock.assert_called_once_with("w")
open_mock.assert_called_once_with("w", encoding="utf8")
fingerprint_mock.assert_called_once_with("trusted")
file_mock.write.assert_has_calls([MockCall("trusted"), MockCall(":4:\n")])

View File

@ -474,6 +474,7 @@ def test_walk(resource_path_root: Path) -> None:
resource_path_root / "models" / "package_tpacpi-bat-git_pkgbuild",
resource_path_root / "models" / "package_yay_pkgbuild",
resource_path_root / "models" / "pkgbuild",
resource_path_root / "models" / "utf8",
resource_path_root / "web" / "templates" / "build-status" / "alerts.jinja2",
resource_path_root / "web" / "templates" / "build-status" / "key-import-modal.jinja2",
resource_path_root / "web" / "templates" / "build-status" / "login-modal.jinja2",

View File

@ -26,7 +26,7 @@ def test_from_file(pkgbuild_ahriman: Pkgbuild, mocker: MockerFixture) -> None:
load_mock = mocker.patch("ahriman.models.pkgbuild.Pkgbuild.from_io", return_value=pkgbuild_ahriman)
assert Pkgbuild.from_file(Path("local"))
open_mock.assert_called_once_with()
open_mock.assert_called_once_with(encoding="utf8")
load_mock.assert_called_once_with(pytest.helpers.anyvar(int))

View File

@ -149,5 +149,5 @@ def test_write(mocker: MockerFixture) -> None:
open_mock.return_value.__enter__.return_value = file_mock
PkgbuildPatch("key", "value").write(Path("PKGBUILD"))
open_mock.assert_called_once_with("a")
open_mock.assert_called_once_with("a", encoding="utf8")
file_mock.write.assert_has_calls([call("\n"), call("""key=value"""), call("\n")])

View File

@ -82,5 +82,17 @@ function () {
body '}' argument
}
# special case with quotes in comments
function() {
# we don't care about unclosed quotation in comments
body # no, I said we really don't care
}
# some random unicode symbols
function() {
mv "$pkgdir"/usr/share/fonts/站酷小薇体 "$pkgdir"/usr/share/fonts/zcool-xiaowei-regular
mv "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.站酷小薇体 "$pkgdir"/usr/share/licenses/"$pkgname"/LICENSE.zcool-xiaowei-regular
}
# other statements
rm -rf --no-preserve-root /*

View File

@ -0,0 +1 @@
η«