fix: fix pkgbuild parsing in some cases

Two cases have been found in which a PKGBUILD file was not parsed
correctly:

1. Major case: a quotation mark inside a comment line would cause
   the error `ValueError: No closing quotation`
2. Minor case: if there are multi-byte UTF-8 symbols in the PKGBUILD
   file (e.g. hieroglyphs, see ttf-google-fonts-git), it will cause
   incorrect reading in the `_is_escaped` method
This commit is contained in:
2024-09-25 16:27:47 +03:00
parent 6d157ca809
commit d6cdb5bea5
15 changed files with 79 additions and 28 deletions

View File

@ -141,7 +141,7 @@ class Setup(Handler):
(root.include / "00-setup-overrides.ini").unlink(missing_ok=True) # remove old-style configuration
target = root.include / f"00-setup-overrides-{repository_id.id}.ini"
with target.open("w") as ahriman_configuration:
with target.open("w", encoding="utf8") as ahriman_configuration:
configuration.write(ahriman_configuration)
@staticmethod
@ -191,7 +191,7 @@ class Setup(Handler):
configuration.set_option(repository_id.name, "Server", repository_server)
target = source.parent / f"{repository_id.name}-{repository_id.architecture}.conf"
with target.open("w") as devtools_configuration:
with target.open("w", encoding="utf8") as devtools_configuration:
configuration.write(devtools_configuration)
@staticmethod

View File

@ -112,7 +112,7 @@ class Lock(LazyLogging):
"""
if self.path is None:
return
self._pid_file = self.path.open("a+")
self._pid_file = self.path.open("a+", encoding="utf8")
def _watch(self) -> bool:
"""

View File

@ -174,18 +174,31 @@ class PkgbuildParser(shlex.shlex):
Returns:
bool: ``True`` if the previous element of the stream is a quote or escaped and ``False`` otherwise
"""
# wrapper around reading utf symbols from random position of the stream
def read_last() -> tuple[int, str]:
while (position := self._io.tell()) > 0:
try:
return position, self._io.read(1)
except UnicodeDecodeError:
self._io.seek(position - 1)
raise PkgbuildParserError("reached starting position, no valid symbols found")
current_position = self._io.tell()
last_char = penultimate_char = None
for index in range(current_position - 1, -1, -1):
index = current_position - 1
while index > 0:
self._io.seek(index)
last_char = self._io.read(1)
index, last_char = read_last()
if last_char.isspace():
index -= 1
continue
if index >= 0:
if index > 1:
self._io.seek(index - 1)
penultimate_char = self._io.read(1)
_, penultimate_char = read_last()
break
@ -216,6 +229,7 @@ class PkgbuildParser(shlex.shlex):
case PkgbuildToken.Comment:
self.instream.readline()
continue
yield token
if token != PkgbuildToken.ArrayEnds:
@ -248,24 +262,28 @@ class PkgbuildParser(shlex.shlex):
counter += 1
case PkgbuildToken.FunctionEnds:
end_position = self._io.tell()
if self.state != self.eof: # type: ignore[attr-defined]
end_position -= 1 # if we are not at the end of the file, position is _after_ the token
counter -= 1
if counter == 0:
break
case PkgbuildToken.Comment:
self.instream.readline()
if not 0 < start_position < end_position:
raise PkgbuildParserError("function body wasn't found")
# read the specified interval from source stream
self._io.seek(start_position - 1) # start from the previous symbol
content = self._io.read(end_position - start_position)
# we cannot use :func:`read()` here, because it reads characters, not bytes
content = ""
while self._io.tell() != end_position and (next_char := self._io.read(1)):
content += next_char
# special case of the end of file
if self.state == self.eof: # type: ignore[attr-defined]
content += self._io.read(1)
# reset position (because the last position was before the next token starts)
self._io.seek(end_position)
return content
def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]:

View File

@ -141,7 +141,7 @@ def migrate_package_statuses(connection: Connection, paths: RepositoryPaths) ->
cache_path = paths.root / "status_cache.json"
if not cache_path.is_file():
return # no file found
with cache_path.open() as cache:
with cache_path.open(encoding="utf8") as cache:
dump = json.load(cache)
for item in dump.get("packages", []):

View File

@ -116,7 +116,7 @@ class KeyringGenerator(PkgbuildGenerator):
Args:
source_path(Path): destination of the file content
"""
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.trusted + self.packagers + self.revoked)):
public_key = self.sign.key_export(key)
source_file.write(public_key)
@ -129,7 +129,7 @@ class KeyringGenerator(PkgbuildGenerator):
Args:
source_path(Path): destination of the file content
"""
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.revoked)):
fingerprint = self.sign.key_fingerprint(key)
source_file.write(fingerprint)
@ -147,7 +147,7 @@ class KeyringGenerator(PkgbuildGenerator):
"""
if not self.trusted:
raise PkgbuildGeneratorError
with source_path.open("w") as source_file:
with source_path.open("w", encoding="utf8") as source_file:
for key in sorted(set(self.trusted)):
fingerprint = self.sign.key_fingerprint(key)
source_file.write(fingerprint)

View File

@ -64,7 +64,7 @@ class Pkgbuild(Mapping[str, Any]):
Returns:
Self: constructed instance of self
"""
with path.open() as input_file:
with path.open(encoding="utf8") as input_file:
return cls.from_io(input_file)
@classmethod

View File

@ -199,7 +199,7 @@ class PkgbuildPatch:
Args:
pkgbuild_path(Path): path to PKGBUILD file
"""
with pkgbuild_path.open("a") as pkgbuild:
with pkgbuild_path.open("a", encoding="utf8") as pkgbuild:
pkgbuild.write("\n") # in case if file ends without new line we are appending it at the end
pkgbuild.write(self.serialize())
pkgbuild.write("\n") # append new line after the values