fix: parse non-utf pkgbuilds as well (#140)

it has been reported that duriing reading pkgbuilds with latin-1 charset
the execption will be raised. Well, it is one more point to rewrite
parser to use own impl instead of shlex and parse raw byte array instead
This commit is contained in:
2025-02-24 00:05:54 +02:00
parent a1a8dd68e8
commit ed67898012
5 changed files with 519 additions and 8 deletions

View File

@ -95,6 +95,19 @@ class DuplicateRunError(RuntimeError):
self, "Another application instance is run. This error can be suppressed by using --force flag.")
class EncodeError(ValueError):
"""
exception used for bytes encoding errors
"""
def __init__(self, encodings: list[str]) -> None:
"""
Args:
encodings(list[str]): list of encodings tried
"""
ValueError.__init__(self, f"Could not encode bytes by using {encodings}")
class ExitCode(RuntimeError):
"""
special exception which has to be thrown to return non-zero status without error message

View File

@ -24,6 +24,7 @@ from pathlib import Path
from typing import Any, IO, Self
from ahriman.core.alpm.pkgbuild_parser import PkgbuildParser, PkgbuildToken
from ahriman.core.exceptions import EncodeError
from ahriman.models.pkgbuild_patch import PkgbuildPatch
@ -33,11 +34,14 @@ class Pkgbuild(Mapping[str, Any]):
model and proxy for PKGBUILD properties
Attributes:
DEFAULT_ENCODINGS(list[str]): (class attribute) list of encoding to be applied on the file content
fields(dict[str, PkgbuildPatch]): PKGBUILD fields
"""
fields: dict[str, PkgbuildPatch]
DEFAULT_ENCODINGS = ["utf8", "latin-1"]
@property
def variables(self) -> dict[str, str]:
"""
@ -54,18 +58,34 @@ class Pkgbuild(Mapping[str, Any]):
}
@classmethod
def from_file(cls, path: Path) -> Self:
def from_file(cls, path: Path, encodings: list[str] | None = None) -> Self:
"""
parse PKGBUILD from the file
Args:
path(Path): path to the PKGBUILD file
encodings(list[str] | None, optional): the encoding of the file (Default value = None)
Returns:
Self: constructed instance of self
Raises:
EncodeError: if encoding is unknown
"""
with path.open(encoding="utf8") as input_file:
return cls.from_io(input_file)
# read raw bytes from file
with path.open("rb") as input_file:
content = input_file.read()
# decode bytes content based on either
encodings = encodings or cls.DEFAULT_ENCODINGS
for encoding in encodings:
try:
io = StringIO(content.decode(encoding))
return cls.from_io(io)
except ValueError:
pass
raise EncodeError(encodings)
@classmethod
def from_io(cls, stream: IO[str]) -> Self: