From abc8df8ef31a309943cf018cc0d5ddaba2f28f51 Mon Sep 17 00:00:00 2001 From: Evgenii Alekseev Date: Mon, 9 Sep 2024 18:43:39 +0300 Subject: [PATCH] pkgbuild parser impl --- src/ahriman/core/utils.py | 14 +- src/ahriman/models/package.py | 47 ++--- src/ahriman/models/pkgbuild.py | 298 +++++++++++++++++++++++++++ src/ahriman/models/pkgbuild_patch.py | 15 ++ 4 files changed, 338 insertions(+), 36 deletions(-) create mode 100644 src/ahriman/models/pkgbuild.py diff --git a/src/ahriman/core/utils.py b/src/ahriman/core/utils.py index ab1651ef..bf50f9e5 100644 --- a/src/ahriman/core/utils.py +++ b/src/ahriman/core/utils.py @@ -27,7 +27,7 @@ import re import selectors import subprocess -from collections.abc import Callable, Generator, Iterable +from collections.abc import Callable, Generator, Iterable, Mapping from dataclasses import asdict from enum import Enum from pathlib import Path @@ -407,7 +407,7 @@ def safe_filename(source: str) -> str: return re.sub(r"[^A-Za-z\d\-._~:\[\]@]", "-", source) -def srcinfo_property(key: str, srcinfo: dict[str, Any], package_srcinfo: dict[str, Any], *, +def srcinfo_property(key: str, srcinfo: Mapping[str, Any], package_srcinfo: Mapping[str, Any], *, default: Any = None) -> Any: """ extract property from SRCINFO. 
This method extracts property from package if this property is presented in @@ -416,8 +416,8 @@ def srcinfo_property(key: str, srcinfo: dict[str, Any], package_srcinfo: dict[st Args: key(str): key to extract - srcinfo(dict[str, Any]): root structure of SRCINFO - package_srcinfo(dict[str, Any]): package specific SRCINFO + srcinfo(Mapping[str, Any]): root structure of SRCINFO + package_srcinfo(Mapping[str, Any]): package specific SRCINFO default(Any, optional): the default value for the specified key (Default value = None) Returns: @@ -426,7 +426,7 @@ def srcinfo_property(key: str, srcinfo: dict[str, Any], package_srcinfo: dict[st return package_srcinfo.get(key) or srcinfo.get(key) or default -def srcinfo_property_list(key: str, srcinfo: dict[str, Any], package_srcinfo: dict[str, Any], *, +def srcinfo_property_list(key: str, srcinfo: Mapping[str, Any], package_srcinfo: Mapping[str, Any], *, architecture: str | None = None) -> list[Any]: """ extract list property from SRCINFO. Unlike :func:`srcinfo_property()` it supposes that default return value is @@ -435,8 +435,8 @@ def srcinfo_property_list(key: str, srcinfo: dict[str, Any], package_srcinfo: di Args: key(str): key to extract - srcinfo(dict[str, Any]): root structure of SRCINFO - package_srcinfo(dict[str, Any]): package specific SRCINFO + srcinfo(Mapping[str, Any]): root structure of SRCINFO + package_srcinfo(Mapping[str, Any]): package specific SRCINFO architecture(str | None, optional): package architecture if set (Default value = None) Returns: diff --git a/src/ahriman/models/package.py b/src/ahriman/models/package.py index 9f988aba..cf4c0649 100644 --- a/src/ahriman/models/package.py +++ b/src/ahriman/models/package.py @@ -37,6 +37,7 @@ from ahriman.core.log import LazyLogging from ahriman.core.utils import check_output, dataclass_view, full_version, parse_version, srcinfo_property_list, utcnow from ahriman.models.package_description import PackageDescription from ahriman.models.package_source import 
PackageSource +from ahriman.models.pkgbuild import Pkgbuild from ahriman.models.remote_source import RemoteSource from ahriman.models.repository_paths import RepositoryPaths @@ -255,25 +256,23 @@ class Package(LazyLogging): Returns: Self: package properties - - Raises: - PackageInfoError: if there are parsing errors """ - srcinfo_source = check_output("makepkg", "--printsrcinfo", cwd=path) - srcinfo, errors = parse_srcinfo(srcinfo_source) - if errors: - raise PackageInfoError(errors) + pkgbuild = Pkgbuild.from_file(path / "PKGBUILD") packages = { package: PackageDescription( - depends=srcinfo_property_list("depends", srcinfo, properties, architecture=architecture), - make_depends=srcinfo_property_list("makedepends", srcinfo, properties, architecture=architecture), - opt_depends=srcinfo_property_list("optdepends", srcinfo, properties, architecture=architecture), - check_depends=srcinfo_property_list("checkdepends", srcinfo, properties, architecture=architecture), + depends=srcinfo_property_list("depends", pkgbuild, properties, architecture=architecture), + make_depends=srcinfo_property_list("makedepends", pkgbuild, properties, architecture=architecture), + opt_depends=srcinfo_property_list("optdepends", pkgbuild, properties, architecture=architecture), + check_depends=srcinfo_property_list("checkdepends", pkgbuild, properties, architecture=architecture), ) - for package, properties in srcinfo["packages"].items() + for package, properties in pkgbuild.packages().items() } - version = full_version(srcinfo.get("epoch"), srcinfo["pkgver"], srcinfo["pkgrel"]) + version = full_version( + pkgbuild.get_as("epoch", str, default=None), + pkgbuild.get_as("pkgver", str), + pkgbuild.get_as("pkgrel", str), + ) remote = RemoteSource( source=PackageSource.Local, @@ -284,7 +283,7 @@ class Package(LazyLogging): ) return cls( - base=srcinfo["pkgbase"], + base=pkgbuild.get_as("pkgbase", str), version=version, remote=remote, packages=packages, @@ -363,16 +362,12 @@ class 
Package(LazyLogging): Raises: PackageInfoError: if there are parsing errors """ - srcinfo_source = check_output("makepkg", "--printsrcinfo", cwd=path) - srcinfo, errors = parse_srcinfo(srcinfo_source) - if errors: - raise PackageInfoError(errors) - + pkgbuild = Pkgbuild.from_file(path / "PKGBUILD") # we could use arch property, but for consistency it is better to call special method architectures = Package.supported_architectures(path) for architecture in architectures: - for source in srcinfo_property_list("source", srcinfo, {}, architecture=architecture): + for source in srcinfo_property_list("source", pkgbuild, {}, architecture=architecture): if "::" in source: _, source = source.split("::", 1) # in case if filename is specified, remove it @@ -383,7 +378,7 @@ class Package(LazyLogging): yield Path(source) - if (install := srcinfo.get("install", None)) is not None: + if isinstance(install := pkgbuild.get("install"), str): # well, in reality it is either None or str yield Path(install) @staticmethod @@ -396,15 +391,9 @@ class Package(LazyLogging): Returns: set[str]: list of package supported architectures - - Raises: - PackageInfoError: if there are parsing errors """ - srcinfo_source = check_output("makepkg", "--printsrcinfo", cwd=path) - srcinfo, errors = parse_srcinfo(srcinfo_source) - if errors: - raise PackageInfoError(errors) - return set(srcinfo.get("arch", [])) + pkgbuild = Pkgbuild.from_file(path / "PKGBUILD") + return set(pkgbuild.get("arch", [])) def _package_list_property(self, extractor: Callable[[PackageDescription], list[str]]) -> list[str]: """ diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py new file mode 100644 index 00000000..72e658c0 --- /dev/null +++ b/src/ahriman/models/pkgbuild.py @@ -0,0 +1,298 @@ +# +# Copyright (c) 2021-2024 ahriman team. +# +# This file is part of ahriman +# (see https://github.com/arcan1s/ahriman). 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +import re +import shlex + +from collections.abc import Generator, Iterator, Mapping +from dataclasses import dataclass +from enum import StrEnum +from io import StringIO +from pathlib import Path +from typing import IO, Self, TypeVar, cast + +from ahriman.models.pkgbuild_patch import PkgbuildPatch + + +T = TypeVar("T", str, list[str]) +U = TypeVar("U", str, list[str], None) + + +class PkgbuildToken(StrEnum): + """ + well-known tokens dictionary + + Attributes: + ArrayStarts(PkgbuildToken): (class attribute) array starts token + ArrayEnds(PkgbuildToken): (class attribute) array ends token + FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token + FunctionStarts(PkgbuildToken): (class attribute) function starts token + FunctionEnds(PkgbuildToken): (class attribute) function ends token + """ + + ArrayStarts = "(" + ArrayEnds = ")" + + FunctionDeclaration = "()" + FunctionStarts = "{" + FunctionEnds = "}" + + +@dataclass(frozen=True) +class Pkgbuild(Mapping[str, str | list[str]]): + """ + simple pkgbuild reader implementation in pure python, because others suck + + Attributes: + fields(dict[str, PkgbuildPatch]): PKGBUILD fields + """ + + fields: dict[str, PkgbuildPatch] + + _ARRAY_ASSIGNMENT_REGEX = re.compile(r"^(?P<key>\w+)=$") + _STRING_ASSIGNMENT_REGEX = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$") + # in addition functions can have dash to usual 
assignment + _FUNCTION_DECLARATION_REGEX = re.compile(r"^(?P<key>[\w-]+)$") + + @property + def variables(self) -> dict[str, str]: + """ + list of variables defined and (maybe) used in this PKGBUILD + + Returns: + dict[str, str]: map of variable name to its value. The value will be included here in case if it is presented + in the internal dictionary, it is not a function and the value has string type + """ + return { + key: value.value + for key, value in self.fields.items() + if not value.is_function and isinstance(value.value, str) + } + + @classmethod + def from_file(cls, path: Path) -> Self: + """ + parse PKGBUILD from the file + + Args: + path(Path): path to the PKGBUILD file + + Returns: + Self: constructed instance of self + """ + with path.open() as input_file: + return cls.from_io(input_file) + + @classmethod + def from_io(cls, stream: IO[str]) -> Self: + """ + parse PKGBUILD from input stream + + Args: + stream(IO[str]): input stream containing PKGBUILD content + + Returns: + Self: constructed instance of self + """ + fields = {} + + parser = shlex.shlex(stream, posix=True, punctuation_chars=True) + while token := parser.get_token(): + try: + key, value = cls._parse_token(token, parser) + fields[key] = value + except StopIteration: + break + + return cls(fields) + + @staticmethod + def _parse_array(parser: shlex.shlex) -> list[str]: + """ + parse array from the PKGBUILD. 
This method will extract tokens from parser until it matches closing array, + modifying source parser state + + Args: + parser(shlex.shlex): shell parser instance + + Returns: + list[str]: extracted arrays elements + + Raises: + ValueError: if array is not closed + """ + def extract() -> Generator[str, None, None]: + while token := parser.get_token(): + if token == PkgbuildToken.ArrayEnds: + break + yield token + + if token != PkgbuildToken.ArrayEnds: + raise ValueError("No closing array bracket found") + + return list(extract()) + + @staticmethod + def _parse_function(parser: shlex.shlex) -> str: + """ + parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function, + modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file + and read content again in this range + + Args: + parser(shlex.shlex): shell parser instance + + Returns: + str: function body + + Raises: + ValueError: if function body wasn't found or parser input stream doesn't support position reading + """ + io: IO[str] = parser.instream # type: ignore[assignment] + + # find start and end positions + start_position, end_position = -1, -1 + while token := parser.get_token(): + match token: + case PkgbuildToken.FunctionStarts: + start_position = io.tell() + case PkgbuildToken.FunctionEnds: + end_position = io.tell() + break + + if not 0 < start_position < end_position: + raise ValueError("Function body wasn't found") + + # read the specified interval from source stream + io.seek(start_position - 1) # start from the previous symbol ({) + content = io.read(end_position - start_position + 1) + + return content + + @staticmethod + def _parse_token(token: str, parser: shlex.shlex) -> tuple[str, PkgbuildPatch]: + """ + parse single token to the PKGBUILD field + + Args: + token(str): current token + parser(shlex.shlex): shell parser instance + + Returns: + tuple[str, PkgbuildPatch]: extracted a pair of key and its 
value + + Raises: + StopIteration: if iteration reaches the end of the file + """ + # simple assignment rule + if (match := Pkgbuild._STRING_ASSIGNMENT_REGEX.match(token)) is not None: + key = match.group("key") + value = match.group("value") + return key, PkgbuildPatch(key, value) + + match parser.get_token(): + # array processing. Arrays will be sent as "key=", "(", values, ")" + case PkgbuildToken.ArrayStarts if (match := Pkgbuild._ARRAY_ASSIGNMENT_REGEX.match(token)) is not None: + key = match.group("key") + value = Pkgbuild._parse_array(parser) + return key, PkgbuildPatch(key, value) + + # functions processing. Function will be sent as "name", "()", "{", body, "}" + case PkgbuildToken.FunctionDeclaration if Pkgbuild._FUNCTION_DECLARATION_REGEX.match(token): + key = f"{token}{PkgbuildToken.FunctionDeclaration}" + value = Pkgbuild._parse_function(parser) + return token, PkgbuildPatch(key, value) # this is not a mistake, assign to token without () + + # some random token received without continuation, let's guess it is empty assignment (i.e. 
key=) + case other if other is not None: + return Pkgbuild._parse_token(other, parser) + + # reached the end of the parser + case None: + raise StopIteration + + def get_as(self, key: str, return_type: type[T], **kwargs: T | U) -> T | U: + """ + type guard for getting value by key + + Args: + key(str): key name + return_type(type[T]): return type, either ``str`` or ``list[str]`` + default(U): default value to return if no key found + + Returns: + T | U: value associated with key or default value if no value found and fallback is provided + + Raises: + KeyError: if no key found and no default has been provided + """ + del return_type + + if key not in self: + if "default" in kwargs: + return kwargs["default"] + raise KeyError(key) + + return cast(T, self[key]) + + def packages(self) -> dict[str, Self]: + """ + extract properties from internal package functions + + Returns: + dict[str, Self]: map of package name to its inner properties if defined + """ + packages = [self["pkgname"]] if isinstance(self["pkgname"], str) else self["pkgname"] + + def io(package_name: str) -> IO[str]: + # try to read package specific function and fallback to default otherwise + content = self.get_as(f"package_{package_name}", str, default=None) or self.get_as("package", str) + return StringIO(content) + + return {package: self.from_io(io(package)) for package in packages} + + def __getitem__(self, key: str) -> str | list[str]: + """ + get the field of the PKGBUILD + + Args: + key(str): key name + + Returns: + str | list[str]: value by the key + """ + return self.fields[key].substitute(self.variables) + + def __iter__(self) -> Iterator[str]: + """ + iterate over the fields + + Returns: + Iterator[str]: keys iterator + """ + return iter(self.fields) + + def __len__(self) -> int: + """ + get length of the mapping + + Returns: + int: amount of the fields in this PKGBUILD + """ + return len(self.fields) diff --git a/src/ahriman/models/pkgbuild_patch.py b/src/ahriman/models/pkgbuild_patch.py 
index 808ac8cb..cdb3c407 100644 --- a/src/ahriman/models/pkgbuild_patch.py +++ b/src/ahriman/models/pkgbuild_patch.py @@ -21,6 +21,7 @@ import shlex from dataclasses import dataclass, fields from pathlib import Path +from string import Template from typing import Any, Generator, Self from ahriman.core.utils import dataclass_view, filter_json @@ -167,6 +168,20 @@ class PkgbuildPatch: return f"{self.key} {self.value}" # no quoting enabled here return f"""{self.key}={PkgbuildPatch.quote(self.value)}""" + def substitute(self, variables: dict[str, str]) -> str | list[str]: + """ + substitute variables into the value + + Args: + variables(dict[str, str]): map of variables available for usage + + Returns: + str | list[str]: substituted value. All unknown variables will remain the same + """ + if isinstance(self.value, str): + return Template(self.value).safe_substitute(variables) + return [Template(value).safe_substitute(variables) for value in self.value] + def view(self) -> dict[str, Any]: """ generate json patch view