From 20e11cd7f4c1b5ed587756f5166b24fdb8400456 Mon Sep 17 00:00:00 2001
From: Evgenii Alekseev
Date: Fri, 13 Sep 2024 23:42:44 +0300
Subject: [PATCH] add support of array expansion

---
 src/ahriman/core/alpm/pkgbuild_parser.py      | 253 ++++++++++++++++++
 src/ahriman/models/package.py                 |   8 +-
 src/ahriman/models/pkgbuild.py                | 207 ++------------
 .../handlers/test_handler_versions.py         |   4 +-
 .../ahriman/core/alpm/test_pkgbuild_parser.py |   0
 tests/ahriman/models/test_pkgbuild.py         |   0
 6 files changed, 281 insertions(+), 191 deletions(-)
 create mode 100644 src/ahriman/core/alpm/pkgbuild_parser.py
 create mode 100644 tests/ahriman/core/alpm/test_pkgbuild_parser.py
 create mode 100644 tests/ahriman/models/test_pkgbuild.py

diff --git a/src/ahriman/core/alpm/pkgbuild_parser.py b/src/ahriman/core/alpm/pkgbuild_parser.py
new file mode 100644
index 00000000..8e357f70
--- /dev/null
+++ b/src/ahriman/core/alpm/pkgbuild_parser.py
@@ -0,0 +1,253 @@
+#
+# Copyright (c) 2021-2024 ahriman team.
+#
+# This file is part of ahriman
+# (see https://github.com/arcan1s/ahriman).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+import itertools
+import re
+import shlex
+
+from collections.abc import Generator
+from enum import StrEnum
+from typing import IO
+
+from ahriman.models.pkgbuild_patch import PkgbuildPatch
+
+
+class PkgbuildToken(StrEnum):
+    """
+    well-known tokens dictionary
+
+    Attributes:
+        ArrayEnds(PkgbuildToken): (class attribute) array ends token
+        ArrayStarts(PkgbuildToken): (class attribute) array starts token
+        Comma(PkgbuildToken): (class attribute) comma token
+        Comment(PkgbuildToken): (class attribute) comment token
+        FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
+        FunctionEnds(PkgbuildToken): (class attribute) function ends token
+        FunctionStarts(PkgbuildToken): (class attribute) function starts token
+    """
+
+    ArrayStarts = "("
+    ArrayEnds = ")"
+
+    Comma = ","
+
+    Comment = "#"
+
+    FunctionDeclaration = "()"
+    FunctionStarts = "{"
+    FunctionEnds = "}"
+
+
+class PkgbuildParser(shlex.shlex):
+    """
+    simple pkgbuild reader implementation in pure python, because others suck
+    """
+
+    _ARRAY_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=$")
+    # in addition to usual assignment, functions can have dash
+    _FUNCTION_DECLARATION = re.compile(r"^(?P<key>[\w-]+)$")
+    _STRING_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
+
+    def __init__(self, stream: IO[str]) -> None:
+        """
+        default constructor
+
+        Args:
+            stream(IO[str]): input stream containing PKGBUILD content
+        """
+        shlex.shlex.__init__(self, stream, posix=True, punctuation_chars=True)
+        self._io = stream  # direct access without type casting
+
+        # ignore substitution and extend bash symbols
+        self.wordchars += "${}#:+-@"
+        # in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
+        self.commenters = ""
+
+    @staticmethod
+    def _expand_array(array: list[str]) -> list[str]:
+        """
+        bash array expansion simulator. It takes raw parsed array and tries to expand constructions like
+        ``(first prefix-{mid1,mid2}-suffix last)`` into ``(first prefix-mid1-suffix prefix-mid2-suffix last)``
+
+        Args:
+            array(list[str]): input array
+
+        Returns:
+            list[str]: either source array or expanded array if possible
+
+        Raises:
+            ValueError: if there are errors in parser
+        """
+        # we are using comma as marker for expansion (if any)
+        if PkgbuildToken.Comma not in array:
+            return array
+        # again sanity check, for expansion there are at least 3 elements (first, last and comma)
+        if len(array) < 3:
+            return array
+
+        result = []
+        buffer, prefix = [], None
+
+        for index, (first, second) in enumerate(itertools.pairwise(array)):
+            match (first, second):
+                # in this case we check if expansion should be started
+                # this condition matches "prefix{first", ","
+                case (_, PkgbuildToken.Comma) if PkgbuildToken.FunctionStarts in first:
+                    prefix, part = first.rsplit(PkgbuildToken.FunctionStarts, maxsplit=1)
+                    buffer.append(f"{prefix}{part}")
+
+                # the last element case, it matches either ",", "last}" or ",", "last}suffix"
+                # in case if there is suffix, it must be appended to all list elements
+                case (PkgbuildToken.Comma, _) if prefix is not None and PkgbuildToken.FunctionEnds in second:
+                    part, suffix = second.rsplit(PkgbuildToken.FunctionEnds, maxsplit=1)
+                    buffer.append(f"{prefix}{part}")
+                    result.extend([f"{part}{suffix}" for part in buffer])
+                    # reset state
+                    buffer, prefix = [], None
+
+                # we have already prefix string, so we are in progress of expansion
+                # we always operate the last element, so this matches ",", "next"
+                case (PkgbuildToken.Comma, _) if prefix is not None:
+                    buffer.append(f"{prefix}{second}")
+
+                # exactly first element of the list
+                case (_, _) if prefix is None and index == 0:
+                    result.append(first)
+
+                # any next normal element
+                case (_, _) if prefix is None:
+                    result.append(second)
+
+        # small sanity check
+        if prefix is not None:
+            raise ValueError(f"Could not expand `{array}` as array")
+
+        return result
+
+    def _parse_array(self) -> list[str]:
+        """
+        parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
+        modifying source parser state
+
+        Returns:
+            list[str]: extracted arrays elements
+
+        Raises:
+            ValueError: if array is not closed
+        """
+        def extract() -> Generator[str, None, None]:
+            while token := self.get_token():
+                if token == PkgbuildToken.ArrayEnds:
+                    break
+                if token == PkgbuildToken.Comment:
+                    self.instream.readline()
+                    continue
+                yield token
+
+            if token != PkgbuildToken.ArrayEnds:
+                raise ValueError("No closing array bracket found")
+
+        return self._expand_array(list(extract()))
+
+    def _parse_function(self) -> str:
+        """
+        parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
+        modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
+        and read content again in this range
+
+        Returns:
+            str: function body
+
+        Raises:
+            ValueError: if function body wasn't found or parser input stream doesn't support position reading
+        """
+        # find start and end positions
+        start_position, end_position = -1, -1
+        while token := self.get_token():
+            match token:
+                case PkgbuildToken.FunctionStarts:
+                    start_position = self._io.tell() - 1
+                case PkgbuildToken.FunctionEnds:
+                    end_position = self._io.tell()
+                    break
+
+        if not 0 < start_position < end_position:
+            raise ValueError("Function body wasn't found")
+
+        # read the specified interval from source stream
+        self._io.seek(start_position - 1)  # start from the previous symbol
+        content = self._io.read(end_position - start_position)
+
+        return content
+
+    def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]:
+        """
+        parse single token to the PKGBUILD field
+
+        Args:
+            token(str): current token
+
+        Yields:
+            PkgbuildPatch: extracted a PKGBUILD node
+        """
+        # simple assignment rule
+        if (match := self._STRING_ASSIGNMENT.match(token)) is not None:
+            key = match.group("key")
+            value = match.group("value")
+            yield PkgbuildPatch(key, value)
+            return
+
+        if token == PkgbuildToken.Comment:
+            self.instream.readline()
+            return
+
+        match self.get_token():
+            # array processing. Arrays will be sent as "key=", "(", values, ")"
+            case PkgbuildToken.ArrayStarts if (match := self._ARRAY_ASSIGNMENT.match(token)) is not None:
+                key = match.group("key")
+                value = self._parse_array()
+                yield PkgbuildPatch(key, value)
+
+            # functions processing. Function will be sent as "name", "()", "{", body, "}"
+            case PkgbuildToken.FunctionDeclaration if self._FUNCTION_DECLARATION.match(token):
+                key = f"{token}{PkgbuildToken.FunctionDeclaration}"
+                value = self._parse_function()
+                yield PkgbuildPatch(key, value)  # this is not mistake, assign to token without ()
+
+            # special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
+            case PkgbuildToken.ArrayStarts if self._FUNCTION_DECLARATION.match(token):
+                next_token = self.get_token()
+                if next_token == PkgbuildToken.ArrayEnds:  # replace closing bracket with "()"
+                    next_token = PkgbuildToken.FunctionDeclaration
+                self.push_token(next_token)  # type: ignore[arg-type]
+                yield from self._parse_token(token)
+
+            # some random token received without continuation, lets guess it is empty assignment (i.e. key=)
+            case other if other is not None:
+                yield from self._parse_token(other)
+
+    def parse(self) -> Generator[PkgbuildPatch, None, None]:
+        """
+        parse source stream and yield parsed entries
+
+        Yields:
+            PkgbuildPatch: extracted a PKGBUILD node
+        """
+        for token in self:
+            yield from self._parse_token(token)
diff --git a/src/ahriman/models/package.py b/src/ahriman/models/package.py
index 23e5f93c..ad20f4ab 100644
--- a/src/ahriman/models/package.py
+++ b/src/ahriman/models/package.py
@@ -266,7 +266,7 @@ class Package(LazyLogging):
             )
             for package, properties in pkgbuild.packages().items()
         }
-        version = full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
+        version = full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
 
         remote = RemoteSource(
             source=PackageSource.Local,
@@ -277,7 +277,7 @@ class Package(LazyLogging):
         )
 
         return cls(
-            base=pkgbuild.pkgbase,
+            base=pkgbuild["pkgbase"],
             version=version,
             remote=remote,
             packages=packages,
@@ -372,7 +372,7 @@ class Package(LazyLogging):
 
                 yield Path(source)
 
-        if install := pkgbuild.get("install"):
+        if (install := pkgbuild.get("install")) is not None:
             yield Path(install)
 
     @staticmethod
@@ -435,7 +435,7 @@ class Package(LazyLogging):
 
             pkgbuild = Pkgbuild.from_file(paths.cache_for(self.base) / "PKGBUILD")
 
-            return full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
+            return full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
         except Exception:
             self.logger.exception("cannot determine version of VCS package")
         finally:
diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py
index 211c4799..72cd6e32 100644
--- a/src/ahriman/models/pkgbuild.py
+++ b/src/ahriman/models/pkgbuild.py
@@ -17,43 +17,20 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-import re
-import shlex
-
-from collections.abc import Generator, Iterator, Mapping
+from collections.abc import Iterator, Mapping
 from dataclasses import dataclass
-from enum import StrEnum
 from io import StringIO
 from pathlib import Path
 from typing import Any, IO, Self
 
+from ahriman.core.alpm.pkgbuild_parser import PkgbuildParser, PkgbuildToken
 from ahriman.models.pkgbuild_patch import PkgbuildPatch
 
 
-class PkgbuildToken(StrEnum):
-    """
-    well-known tokens dictionary
-
-    Attributes:
-        ArrayEnds(PkgbuildToken): (class attribute) array ends token
-        ArrayStarts(PkgbuildToken): (class attribute) array starts token
-        FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
-        FunctionEnds(PkgbuildToken): (class attribute) function ends token
-        FunctionStarts(PkgbuildToken): (class attribute) function starts token
-    """
-
-    ArrayStarts = "("
-    ArrayEnds = ")"
-
-    FunctionDeclaration = "()"
-    FunctionStarts = "{"
-    FunctionEnds = "}"
-
-
 @dataclass(frozen=True)
-class Pkgbuild(Mapping[str, str | list[str]]):
+class Pkgbuild(Mapping[str, Any]):
     """
-    simple pkgbuild reader implementation in pure python, because others sucks
+    model and proxy for PKGBUILD properties
 
     Attributes:
         fields(dict[str, PkgbuildPatch]): PKGBUILD fields
@@ -61,11 +38,6 @@ class Pkgbuild(Mapping[str, str | list[str]]):
 
     fields: dict[str, PkgbuildPatch]
 
-    _ARRAY_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=$")
-    _STRING_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
-    # in addition, functions can have dash to usual assignment
-    _FUNCTION_DECLARATION = re.compile(r"^(?P<key>[\w-]+)$")
-
     @property
     def variables(self) -> dict[str, str]:
         """
@@ -106,141 +78,17 @@ class Pkgbuild(Mapping[str, str | list[str]]):
         Returns:
             Self: constructed instance of self
         """
-        fields = {}
-
-        parser = shlex.shlex(stream, posix=True, punctuation_chars=True)
-        # ignore substitution and extend bash symbols
-        parser.wordchars += "${}#:+"
-        # in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
-        parser.commenters = ""
-        while token := parser.get_token():
-            try:
-                patch = cls._parse_token(token, parser)
-                fields[patch.key] = patch
-            except StopIteration:
-                break
+        parser = PkgbuildParser(stream)
+        fields = {patch.key: patch for patch in parser.parse()}
 
         # pkgbase is optional field, the pkgname must be used instead if not set
         # however, pkgname is not presented is "package()" functions which we are parsing here too,
         # thus, in our terms, it is optional too
-        if "pkgbase" not in fields:
-            fields["pkgbase"] = fields.get("pkgname")
+        if "pkgbase" not in fields and "pkgname" in fields:
+            fields["pkgbase"] = fields["pkgname"]
 
         return cls({key: value for key, value in fields.items() if key})
 
-    @staticmethod
-    def _parse_array(parser: shlex.shlex) -> list[str]:
-        """
-        parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
-        modifying source parser state
-
-        Args:
-            parser(shlex.shlex): shell parser instance
-
-        Returns:
-            list[str]: extracted arrays elements
-
-        Raises:
-            ValueError: if array is not closed
-        """
-        def extract() -> Generator[str, None, None]:
-            while token := parser.get_token():
-                if token == PkgbuildToken.ArrayEnds:
-                    break
-                yield token
-
-            if token != PkgbuildToken.ArrayEnds:
-                raise ValueError("No closing array bracket found")
-
-        return list(extract())
-
-    @staticmethod
-    def _parse_function(parser: shlex.shlex) -> str:
-        """
-        parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
-        modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
-        and read content again in this range
-
-        Args:
-            parser(shlex.shlex): shell parser instance
-
-        Returns:
-            str: function body
-
-        Raises:
-            ValueError: if function body wasn't found or parser input stream doesn't support position reading
-        """
-        io: IO[str] = parser.instream  # type: ignore[assignment]
-
-        # find start and end positions
-        start_position, end_position = -1, -1
-        while token := parser.get_token():
-            match token:
-                case PkgbuildToken.FunctionStarts:
-                    start_position = io.tell() - 1
-                case PkgbuildToken.FunctionEnds:
-                    end_position = io.tell()
-                    break
-
-        if not 0 < start_position < end_position:
-            raise ValueError("Function body wasn't found")
-
-        # read the specified interval from source stream
-        io.seek(start_position - 1)  # start from the previous symbol
-        content = io.read(end_position - start_position)
-
-        return content
-
-    @staticmethod
-    def _parse_token(token: str, parser: shlex.shlex) -> PkgbuildPatch:
-        """
-        parse single token to the PKGBUILD field
-
-        Args:
-            token(str): current token
-            parser(shlex.shlex): shell parser instance
-
-        Returns:
-            PkgbuildPatch: extracted a PKGBUILD node
-
-        Raises:
-            StopIteration: if iteration reaches the end of the file
-        """
-        # simple assignment rule
-        if (match := Pkgbuild._STRING_ASSIGNMENT.match(token)) is not None:
-            key = match.group("key")
-            value = match.group("value")
-            return PkgbuildPatch(key, value)
-
-        match parser.get_token():
-            # array processing. Arrays will be sent as "key=", "(", values, ")"
-            case PkgbuildToken.ArrayStarts if (match := Pkgbuild._ARRAY_ASSIGNMENT.match(token)) is not None:
-                key = match.group("key")
-                value = Pkgbuild._parse_array(parser)
-                return PkgbuildPatch(key, value)
-
-            # functions processing. Function will be sent as "name", "()", "{", body, "}"
-            case PkgbuildToken.FunctionDeclaration if Pkgbuild._FUNCTION_DECLARATION.match(token):
-                key = f"{token}{PkgbuildToken.FunctionDeclaration}"
-                value = Pkgbuild._parse_function(parser)
-                return PkgbuildPatch(key, value)  # this is not mistake, assign to token without ()
-
-            # special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
-            case PkgbuildToken.ArrayStarts if Pkgbuild._FUNCTION_DECLARATION.match(token):
-                next_token = parser.get_token()
-                if next_token == PkgbuildToken.ArrayEnds:  # replace closing bracket with "()"
-                    next_token = PkgbuildToken.FunctionDeclaration
-                parser.push_token(next_token)  # type: ignore[arg-type]
-                return Pkgbuild._parse_token(token, parser)
-
-            # some random token received without continuation, lets guess it is empty assignment (i.e. key=)
-            case other if other is not None:
-                return Pkgbuild._parse_token(other, parser)
-
-            # reached the end of the parser
-            case None:
-                raise StopIteration
-
     def packages(self) -> dict[str, Self]:
         """
         extract properties from internal package functions
@@ -252,44 +100,33 @@ class Pkgbuild(Mapping[str, str | list[str]]):
 
         def io(package_name: str) -> IO[str]:
             # try to read package specific function and fallback to default otherwise
-            # content = self.get_as(f"package_{package_name}") or self.get_as("package")
-            content = getattr(self, f"package_{package_name}") or self.package
+            content = self.get(f"package_{package_name}") or self["package"]
             return StringIO(content)
 
         return {package: self.from_io(io(package)) for package in packages}
 
-    def __getattr__(self, item: str) -> Any:
-        """
-        proxy method for PKGBUILD properties
-
-        Args:
-            item(str): property name
-
-        Returns:
-            Any: attribute by its name
-        """
-        return self[item]
-
-    def __getitem__(self, key: str) -> str | list[str]:
+    def __getitem__(self, item: str) -> Any:
         """
         get the field of the PKGBUILD. This method tries to get exact key value if possible; if none found, it tries to
-        fetch function with the same name. And, finally, it returns empty value if nothing found, so this function never
-        raises an ``KeyError``.exception``
+        fetch function with the same name
 
         Args:
-            key(str): key name
+            item(str): key name
 
         Returns:
-            str | list[str]: value by the key
+            Any: substituted value by the key
+
+        Raises:
+            KeyError: if key doesn't exist
         """
-        value = self.fields.get(key)
+        value = self.fields.get(item)
         # if the key wasn't found and user didn't ask for function explicitly, we can try to get by function name
-        if value is None and not key.endswith(PkgbuildToken.FunctionDeclaration):
-            value = self.fields.get(f"{key}{PkgbuildToken.FunctionDeclaration}")
-        # if we still didn't find anything, we fall back to empty value (just like shell)
-        # to avoid recursion here, we can just drop from the method
+        if value is None and not item.endswith(PkgbuildToken.FunctionDeclaration):
+            value = self.fields.get(f"{item}{PkgbuildToken.FunctionDeclaration}")
+
+        # if we still didn't find anything, we can just raise the exception
         if value is None:
-            return ""
+            raise KeyError(item)
 
         return value.substitute(self.variables)
 
diff --git a/tests/ahriman/application/handlers/test_handler_versions.py b/tests/ahriman/application/handlers/test_handler_versions.py
index 73602a42..be64b9c2 100644
--- a/tests/ahriman/application/handlers/test_handler_versions.py
+++ b/tests/ahriman/application/handlers/test_handler_versions.py
@@ -28,9 +28,9 @@ def test_package_dependencies() -> None:
     """
     must extract package dependencies
     """
-    packages = dict(Versions.package_dependencies("srcinfo"))
+    packages = dict(Versions.package_dependencies("requests"))
     assert packages
-    assert packages.get("parse") is not None
+    assert packages.get("urllib3") is not None
 
 
 def test_package_dependencies_missing() -> None:
diff --git a/tests/ahriman/core/alpm/test_pkgbuild_parser.py b/tests/ahriman/core/alpm/test_pkgbuild_parser.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/ahriman/models/test_pkgbuild.py b/tests/ahriman/models/test_pkgbuild.py
new file mode 100644
index 00000000..e69de29b