add support of array expansion

This commit is contained in:
Evgenii Alekseev 2024-09-13 23:42:44 +03:00
parent 36a53c4262
commit 20e11cd7f4
6 changed files with 281 additions and 191 deletions

View File

@ -0,0 +1,253 @@
#
# Copyright (c) 2021-2024 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import itertools
import re
import shlex
from collections.abc import Generator
from enum import StrEnum
from typing import IO
from ahriman.models.pkgbuild_patch import PkgbuildPatch
class PkgbuildToken(StrEnum):
    """
    well-known tokens dictionary. Members are plain strings, so they can be compared directly with the tokens
    produced by the shell lexer

    Attributes:
        ArrayEnds(PkgbuildToken): (class attribute) array ends token
        ArrayStarts(PkgbuildToken): (class attribute) array starts token
        Comma(PkgbuildToken): (class attribute) comma token
        Comment(PkgbuildToken): (class attribute) comment token
        FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
        FunctionEnds(PkgbuildToken): (class attribute) function ends token
        FunctionStarts(PkgbuildToken): (class attribute) function starts token
    """

    # array boundaries, e.g. ``depends=( ... )``
    ArrayStarts = "("
    ArrayEnds = ")"

    # separator between alternatives of the brace expansion, e.g. ``prefix-{a,b}``
    Comma = ","

    # standalone hash sign, the rest of the line is skipped by the parser
    Comment = "#"

    # token which follows the function name, e.g. ``package()``
    FunctionDeclaration = "()"
    # function body boundaries; the same symbols are also used to detect brace expansion inside arrays
    FunctionStarts = "{"
    FunctionEnds = "}"
class PkgbuildParser(shlex.shlex):
"""
simple pkgbuild reader implementation in pure python, because others suck
"""
_ARRAY_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=$")
# in addition to usual assignment, functions can have dash
_FUNCTION_DECLARATION = re.compile(r"^(?P<key>[\w-]+)$")
_STRING_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
def __init__(self, stream: IO[str]) -> None:
"""
default constructor
Args:
stream(IO[str]): input stream containing PKGBUILD content
"""
shlex.shlex.__init__(self, stream, posix=True, punctuation_chars=True)
self._io = stream # direct access without type casting
# ignore substitution and extend bash symbols
self.wordchars += "${}#:+-@"
# in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
self.commenters = ""
@staticmethod
def _expand_array(array: list[str]) -> list[str]:
"""
bash array expansion simulator. It takes raw parsed array and tries to expand constructions like
``(first prefix-{mid1,mid2}-suffix last)`` into ``(first, prefix-mid1-suffix prefix-mid2-suffix last)``
Args:
array(list[str]): input array
Returns:
list[str]: either source array or expanded array if possible
Raises:
ValueError: if there are errors in parser
"""
# we are using comma as marker for expansion (if any)
if PkgbuildToken.Comma not in array:
return array
# again sanity check, for expansion there are at least 3 elements (first, last and comma)
if len(array) < 3:
return array
result = []
buffer, prefix = [], None
for index, (first, second) in enumerate(itertools.pairwise(array)):
match (first, second):
# in this case we check if expansion should be started
# this condition matches "prefix{first", ","
case (_, PkgbuildToken.Comma) if PkgbuildToken.FunctionStarts in first:
prefix, part = first.rsplit(PkgbuildToken.FunctionStarts, maxsplit=1)
buffer.append(f"{prefix}{part}")
# the last element case, it matches either ",", "last}" or ",", "last}suffix"
# in case if there is suffix, it must be appended to all list elements
case (PkgbuildToken.Comma, _) if prefix is not None and PkgbuildToken.FunctionEnds in second:
part, suffix = second.rsplit(PkgbuildToken.FunctionEnds, maxsplit=1)
buffer.append(f"{prefix}{part}")
result.extend([f"{part}{suffix}" for part in buffer])
# reset state
buffer, prefix = [], None
# we have already prefix string, so we are in progress of expansion
# we always operate the last element, so this matches ",", "next"
case (PkgbuildToken.Comma, _) if prefix is not None:
buffer.append(f"{prefix}{second}")
# exactly first element of the list
case (_, _) if prefix is None and index == 0:
result.append(first)
# any next normal element
case (_, _) if prefix is None:
result.append(second)
# small sanity check
if prefix is not None:
raise ValueError(f"Could not expand `{array}` as array")
return result
def _parse_array(self) -> list[str]:
"""
parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
modifying source parser state
Returns:
list[str]: extracted arrays elements
Raises:
ValueError: if array is not closed
"""
def extract() -> Generator[str, None, None]:
while token := self.get_token():
if token == PkgbuildToken.ArrayEnds:
break
if token == PkgbuildToken.Comment:
self.instream.readline()
continue
yield token
if token != PkgbuildToken.ArrayEnds:
raise ValueError("No closing array bracket found")
return self._expand_array(list(extract()))
def _parse_function(self) -> str:
"""
parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
and read content again in this range
Returns:
str: function body
Raises:
ValueError: if function body wasn't found or parser input stream doesn't support position reading
"""
# find start and end positions
start_position, end_position = -1, -1
while token := self.get_token():
match token:
case PkgbuildToken.FunctionStarts:
start_position = self._io.tell() - 1
case PkgbuildToken.FunctionEnds:
end_position = self._io.tell()
break
if not 0 < start_position < end_position:
raise ValueError("Function body wasn't found")
# read the specified interval from source stream
self._io.seek(start_position - 1) # start from the previous symbol
content = self._io.read(end_position - start_position)
return content
def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]:
"""
parse single token to the PKGBUILD field
Args:
token(str): current token
Yields:
PkgbuildPatch: extracted a PKGBUILD node
"""
# simple assignment rule
if (match := self._STRING_ASSIGNMENT.match(token)) is not None:
key = match.group("key")
value = match.group("value")
yield PkgbuildPatch(key, value)
return
if token == PkgbuildToken.Comment:
self.instream.readline()
return
match self.get_token():
# array processing. Arrays will be sent as "key=", "(", values, ")"
case PkgbuildToken.ArrayStarts if (match := self._ARRAY_ASSIGNMENT.match(token)) is not None:
key = match.group("key")
value = self._parse_array()
yield PkgbuildPatch(key, value)
# functions processing. Function will be sent as "name", "()", "{", body, "}"
case PkgbuildToken.FunctionDeclaration if self._FUNCTION_DECLARATION.match(token):
key = f"{token}{PkgbuildToken.FunctionDeclaration}"
value = self._parse_function()
yield PkgbuildPatch(key, value) # this is not mistake, assign to token without ()
# special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
case PkgbuildToken.ArrayStarts if self._FUNCTION_DECLARATION.match(token):
next_token = self.get_token()
if next_token == PkgbuildToken.ArrayEnds: # replace closing bracket with "()"
next_token = PkgbuildToken.FunctionDeclaration
self.push_token(next_token) # type: ignore[arg-type]
yield from self._parse_token(token)
# some random token received without continuation, lets guess it is empty assignment (i.e. key=)
case other if other is not None:
yield from self._parse_token(other)
def parse(self) -> Generator[PkgbuildPatch, None, None]:
"""
parse source stream and yield parsed entries
Yields:
PkgbuildPatch: extracted a PKGBUILD node
"""
for token in self:
yield from self._parse_token(token)

View File

@ -266,7 +266,7 @@ class Package(LazyLogging):
)
for package, properties in pkgbuild.packages().items()
}
version = full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
version = full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
remote = RemoteSource(
source=PackageSource.Local,
@ -277,7 +277,7 @@ class Package(LazyLogging):
)
return cls(
base=pkgbuild.pkgbase,
base=pkgbuild["pkgbase"],
version=version,
remote=remote,
packages=packages,
@ -372,7 +372,7 @@ class Package(LazyLogging):
yield Path(source)
if install := pkgbuild.get("install"):
if (install := pkgbuild.get("install")) is not None:
yield Path(install)
@staticmethod
@ -435,7 +435,7 @@ class Package(LazyLogging):
pkgbuild = Pkgbuild.from_file(paths.cache_for(self.base) / "PKGBUILD")
return full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
return full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
except Exception:
self.logger.exception("cannot determine version of VCS package")
finally:

View File

@ -17,43 +17,20 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re
import shlex
from collections.abc import Generator, Iterator, Mapping
from collections.abc import Iterator, Mapping
from dataclasses import dataclass
from enum import StrEnum
from io import StringIO
from pathlib import Path
from typing import Any, IO, Self
from ahriman.core.alpm.pkgbuild_parser import PkgbuildParser, PkgbuildToken
from ahriman.models.pkgbuild_patch import PkgbuildPatch
class PkgbuildToken(StrEnum):
    """
    well-known tokens dictionary. Members are plain strings, so they can be compared directly with the tokens
    produced by the shell lexer

    Attributes:
        ArrayEnds(PkgbuildToken): (class attribute) array ends token
        ArrayStarts(PkgbuildToken): (class attribute) array starts token
        FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
        FunctionEnds(PkgbuildToken): (class attribute) function ends token
        FunctionStarts(PkgbuildToken): (class attribute) function starts token
    """

    # array boundaries, e.g. ``depends=( ... )``
    ArrayStarts = "("
    ArrayEnds = ")"

    # token which follows the function name, e.g. ``package()``
    FunctionDeclaration = "()"
    # function body boundaries
    FunctionStarts = "{"
    FunctionEnds = "}"
@dataclass(frozen=True)
class Pkgbuild(Mapping[str, str | list[str]]):
class Pkgbuild(Mapping[str, Any]):
"""
simple pkgbuild reader implementation in pure python, because others sucks
model and proxy for PKGBUILD properties
Attributes:
fields(dict[str, PkgbuildPatch]): PKGBUILD fields
@ -61,11 +38,6 @@ class Pkgbuild(Mapping[str, str | list[str]]):
fields: dict[str, PkgbuildPatch]
_ARRAY_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=$")
_STRING_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
# in addition, functions can have dash to usual assignment
_FUNCTION_DECLARATION = re.compile(r"^(?P<key>[\w-]+)$")
@property
def variables(self) -> dict[str, str]:
"""
@ -106,141 +78,17 @@ class Pkgbuild(Mapping[str, str | list[str]]):
Returns:
Self: constructed instance of self
"""
fields = {}
parser = shlex.shlex(stream, posix=True, punctuation_chars=True)
# ignore substitution and extend bash symbols
parser.wordchars += "${}#:+"
# in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
parser.commenters = ""
while token := parser.get_token():
try:
patch = cls._parse_token(token, parser)
fields[patch.key] = patch
except StopIteration:
break
parser = PkgbuildParser(stream)
fields = {patch.key: patch for patch in parser.parse()}
# pkgbase is optional field, the pkgname must be used instead if not set
# however, pkgname is not present in "package()" functions which we are parsing here too,
# thus, in our terms, it is optional too
if "pkgbase" not in fields:
fields["pkgbase"] = fields.get("pkgname")
if "pkgbase" not in fields and "pkgname" in fields:
fields["pkgbase"] = fields["pkgname"]
return cls({key: value for key, value in fields.items() if key})
@staticmethod
def _parse_array(parser: shlex.shlex) -> list[str]:
    """
    read array elements from the PKGBUILD. Tokens are pulled from the parser one by one (changing the parser
    state) and collected until the closing array bracket is met

    Args:
        parser(shlex.shlex): shell parser instance

    Returns:
        list[str]: extracted arrays elements

    Raises:
        ValueError: if array is not closed
    """
    elements: list[str] = []

    while True:
        token = parser.get_token()
        # parser has nothing more to give, but the closing bracket has not been seen yet
        if not token:
            raise ValueError("No closing array bracket found")
        if token == PkgbuildToken.ArrayEnds:
            return elements
        elements.append(token)
@staticmethod
def _parse_function(parser: shlex.shlex) -> str:
    """
    parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
    modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
    and read content again in this range

    Args:
        parser(shlex.shlex): shell parser instance

    Returns:
        str: function body

    Raises:
        ValueError: if function body wasn't found or parser input stream doesn't support position reading
    """
    # the underlying stream is used directly in order to ask for positions and to re-read the content
    io: IO[str] = parser.instream  # type: ignore[assignment]

    # find start and end positions
    start_position, end_position = -1, -1
    while token := parser.get_token():
        match token:
            case PkgbuildToken.FunctionStarts:
                # NOTE(review): presumably the lexer has already consumed one symbol after the bracket,
                # hence the correction by one - confirm against shlex implementation
                start_position = io.tell() - 1
            case PkgbuildToken.FunctionEnds:
                # the first standalone closing bracket stops the search
                end_position = io.tell()
                break

    # both positions must be set and ordered, otherwise there is nothing to read
    if not 0 < start_position < end_position:
        raise ValueError("Function body wasn't found")

    # read the specified interval from source stream
    io.seek(start_position - 1)  # start from the previous symbol
    content = io.read(end_position - start_position)

    return content
@staticmethod
def _parse_token(token: str, parser: shlex.shlex) -> PkgbuildPatch:
    """
    parse single token to the PKGBUILD field. Depending on the token which follows the current one, the field
    is treated as string assignment, array or function

    Args:
        token(str): current token
        parser(shlex.shlex): shell parser instance

    Returns:
        PkgbuildPatch: extracted a PKGBUILD node

    Raises:
        StopIteration: if iteration reaches the end of the file
    """
    # simple assignment rule
    if (match := Pkgbuild._STRING_ASSIGNMENT.match(token)) is not None:
        key = match.group("key")
        value = match.group("value")
        return PkgbuildPatch(key, value)

    # anything else requires lookahead, so the next token is consumed here; the order of cases matters
    match parser.get_token():
        # array processing. Arrays will be sent as "key=", "(", values, ")"
        case PkgbuildToken.ArrayStarts if (match := Pkgbuild._ARRAY_ASSIGNMENT.match(token)) is not None:
            key = match.group("key")
            value = Pkgbuild._parse_array(parser)
            return PkgbuildPatch(key, value)

        # functions processing. Function will be sent as "name", "()", "{", body, "}"
        case PkgbuildToken.FunctionDeclaration if Pkgbuild._FUNCTION_DECLARATION.match(token):
            # the key intentionally keeps the "()" suffix
            key = f"{token}{PkgbuildToken.FunctionDeclaration}"
            value = Pkgbuild._parse_function(parser)
            return PkgbuildPatch(key, value)

        # special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
        case PkgbuildToken.ArrayStarts if Pkgbuild._FUNCTION_DECLARATION.match(token):
            next_token = parser.get_token()
            if next_token == PkgbuildToken.ArrayEnds:  # replace closing bracket with "()"
                next_token = PkgbuildToken.FunctionDeclaration
            # return the (possibly replaced) token back to the parser and process the same name again
            parser.push_token(next_token)  # type: ignore[arg-type]
            return Pkgbuild._parse_token(token, parser)

        # some random token received without continuation, lets guess it is empty assignment (i.e. key=)
        case other if other is not None:
            # the current token is dropped and parsing continues from the consumed one
            return Pkgbuild._parse_token(other, parser)

        # reached the end of the parser
        case None:
            raise StopIteration
def packages(self) -> dict[str, Self]:
"""
extract properties from internal package functions
@ -252,44 +100,33 @@ class Pkgbuild(Mapping[str, str | list[str]]):
def io(package_name: str) -> IO[str]:
# try to read package specific function and fallback to default otherwise
# content = self.get_as(f"package_{package_name}") or self.get_as("package")
content = getattr(self, f"package_{package_name}") or self.package
content = self.get(f"package_{package_name}") or self["package"]
return StringIO(content)
return {package: self.from_io(io(package)) for package in packages}
def __getattr__(self, item: str) -> Any:
    """
    proxy method for PKGBUILD properties. The lookup is delegated to the item access (``self[item]``)

    Args:
        item(str): property name

    Returns:
        Any: attribute by its name
    """
    return self[item]
def __getitem__(self, key: str) -> str | list[str]:
def __getitem__(self, item: str) -> Any:
"""
get the field of the PKGBUILD. This method tries to get exact key value if possible; if none found, it tries to
fetch function with the same name. And, finally, it returns empty value if nothing found, so this function never
raises an ``KeyError``.exception``
fetch function with the same name
Args:
key(str): key name
item(str): key name
Returns:
str | list[str]: value by the key
Any: substituted value by the key
Raises:
KeyError: if key doesn't exist
"""
value = self.fields.get(key)
value = self.fields.get(item)
# if the key wasn't found and user didn't ask for function explicitly, we can try to get by function name
if value is None and not key.endswith(PkgbuildToken.FunctionDeclaration):
value = self.fields.get(f"{key}{PkgbuildToken.FunctionDeclaration}")
# if we still didn't find anything, we fall back to empty value (just like shell)
# to avoid recursion here, we can just drop from the method
if value is None and not item.endswith(PkgbuildToken.FunctionDeclaration):
value = self.fields.get(f"{item}{PkgbuildToken.FunctionDeclaration}")
# if we still didn't find anything, we can just raise the exception
if value is None:
return ""
raise KeyError(item)
return value.substitute(self.variables)

View File

@ -28,9 +28,9 @@ def test_package_dependencies() -> None:
"""
must extract package dependencies
"""
packages = dict(Versions.package_dependencies("srcinfo"))
packages = dict(Versions.package_dependencies("requests"))
assert packages
assert packages.get("parse") is not None
assert packages.get("urllib3") is not None
def test_package_dependencies_missing() -> None:

View File