diff --git a/src/ahriman/core/alpm/pkgbuild_parser.py b/src/ahriman/core/alpm/pkgbuild_parser.py
new file mode 100644
index 00000000..8e357f70
--- /dev/null
+++ b/src/ahriman/core/alpm/pkgbuild_parser.py
@@ -0,0 +1,253 @@
+#
+# Copyright (c) 2021-2024 ahriman team.
+#
+# This file is part of ahriman
+# (see https://github.com/arcan1s/ahriman).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+import itertools
+import re
+import shlex
+
+from collections.abc import Generator
+from enum import StrEnum
+from typing import IO
+
+from ahriman.models.pkgbuild_patch import PkgbuildPatch
+
+
+class PkgbuildToken(StrEnum):
+    """
+    well-known tokens dictionary
+
+    Attributes:
+        ArrayEnds(PkgbuildToken): (class attribute) array ends token, i.e. ``)``
+        ArrayStarts(PkgbuildToken): (class attribute) array starts token, i.e. ``(``
+        Comma(PkgbuildToken): (class attribute) comma token, i.e. ``,``
+        Comment(PkgbuildToken): (class attribute) comment token, i.e. ``#``
+        FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token, i.e. ``()``
+        FunctionEnds(PkgbuildToken): (class attribute) function ends token, i.e. ``}``
+        FunctionStarts(PkgbuildToken): (class attribute) function starts token, i.e. ``{``
+    """
+
+    ArrayStarts = "("
+    ArrayEnds = ")"
+
+    Comma = ","
+
+    Comment = "#"
+
+    FunctionDeclaration = "()"
+    FunctionStarts = "{"
+    FunctionEnds = "}"
+
+
+class PkgbuildParser(shlex.shlex):
+    """
+    simple pkgbuild reader implementation in pure python, because others suck
+    """
+
+    _ARRAY_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=$")
+    # in addition to usual assignment, functions can have dash
+    _FUNCTION_DECLARATION = re.compile(r"^(?P<key>[\w-]+)$")
+    _STRING_ASSIGNMENT = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
+
+    def __init__(self, stream: IO[str]) -> None:
+        """
+        default constructor
+
+        Args:
+            stream(IO[str]): input stream containing PKGBUILD content
+        """
+        shlex.shlex.__init__(self, stream, posix=True, punctuation_chars=True)
+        self._io = stream  # direct access without type casting
+
+        # ignore substitution and extend bash symbols
+        self.wordchars += "${}#:+-@"
+        # in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
+        self.commenters = ""
+
+    @staticmethod
+    def _expand_array(array: list[str]) -> list[str]:
+        """
+        bash array expansion simulator. It takes raw parsed array and tries to expand constructions like
+        ``(first prefix-{mid1,mid2}-suffix last)`` into ``(first prefix-mid1-suffix prefix-mid2-suffix last)``.
+        Elements outside of braces are kept as is; nested braces are not supported
+
+        Args:
+            array(list[str]): input array
+
+        Returns:
+            list[str]: either source array or expanded array if possible
+
+        Raises:
+            ValueError: if there are errors in parser
+        """
+        # we are using comma as marker for expansion (if any)
+        if PkgbuildToken.Comma not in array:
+            return array
+        # again sanity check, for expansion there are at least 3 elements (first, last and comma)
+        if len(array) < 3:
+            return array
+
+        result: list[str] = []
+        buffer: list[str] = []
+        prefix: str | None = None
+
+        # every element is visited exactly once together with its neighbours (None at the array edges),
+        # thus normal elements are neither lost nor duplicated wherever the expansion is located
+        neighbours = zip(itertools.chain([None], array), array, itertools.chain(array[1:], [None]))
+        for previous, token, following in neighbours:
+            if prefix is None:
+                # in this case we check if expansion should be started
+                # this condition matches "prefix{first", ","
+                if following == PkgbuildToken.Comma and PkgbuildToken.FunctionStarts in token:
+                    prefix, part = token.rsplit(PkgbuildToken.FunctionStarts, maxsplit=1)
+                    buffer.append(f"{prefix}{part}")
+                # any normal element, it is copied as is
+                else:
+                    result.append(token)
+
+            # the last element case, it matches either ",", "last}" or ",", "last}suffix"
+            # in case if there is suffix, it must be appended to all list elements
+            elif previous == PkgbuildToken.Comma and PkgbuildToken.FunctionEnds in token:
+                part, suffix = token.rsplit(PkgbuildToken.FunctionEnds, maxsplit=1)
+                buffer.append(f"{prefix}{part}")
+                result.extend([f"{item}{suffix}" for item in buffer])
+                # reset state
+                buffer, prefix = [], None
+
+            # we have already prefix string, so we are in progress of expansion
+            # this matches ",", "next"; separators and anything not following the comma are skipped
+            elif previous == PkgbuildToken.Comma:
+                buffer.append(f"{prefix}{token}")
+
+        # small sanity check
+        if prefix is not None:
+            raise ValueError(f"Could not expand `{array}` as array")
+
+        return result
+
+    def _parse_array(self) -> list[str]:
+        """
+        parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
+        modifying source parser state
+
+        Returns:
+            list[str]: extracted arrays elements
+
+        Raises:
+            ValueError: if array is not closed
+        """
+        def extract() -> Generator[str, None, None]:
+            while (token := self.get_token()) is not None:  # empty string is a valid element, e.g. ("")
+                if token == PkgbuildToken.ArrayEnds:
+                    break
+                if token == PkgbuildToken.Comment:
+                    self.instream.readline()
+                    continue
+                yield token
+
+            if token != PkgbuildToken.ArrayEnds:
+                raise ValueError("No closing array bracket found")
+
+        return self._expand_array(list(extract()))
+
+    def _parse_function(self) -> str:
+        """
+        parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
+        modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
+        and read content again in this range
+
+        Returns:
+            str: function body
+
+        Raises:
+            ValueError: if function body wasn't found or parser input stream doesn't support position reading
+        """
+        # find start and end positions
+        start_position, end_position = -1, -1
+        while (token := self.get_token()) is not None:  # empty token (e.g. "") must not stop the lookup
+            match token:
+                case PkgbuildToken.FunctionStarts:
+                    start_position = self._io.tell() - 1
+                case PkgbuildToken.FunctionEnds:
+                    end_position = self._io.tell()
+                    break
+
+        if not 0 < start_position < end_position:
+            raise ValueError("Function body wasn't found")
+
+        # read the specified interval from source stream
+        self._io.seek(start_position - 1)  # start from the previous symbol
+        content = self._io.read(end_position - start_position)
+
+        return content
+
+    def _parse_token(self, token: str) -> Generator[PkgbuildPatch, None, None]:
+        """
+        parse single token to the PKGBUILD field
+
+        Args:
+            token(str): current token
+
+        Yields:
+            PkgbuildPatch: extracted a PKGBUILD node
+        """
+        # simple assignment rule
+        if (match := self._STRING_ASSIGNMENT.match(token)) is not None:
+            key = match.group("key")
+            value = match.group("value")
+            yield PkgbuildPatch(key, value)
+            return
+
+        if token == PkgbuildToken.Comment:
+            self.instream.readline()
+            return
+
+        match self.get_token():
+            # array processing. Arrays will be sent as "key=", "(", values, ")"
+            case PkgbuildToken.ArrayStarts if (match := self._ARRAY_ASSIGNMENT.match(token)) is not None:
+                key = match.group("key")
+                value = self._parse_array()
+                yield PkgbuildPatch(key, value)
+
+            # functions processing. Function will be sent as "name", "()", "{", body, "}"
+            case PkgbuildToken.FunctionDeclaration if self._FUNCTION_DECLARATION.match(token):
+                key = f"{token}{PkgbuildToken.FunctionDeclaration}"
+                value = self._parse_function()
+                yield PkgbuildPatch(key, value)  # this is not mistake, the key of function carries "()" suffix
+
+            # special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
+            case PkgbuildToken.ArrayStarts if self._FUNCTION_DECLARATION.match(token):
+                next_token = self.get_token()
+                if next_token == PkgbuildToken.ArrayEnds:  # replace closing bracket with "()"
+                    next_token = PkgbuildToken.FunctionDeclaration
+                self.push_token(next_token)  # type: ignore[arg-type]
+                yield from self._parse_token(token)
+
+            # some random token received without continuation, lets guess it is empty assignment (i.e. key=)
+            case other if other is not None:
+                yield from self._parse_token(other)
+
+    def parse(self) -> Generator[PkgbuildPatch, None, None]:
+        """
+        parse source stream and yield parsed entries
+
+        Yields:
+            PkgbuildPatch: extracted a PKGBUILD node
+        """
+        for token in self:
+            yield from self._parse_token(token)
diff --git a/src/ahriman/models/package.py b/src/ahriman/models/package.py
index 23e5f93c..ad20f4ab 100644
--- a/src/ahriman/models/package.py
+++ b/src/ahriman/models/package.py
@@ -266,7 +266,7 @@ class Package(LazyLogging):
)
for package, properties in pkgbuild.packages().items()
}
- version = full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
+ version = full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
remote = RemoteSource(
source=PackageSource.Local,
@@ -277,7 +277,7 @@ class Package(LazyLogging):
)
return cls(
- base=pkgbuild.pkgbase,
+ base=pkgbuild["pkgbase"],
version=version,
remote=remote,
packages=packages,
@@ -372,7 +372,7 @@ class Package(LazyLogging):
yield Path(source)
- if install := pkgbuild.get("install"):
+ if (install := pkgbuild.get("install")) is not None:
yield Path(install)
@staticmethod
@@ -435,7 +435,7 @@ class Package(LazyLogging):
pkgbuild = Pkgbuild.from_file(paths.cache_for(self.base) / "PKGBUILD")
- return full_version(pkgbuild.epoch, pkgbuild.pkgver, pkgbuild.pkgrel)
+ return full_version(pkgbuild.get("epoch"), pkgbuild["pkgver"], pkgbuild["pkgrel"])
except Exception:
self.logger.exception("cannot determine version of VCS package")
finally:
diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py
index 211c4799..72cd6e32 100644
--- a/src/ahriman/models/pkgbuild.py
+++ b/src/ahriman/models/pkgbuild.py
@@ -17,43 +17,20 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-import re
-import shlex
-
-from collections.abc import Generator, Iterator, Mapping
+from collections.abc import Iterator, Mapping
from dataclasses import dataclass
-from enum import StrEnum
from io import StringIO
from pathlib import Path
from typing import Any, IO, Self
+from ahriman.core.alpm.pkgbuild_parser import PkgbuildParser, PkgbuildToken
from ahriman.models.pkgbuild_patch import PkgbuildPatch
-class PkgbuildToken(StrEnum):
- """
- well-known tokens dictionary
-
- Attributes:
- ArrayEnds(PkgbuildToken): (class attribute) array ends token
- ArrayStarts(PkgbuildToken): (class attribute) array starts token
- FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
- FunctionEnds(PkgbuildToken): (class attribute) function ends token
- FunctionStarts(PkgbuildToken): (class attribute) function starts token
- """
-
- ArrayStarts = "("
- ArrayEnds = ")"
-
- FunctionDeclaration = "()"
- FunctionStarts = "{"
- FunctionEnds = "}"
-
-
@dataclass(frozen=True)
-class Pkgbuild(Mapping[str, str | list[str]]):
+class Pkgbuild(Mapping[str, Any]):
"""
- simple pkgbuild reader implementation in pure python, because others sucks
+ model and proxy for PKGBUILD properties
Attributes:
fields(dict[str, PkgbuildPatch]): PKGBUILD fields
@@ -61,11 +38,6 @@ class Pkgbuild(Mapping[str, str | list[str]]):
fields: dict[str, PkgbuildPatch]
- _ARRAY_ASSIGNMENT = re.compile(r"^(?P\w+)=$")
- _STRING_ASSIGNMENT = re.compile(r"^(?P\w+)=(?P.+)$")
- # in addition, functions can have dash to usual assignment
- _FUNCTION_DECLARATION = re.compile(r"^(?P[\w-]+)$")
-
@property
def variables(self) -> dict[str, str]:
"""
@@ -106,141 +78,17 @@ class Pkgbuild(Mapping[str, str | list[str]]):
Returns:
Self: constructed instance of self
"""
- fields = {}
-
- parser = shlex.shlex(stream, posix=True, punctuation_chars=True)
- # ignore substitution and extend bash symbols
- parser.wordchars += "${}#:+"
- # in case of default behaviour, it will ignore, for example, segment part of url outside of quotes
- parser.commenters = ""
- while token := parser.get_token():
- try:
- patch = cls._parse_token(token, parser)
- fields[patch.key] = patch
- except StopIteration:
- break
+ parser = PkgbuildParser(stream)
+ fields = {patch.key: patch for patch in parser.parse()}
# pkgbase is optional field, the pkgname must be used instead if not set
# however, pkgname is not presented is "package()" functions which we are parsing here too,
# thus, in our terms, it is optional too
- if "pkgbase" not in fields:
- fields["pkgbase"] = fields.get("pkgname")
+ if "pkgbase" not in fields and "pkgname" in fields:
+ fields["pkgbase"] = fields["pkgname"]
return cls({key: value for key, value in fields.items() if key})
- @staticmethod
- def _parse_array(parser: shlex.shlex) -> list[str]:
- """
- parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
- modifying source parser state
-
- Args:
- parser(shlex.shlex): shell parser instance
-
- Returns:
- list[str]: extracted arrays elements
-
- Raises:
- ValueError: if array is not closed
- """
- def extract() -> Generator[str, None, None]:
- while token := parser.get_token():
- if token == PkgbuildToken.ArrayEnds:
- break
- yield token
-
- if token != PkgbuildToken.ArrayEnds:
- raise ValueError("No closing array bracket found")
-
- return list(extract())
-
- @staticmethod
- def _parse_function(parser: shlex.shlex) -> str:
- """
- parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
- modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
- and read content again in this range
-
- Args:
- parser(shlex.shlex): shell parser instance
-
- Returns:
- str: function body
-
- Raises:
- ValueError: if function body wasn't found or parser input stream doesn't support position reading
- """
- io: IO[str] = parser.instream # type: ignore[assignment]
-
- # find start and end positions
- start_position, end_position = -1, -1
- while token := parser.get_token():
- match token:
- case PkgbuildToken.FunctionStarts:
- start_position = io.tell() - 1
- case PkgbuildToken.FunctionEnds:
- end_position = io.tell()
- break
-
- if not 0 < start_position < end_position:
- raise ValueError("Function body wasn't found")
-
- # read the specified interval from source stream
- io.seek(start_position - 1) # start from the previous symbol
- content = io.read(end_position - start_position)
-
- return content
-
- @staticmethod
- def _parse_token(token: str, parser: shlex.shlex) -> PkgbuildPatch:
- """
- parse single token to the PKGBUILD field
-
- Args:
- token(str): current token
- parser(shlex.shlex): shell parser instance
-
- Returns:
- PkgbuildPatch: extracted a PKGBUILD node
-
- Raises:
- StopIteration: if iteration reaches the end of the file
- """
- # simple assignment rule
- if (match := Pkgbuild._STRING_ASSIGNMENT.match(token)) is not None:
- key = match.group("key")
- value = match.group("value")
- return PkgbuildPatch(key, value)
-
- match parser.get_token():
- # array processing. Arrays will be sent as "key=", "(", values, ")"
- case PkgbuildToken.ArrayStarts if (match := Pkgbuild._ARRAY_ASSIGNMENT.match(token)) is not None:
- key = match.group("key")
- value = Pkgbuild._parse_array(parser)
- return PkgbuildPatch(key, value)
-
- # functions processing. Function will be sent as "name", "()", "{", body, "}"
- case PkgbuildToken.FunctionDeclaration if Pkgbuild._FUNCTION_DECLARATION.match(token):
- key = f"{token}{PkgbuildToken.FunctionDeclaration}"
- value = Pkgbuild._parse_function(parser)
- return PkgbuildPatch(key, value) # this is not mistake, assign to token without ()
-
- # special function case, where "(" and ")" are separated tokens, e.g. "pkgver ( )"
- case PkgbuildToken.ArrayStarts if Pkgbuild._FUNCTION_DECLARATION.match(token):
- next_token = parser.get_token()
- if next_token == PkgbuildToken.ArrayEnds: # replace closing bracket with "()"
- next_token = PkgbuildToken.FunctionDeclaration
- parser.push_token(next_token) # type: ignore[arg-type]
- return Pkgbuild._parse_token(token, parser)
-
- # some random token received without continuation, lets guess it is empty assignment (i.e. key=)
- case other if other is not None:
- return Pkgbuild._parse_token(other, parser)
-
- # reached the end of the parser
- case None:
- raise StopIteration
-
def packages(self) -> dict[str, Self]:
"""
extract properties from internal package functions
@@ -252,44 +100,33 @@ class Pkgbuild(Mapping[str, str | list[str]]):
def io(package_name: str) -> IO[str]:
# try to read package specific function and fallback to default otherwise
- # content = self.get_as(f"package_{package_name}") or self.get_as("package")
- content = getattr(self, f"package_{package_name}") or self.package
+ content = self.get(f"package_{package_name}") or self["package"]
return StringIO(content)
return {package: self.from_io(io(package)) for package in packages}
- def __getattr__(self, item: str) -> Any:
- """
- proxy method for PKGBUILD properties
-
- Args:
- item(str): property name
-
- Returns:
- Any: attribute by its name
- """
- return self[item]
-
- def __getitem__(self, key: str) -> str | list[str]:
+ def __getitem__(self, item: str) -> Any:
"""
get the field of the PKGBUILD. This method tries to get exact key value if possible; if none found, it tries to
- fetch function with the same name. And, finally, it returns empty value if nothing found, so this function never
- raises an ``KeyError``.exception``
+ fetch function with the same name
Args:
- key(str): key name
+ item(str): key name
Returns:
- str | list[str]: value by the key
+ Any: substituted value by the key
+
+ Raises:
+ KeyError: if key doesn't exist
"""
- value = self.fields.get(key)
+ value = self.fields.get(item)
# if the key wasn't found and user didn't ask for function explicitly, we can try to get by function name
- if value is None and not key.endswith(PkgbuildToken.FunctionDeclaration):
- value = self.fields.get(f"{key}{PkgbuildToken.FunctionDeclaration}")
- # if we still didn't find anything, we fall back to empty value (just like shell)
- # to avoid recursion here, we can just drop from the method
+ if value is None and not item.endswith(PkgbuildToken.FunctionDeclaration):
+ value = self.fields.get(f"{item}{PkgbuildToken.FunctionDeclaration}")
+
+ # if we still didn't find anything, we can just raise the exception
if value is None:
- return ""
+ raise KeyError(item)
return value.substitute(self.variables)
diff --git a/tests/ahriman/application/handlers/test_handler_versions.py b/tests/ahriman/application/handlers/test_handler_versions.py
index 73602a42..be64b9c2 100644
--- a/tests/ahriman/application/handlers/test_handler_versions.py
+++ b/tests/ahriman/application/handlers/test_handler_versions.py
@@ -28,9 +28,9 @@ def test_package_dependencies() -> None:
"""
must extract package dependencies
"""
- packages = dict(Versions.package_dependencies("srcinfo"))
+ packages = dict(Versions.package_dependencies("requests"))
assert packages
- assert packages.get("parse") is not None
+ assert packages.get("urllib3") is not None
def test_package_dependencies_missing() -> None:
diff --git a/tests/ahriman/core/alpm/test_pkgbuild_parser.py b/tests/ahriman/core/alpm/test_pkgbuild_parser.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/ahriman/models/test_pkgbuild.py b/tests/ahriman/models/test_pkgbuild.py
new file mode 100644
index 00000000..e69de29b