From 0fbc361a9e7316227fa71f36ef1d273e1ecd23eb Mon Sep 17 00:00:00 2001 From: Evgenii Alekseev Date: Fri, 28 Mar 2025 17:26:00 +0200 Subject: [PATCH] in-house bytes pkgbuild parser --- .../core/alpm/bytes_pkgbuild_parser.py | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 src/ahriman/core/alpm/bytes_pkgbuild_parser.py diff --git a/src/ahriman/core/alpm/bytes_pkgbuild_parser.py b/src/ahriman/core/alpm/bytes_pkgbuild_parser.py new file mode 100644 index 00000000..c54724d8 --- /dev/null +++ b/src/ahriman/core/alpm/bytes_pkgbuild_parser.py @@ -0,0 +1,162 @@ +# +# Copyright (c) 2021-2025 ahriman team. +# +# This file is part of ahriman +# (see https://github.com/arcan1s/ahriman). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see https://www.gnu.org/licenses/. 
+# +from collections.abc import Iterator +from dataclasses import dataclass +from enum import ReprEnum +from types import SimpleNamespace +from typing import Generator, IO, Self + +from ahriman.models.pkgbuild_patch import PkgbuildPatch + + +class PkgbuildToken(bytes, ReprEnum): + + Comment = b"#" + Assignment = b"=" + SingleQuote = b"'" + DoubleQuote = b"\"" + Space = b" " + NewLine = b"\n" + + ParenthesisOpen = b"(" + ParenthesisClose = b")" + + FunctionStarts = b"function" + FunctionDeclaration = b"()" + BraceOpen = b"{" + BraceClose = b"}" + + +@dataclass +class PkgbuildWord: + + word: bytes + quote: bytes | None + + @property + def original(self) -> bytes: + quote = self.quote or b"" + return quote + self.word + quote + + def __bool__(self) -> bool: + return bool(self.word) + + +class BytesPkgbuildParser(Iterator[PkgbuildPatch]): + + def __init__(self, stream: IO[bytes]) -> None: + self._io = stream + + def _next(self, *, declaration: bool) -> tuple[bytes, bytes]: + while token := self._next_token(declaration=declaration): + continue + return token + + def _next_token(self, *, declaration: bool) -> tuple[bytes, bytes]: + buffer = b"" + while word := self._next_word(): + match word: + case PkgbuildWord(PkgbuildToken.Comment, None): + self._io.readline() + + case PkgbuildWord(PkgbuildToken.ParenthesisOpen, None): + buffer += PkgbuildToken.ParenthesisOpen + buffer += b"".join(self._next_words_until(PkgbuildWord(PkgbuildToken.ParenthesisClose, None))) + + case PkgbuildWord(PkgbuildToken.BraceOpen, None): + buffer += PkgbuildToken.BraceOpen + buffer += b"".join(self._next_words_until(PkgbuildWord(PkgbuildToken.BraceClose, None))) + + case PkgbuildWord(PkgbuildToken.FunctionStarts, None) if declaration: + continue + + case PkgbuildWord(PkgbuildToken.Assignment | PkgbuildToken.NewLine | PkgbuildToken.Space, None): + return buffer, word.word + + case PkgbuildWord(token, _): + buffer += token + + raise StopIteration + + def _next_word(self) -> PkgbuildWord: + # pass 
SimpleNamespace as an argument to implement side effects + def generator(quote: SimpleNamespace) -> Generator[bytes, None, None]: + while token := self._io.read(1): + match token: + case(PkgbuildToken.SingleQuote | PkgbuildToken.DoubleQuote) if quote.open is None: + quote.open = token + case closing_quote if closing_quote == quote.open: + return + case value: + yield value + if quote.open is None: + return + + if quote.open is not None: + raise ValueError("No closing quotation") + + open_quote = SimpleNamespace(open=None) + value = b"".join(generator(open_quote)) + + return PkgbuildWord(value, open_quote.open) + + def _next_words_until(self, ending: PkgbuildWord) -> Generator[bytes, None, None]: + parenthesis = braces = 0 + while element := self._next_word(): + yield element.original + match element: + case PkgbuildWord(PkgbuildToken.BraceOpen, None): + braces += 1 + case PkgbuildWord(PkgbuildToken.BraceClose, None) if braces > 0: + braces -= 1 + case PkgbuildWord(PkgbuildToken.ParenthesisOpen, None): + parenthesis += 1 + case PkgbuildWord(PkgbuildToken.ParenthesisClose, None) if parenthesis > 0: + parenthesis -= 1 + case _ if element == ending: + return + + if parenthesis != 0 or braces != 0: + raise ValueError("Unclosed parenthesis and/or braces found") + raise ValueError(f"No matching ending element {ending.word} found") + + def parse(self) -> Generator[PkgbuildPatch, None, None]: + """ + parse source stream and yield parsed entries + + Yields: + PkgbuildPatch: extracted a PKGBUILD node + """ + yield from self + + def __iter__(self) -> Self: + """ + base iterator method + + Returns: + Self: iterator instance + """ + return self + + def __next__(self) -> PkgbuildPatch: + key, termination = self._next(declaration=True) + value, _ = self._next(declaration=False) + + return PkgbuildPatch(key.decode(encoding="utf8"), value.decode(encoding="utf8"))