diff --git a/src/ahriman/core/alpm/bytes_pkgbuild_parser.py b/src/ahriman/core/alpm/bytes_pkgbuild_parser.py new file mode 100644 index 00000000..083d96f2 --- /dev/null +++ b/src/ahriman/core/alpm/bytes_pkgbuild_parser.py @@ -0,0 +1,144 @@ +# +# Copyright (c) 2021-2025 ahriman team. +# +# This file is part of ahriman +# (see https://github.com/arcan1s/ahriman). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +from collections.abc import Iterator +from dataclasses import dataclass +from enum import ReprEnum +from types import SimpleNamespace +from typing import Generator, IO, Self + +from ahriman.models.pkgbuild_patch import PkgbuildPatch + + +class PkgbuildToken(bytes, ReprEnum): + + Comment = b"#" + Assignment = b"=" + SingleQuote = b"'" + DoubleQuote = b"\"" + Space = b" " + NewLine = b"\n" + + ParenthesisOpen = b"(" + ParenthesisClose = b")" + + FunctionStarts = b"function" + BraceOpen = b"{" + BraceClose = b"}" + + +@dataclass +class PkgbuildWord: + + word: bytes + quote: bytes | None + + @property + def original(self) -> bytes: + quote = self.quote or b"" + return quote + self.word + quote + + def __bool__(self) -> bool: + return bool(self.word) + + +class BytesPkgbuildParser(Iterator[PkgbuildPatch]): + + def __init__(self, stream: IO[bytes]) -> None: + self._io = stream + + def _next(self) -> bytes: + while not (token := self._next_token()): + continue + return token + + def _next_token(self) -> bytes: + def get_words_until(ending: PkgbuildWord) -> Generator[bytes, None, None]: + while (element := self._next_word()) != ending: + yield element.original + + buffer = b"" + while word := self._next_word(): + match word: + case PkgbuildWord(PkgbuildToken.Comment, None): + self._io.readline() + + case PkgbuildWord(PkgbuildToken.ParenthesisOpen, None): + buffer += PkgbuildToken.ParenthesisOpen + buffer += b"".join(get_words_until(PkgbuildWord(PkgbuildToken.ParenthesisClose, None))) + buffer += PkgbuildToken.ParenthesisClose + + case PkgbuildWord(PkgbuildToken.BraceOpen, None): + buffer += PkgbuildToken.BraceOpen + buffer += b"".join(get_words_until(PkgbuildWord(PkgbuildToken.BraceClose, None))) + buffer += PkgbuildToken.BraceClose + + case PkgbuildWord(PkgbuildToken.Assignment | PkgbuildToken.NewLine | PkgbuildToken.Space, None): + return buffer + + case PkgbuildWord(token, _): + buffer += token + + raise StopIteration + + def _next_word(self) -> PkgbuildWord: + # pass SimpleNamespace as an argument to implement side effects + def generator(quote: SimpleNamespace) -> Generator[bytes, None, None]: + while token := self._io.read(1): + match token: + case(PkgbuildToken.SingleQuote | PkgbuildToken.DoubleQuote) if quote.open is None: + quote.open = token + case closing_quote if closing_quote == quote.open: + return + case value: + yield value + if quote.open is None: + return + + if quote.open is not None: + raise ValueError("No closing quotation") + + open_quote = SimpleNamespace(open=None) + value = b"".join(generator(open_quote)) + + return PkgbuildWord(value, open_quote.open) + + def parse(self) -> Generator[PkgbuildPatch, None, None]: + """ + parse source stream and yield parsed entries + + Yields: + PkgbuildPatch: extracted a PKGBUILD node + """ + yield from self + + def __iter__(self) -> Self: + """ + base iterator method + + Returns: + Self: iterator instance + """ + return self + + def __next__(self) -> PkgbuildPatch: + key = self._next() + value = self._next() + + return PkgbuildPatch(key.decode(encoding="utf8"), value.decode(encoding="utf8"))