in-house bytes pkgbuild parser

This commit is contained in:
Evgenii Alekseev 2025-03-28 17:26:00 +02:00
parent d283dccc1e
commit 0fbc361a9e

View File

@ -0,0 +1,162 @@
#
# Copyright (c) 2021-2025 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from collections.abc import Iterator
from dataclasses import dataclass
from enum import ReprEnum
from types import SimpleNamespace
from typing import Generator, IO, Self
from ahriman.models.pkgbuild_patch import PkgbuildPatch
class PkgbuildToken(bytes, ReprEnum):
Comment = b"#"
Assignment = b"="
SingleQuote = b"'"
DoubleQuote = b"\""
Space = b" "
NewLine = b"\n"
ParenthesisOpen = b"("
ParenthesisClose = b")"
FunctionStarts = b"function"
FunctionDeclaration = b"()"
BraceOpen = b"{"
BraceClose = b"}"
@dataclass
class PkgbuildWord:
word: bytes
quote: bytes | None
@property
def original(self) -> bytes:
quote = self.quote or b""
return quote + self.word + quote
def __bool__(self) -> bool:
return bool(self.word)
class BytesPkgbuildParser(Iterator[PkgbuildPatch]):
def __init__(self, stream: IO[bytes]) -> None:
self._io = stream
def _next(self, *, declaration: bool) -> tuple[bytes, bytes]:
while token := self._next_token(declaration=declaration):
continue
return token
def _next_token(self, *, declaration: bool) -> tuple[bytes, bytes]:
buffer = b""
while word := self._next_word():
match word:
case PkgbuildWord(PkgbuildToken.Comment, None):
self._io.readline()
case PkgbuildWord(PkgbuildToken.ParenthesisOpen, None):
buffer += PkgbuildToken.ParenthesisOpen
buffer += b"".join(self._next_words_until(PkgbuildWord(PkgbuildToken.ParenthesisClose, None)))
case PkgbuildWord(PkgbuildToken.BraceOpen, None):
buffer += PkgbuildToken.BraceOpen
buffer += b"".join(self._next_words_until(PkgbuildWord(PkgbuildToken.BraceClose, None)))
case PkgbuildWord(PkgbuildToken.FunctionStarts, None) if declaration:
continue
case PkgbuildWord(PkgbuildToken.Assignment | PkgbuildToken.NewLine | PkgbuildToken.Space, None):
return buffer, word.word
case PkgbuildWord(token, _):
buffer += token
raise StopIteration
def _next_word(self) -> PkgbuildWord:
# pass SimpleNamespace as an argument to implement side effects
def generator(quote: SimpleNamespace) -> Generator[bytes, None, None]:
while token := self._io.read(1):
match token:
case(PkgbuildToken.SingleQuote | PkgbuildToken.DoubleQuote) if quote.open is None:
quote.open = token
case closing_quote if closing_quote == quote.open:
return
case value:
yield value
if quote.open is None:
return
if quote.open is not None:
raise ValueError("No closing quotation")
open_quote = SimpleNamespace(open=None)
value = b"".join(generator(open_quote))
return PkgbuildWord(value, open_quote.open)
def _next_words_until(self, ending: PkgbuildWord) -> Generator[bytes, None, None]:
parenthesis = braces = 0
while element := self._next_word():
yield element.original
match element:
case PkgbuildWord(PkgbuildToken.BraceOpen, None):
braces += 1
case PkgbuildWord(PkgbuildToken.BraceClose, None) if braces > 0:
braces -= 1
case PkgbuildWord(PkgbuildToken.ParenthesisOpen, None):
parenthesis += 1
case PkgbuildWord(PkgbuildToken.ParenthesisClose, None) if parenthesis > 0:
parenthesis -= 1
case _ if element == ending:
return
if parenthesis != 0 or braces != 0:
raise ValueError("Unclosed parenthesis and/or braces found")
raise ValueError(f"No matching ending element {ending.word} found")
def parse(self) -> Generator[PkgbuildPatch, None, None]:
"""
parse source stream and yield parsed entries
Yields:
PkgbuildPatch: extracted a PKGBUILD node
"""
yield from self
def __iter__(self) -> Self:
"""
base iterator method
Returns:
Self: iterator instance
"""
return self
def __next__(self) -> PkgbuildPatch:
key, termination = self._next(declaration=True)
value, _ = self._next(declaration=False)
return PkgbuildPatch(key.decode(encoding="utf8"), value.decode(encoding="utf8"))