From 58a1fd02f8f1f1e83bed98c4948ffcdad8dd9ca5 Mon Sep 17 00:00:00 2001 From: Evgenii Alekseev Date: Mon, 9 Sep 2024 18:43:39 +0300 Subject: [PATCH] pkgbuild parser impl --- src/ahriman/models/pkgbuild.py | 154 +++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 src/ahriman/models/pkgbuild.py diff --git a/src/ahriman/models/pkgbuild.py b/src/ahriman/models/pkgbuild.py new file mode 100644 index 00000000..0b765b13 --- /dev/null +++ b/src/ahriman/models/pkgbuild.py @@ -0,0 +1,154 @@ +# +# Copyright (c) 2021-2024 ahriman team. +# +# This file is part of ahriman +# (see https://github.com/arcan1s/ahriman). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# +import re +import shlex + +from collections.abc import Generator +from dataclasses import dataclass +from enum import StrEnum +from pathlib import Path +from typing import Self + +from ahriman.models.pkgbuild_patch import PkgbuildPatch + + +class PkgbuildToken(StrEnum): + """ + well-known tokens dictionary + + Attributes: + ArrayStarts(PkgbuildToken): (class attribute) array starts token + ArrayEnds(PkgbuildToken): (class attribute) array ends token + FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token + FunctionStarts(PkgbuildToken): (class attribute) function starts token + FunctionEnds(PkgbuildToken): (class attribute) function ends token + """ + + ArrayStarts = "(" + ArrayEnds = ")" + + FunctionDeclaration = "()" + FunctionStarts = "{" + FunctionEnds = "}" + + +@dataclass(frozen=True) +class Pkgbuild: + """ + simple pkgbuild reader implementation in pure python, because others sucks + + Attributes: + fields(dict[str, PkgbuildPatch]): PKGBUILD fields + """ + + fields: dict[str, PkgbuildPatch] + + _ASSIGNMENT_REGEX = re.compile(r"^(?P\w+)=(?P.+)$") + + @classmethod + def parse(cls, path: Path) -> Self: + """ + parse PKGBUILD to the file + + Args: + path(Path): path to the PKGBUILD file + + Returns: + Self: constructed instance of self + """ + + fields = {} + with path.open() as input_file: + parser = shlex.shlex(input_file, posix=True, punctuation_chars=True) + + while token := parser.get_token(): + if (match := cls._ASSIGNMENT_REGEX.match(token)) is not None: + key = match.group("key") + value = match.group("value") + fields[key] = PkgbuildPatch(key, value) + else: + match parser.get_token(): + # array processing. Arrays will be sent as "key=", "(", values, ")" + case PkgbuildToken.ArrayStarts: + key = token[:-1] + value = cls._parse_array(parser) + fields[key] = PkgbuildPatch(key, value) + + # functions processing. 
Function will be sent as "name", "()", "{", body, "}" + case PkgbuildToken.FunctionDeclaration: + key = f"{token}{PkgbuildToken.FunctionDeclaration}" + value = cls._parse_function(parser, path) + fields[key] = PkgbuildPatch(key, value) + + return cls(fields) + + @staticmethod + def _parse_array(parser: shlex.shlex) -> list[str]: + """ + parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array, + modifying source parser state + + Args: + parser(shlex.shlex): shell parser instance + + Returns: + list[str]: extracted arrays elements + """ + def extract() -> Generator[str, None, None]: + while token := parser.get_token(): + if token == PkgbuildToken.ArrayEnds: + break + yield token + + return list(extract()) + + @staticmethod + def _parse_function(parser: shlex.shlex, path: Path) -> str: + """ + parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function, + modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file + and read content again in this range + + Args: + parser(shlex.shlex): shell parser instance + path(Path): path to the source file + + Returns: + str: function body + """ + if not hasattr(parser.instream, "tell"): + return "" # mypy guard + + # find start and end positions + start_position, end_position = 0, 0 + while token := parser.get_token(): + match token: + case PkgbuildToken.FunctionStarts: + start_position = parser.instream.tell() + case PkgbuildToken.FunctionEnds: + end_position = parser.instream.tell() + break + + # open file again and read specified range + with path.open() as input_file: + input_file.seek(start_position) + content = input_file.read(end_position - start_position) + + return content