pkgbuild parser impl

This commit is contained in:
Evgenii Alekseev 2024-09-09 18:43:39 +03:00
parent 05f87a36d6
commit 58a1fd02f8

View File

@ -0,0 +1,154 @@
#
# Copyright (c) 2021-2024 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re
import shlex
from collections.abc import Generator
from dataclasses import dataclass
from enum import StrEnum
from pathlib import Path
from typing import Self
from ahriman.models.pkgbuild_patch import PkgbuildPatch
class PkgbuildToken(StrEnum):
"""
well-known tokens dictionary
Attributes:
ArrayStarts(PkgbuildToken): (class attribute) array starts token
ArrayEnds(PkgbuildToken): (class attribute) array ends token
FunctionDeclaration(PkgbuildToken): (class attribute) function declaration token
FunctionStarts(PkgbuildToken): (class attribute) function starts token
FunctionEnds(PkgbuildToken): (class attribute) function ends token
"""
ArrayStarts = "("
ArrayEnds = ")"
FunctionDeclaration = "()"
FunctionStarts = "{"
FunctionEnds = "}"
@dataclass(frozen=True)
class Pkgbuild:
"""
simple pkgbuild reader implementation in pure python, because others sucks
Attributes:
fields(dict[str, PkgbuildPatch]): PKGBUILD fields
"""
fields: dict[str, PkgbuildPatch]
_ASSIGNMENT_REGEX = re.compile(r"^(?P<key>\w+)=(?P<value>.+)$")
@classmethod
def parse(cls, path: Path) -> Self:
"""
parse PKGBUILD to the file
Args:
path(Path): path to the PKGBUILD file
Returns:
Self: constructed instance of self
"""
fields = {}
with path.open() as input_file:
parser = shlex.shlex(input_file, posix=True, punctuation_chars=True)
while token := parser.get_token():
if (match := cls._ASSIGNMENT_REGEX.match(token)) is not None:
key = match.group("key")
value = match.group("value")
fields[key] = PkgbuildPatch(key, value)
else:
match parser.get_token():
# array processing. Arrays will be sent as "key=", "(", values, ")"
case PkgbuildToken.ArrayStarts:
key = token[:-1]
value = cls._parse_array(parser)
fields[key] = PkgbuildPatch(key, value)
# functions processing. Function will be sent as "name", "()", "{", body, "}"
case PkgbuildToken.FunctionDeclaration:
key = f"{token}{PkgbuildToken.FunctionDeclaration}"
value = cls._parse_function(parser, path)
fields[key] = PkgbuildPatch(key, value)
return cls(fields)
@staticmethod
def _parse_array(parser: shlex.shlex) -> list[str]:
"""
parse array from the PKGBUILD. This method will extract tokens from parser until it matches closing array,
modifying source parser state
Args:
parser(shlex.shlex): shell parser instance
Returns:
list[str]: extracted arrays elements
"""
def extract() -> Generator[str, None, None]:
while token := parser.get_token():
if token == PkgbuildToken.ArrayEnds:
break
yield token
return list(extract())
@staticmethod
def _parse_function(parser: shlex.shlex, path: Path) -> str:
"""
parse function from the PKGBUILD. This method will extract tokens from parser until it matches closing function,
modifying source parser state. Instead of trying to combine tokens together, it uses positions of the file
and read content again in this range
Args:
parser(shlex.shlex): shell parser instance
path(Path): path to the source file
Returns:
str: function body
"""
if not hasattr(parser.instream, "tell"):
return "" # mypy guard
# find start and end positions
start_position, end_position = 0, 0
while token := parser.get_token():
match token:
case PkgbuildToken.FunctionStarts:
start_position = parser.instream.tell()
case PkgbuildToken.FunctionEnds:
end_position = parser.instream.tell()
break
# open file again and read specified range
with path.open() as input_file:
input_file.seek(start_position)
content = input_file.read(end_position - start_position)
return content