add ability to partition tree before calculation

This commit is contained in:
2023-08-27 01:12:12 +03:00
parent f6081507c0
commit a1db4dc8b8
13 changed files with 393 additions and 45 deletions

View File

@ -715,6 +715,8 @@ def _set_repo_tree_parser(root: SubParserAction) -> argparse.ArgumentParser:
parser = root.add_parser("repo-tree", help="dump repository tree",
description="dump repository tree based on packages dependencies",
formatter_class=_formatter)
parser.add_argument("-p", "--partitions", help="also divide packages by independent partitions",
type=int, default=1)
parser.set_defaults(handler=handlers.Structure, lock=None, report=False, quiet=True, unsafe=True)
return parser

View File

@ -22,7 +22,7 @@ import argparse
from ahriman.application.application import Application
from ahriman.application.handlers import Handler
from ahriman.core.configuration import Configuration
from ahriman.core.formatters import TreePrinter
from ahriman.core.formatters import StringPrinter, TreePrinter
from ahriman.core.tree import Tree
@ -45,8 +45,14 @@ class Structure(Handler):
report(bool): force enable or disable reporting
"""
application = Application(architecture, configuration, report=report)
packages = application.repository.packages()
partitions = Tree.partition(application.repository.packages(), count=args.partitions)
tree = Tree.resolve(packages)
for num, level in enumerate(tree):
TreePrinter(num, level).print(verbose=True, separator=" ")
for partition_id, partition in enumerate(partitions):
StringPrinter(f"partition #{partition_id}").print(verbose=False)
tree = Tree.resolve(partition)
for num, level in enumerate(tree):
TreePrinter(num, level).print(verbose=True, separator=" ")
# empty line
StringPrinter("").print(verbose=False)

View File

@ -244,6 +244,21 @@ class PasswordError(ValueError):
ValueError.__init__(self, f"Password error: {details}")
class PartitionError(RuntimeError):
    """
    exception raised when packages could not be divided into partitions
    """

    def __init__(self, count: int) -> None:
        """
        default constructor

        Args:
            count(int): count of partitions which has been requested
        """
        message = f"Could not divide packages into {count} partitions"
        RuntimeError.__init__(self, message)
class PkgbuildGeneratorError(RuntimeError):
"""
exception class for support type triggers

View File

@ -38,7 +38,7 @@ class TreePrinter(StringPrinter):
level(int): dependencies tree level
packages(list[Package]): packages which belong to this level
"""
StringPrinter.__init__(self, f"level {level}")
StringPrinter.__init__(self, f"level #{level}")
self.packages = packages
def properties(self) -> list[Property]:

View File

@ -21,9 +21,10 @@ from __future__ import annotations
import functools
from collections.abc import Callable, Iterable
from collections.abc import Iterable
from ahriman.core.util import partition
from ahriman.core.exceptions import PartitionError
from ahriman.core.util import minmax, partition
from ahriman.models.package import Package
@ -128,6 +129,75 @@ class Tree:
"""
self.leaves = leaves
@staticmethod
def balance(partitions: list[list[Leaf]]) -> list[list[Leaf]]:
    """
    balance partitions. This method repeatedly takes the shortest and the longest partitions and moves one free
    leaf from the longest to the shortest one. The leaf is free if it doesn't depend on any other leaf in its
    partition and is not dependency of any of them. In case if the longest partition has no free leaves, the
    balancing stops and partitions are returned as they are. This method is guaranteed to produce the same
    unsorted sequences for same unsorted input

    Args:
        partitions(list[list[Leaf]]): source unbalanced partitions

    Returns:
        list[list[Leaf]]: balanced partitions. Empty partitions are removed from the result, thus the result is
        empty list in case if there are no non-empty partitions
    """
    # to make sure that we will have same sequences after balance we need to ensure that list is sorted
    partitions = [
        sorted(part, key=lambda leaf: leaf.package.base)
        for part in partitions if part
    ]

    # there are no packages at all, min and max lookup would raise ValueError on empty sequence
    if not partitions:
        return partitions

    while True:
        min_part, max_part = minmax(partitions, key=len)
        if len(max_part) - len(min_part) <= 1:  # there is nothing to balance
            break

        # find first package from max list which is not dependency and doesn't depend on any other package
        free_index = next(
            (
                index
                for index, leaf in enumerate(max_part)
                if not leaf.is_dependency(max_part) and leaf.is_root(max_part)
            ),
            None
        )
        if free_index is None:  # impossible to balance between the shortest and the longest
            break

        min_part.append(max_part.pop(free_index))

    return partitions
@staticmethod
def partition(packages: Iterable[Package], *, count: int) -> list[list[Package]]:
    """
    divide packages into independent groups of more or less equal size. Packages which belong to one group
    don't depend on any package from other groups

    Args:
        packages(Iterable[Package]): packages list
        count(int): maximal amount of partitions

    Returns:
        list[list[Package]]: list of packages lists based on their dependencies. The amount of sublists is
        less or equal to ``count``

    Raises:
        PartitionError: in case if it is impossible to divide tree by specified amount of partitions
    """
    if count < 1:
        raise PartitionError(count)

    if count > 1:
        tree = Tree([Leaf(package) for package in packages])
        return tree.partitions(count=count)

    # special case: the only partition contains all packages, no need to process dependencies
    everything = list(packages)
    everything.sort(key=lambda package: package.base)
    return [everything]
@staticmethod
def resolve(packages: Iterable[Package]) -> list[list[Package]]:
"""
@ -143,6 +213,22 @@ class Tree:
instance = Tree(leaves)
return instance.levels()
@staticmethod
def sort(leaves: list[list[Leaf]]) -> list[list[Package]]:
    """
    extract packages from leaves and order them by package base inside each group

    Args:
        leaves(list[list[Leaf]]): leaves to sort

    Returns:
        list[list[Package]]: sorted list of packages on each level, empty levels are omitted
    """
    result = []
    for level in leaves:
        if not level:  # empty levels are not included into the result
            continue
        packages = [leaf.package for leaf in level]
        packages.sort(key=lambda package: package.base)
        result.append(packages)
    return result
def levels(self) -> list[list[Package]]:
"""
get build levels starting from the packages which do not require any other package to build
@ -155,8 +241,10 @@ class Tree:
# build initial tree
unprocessed = self.leaves[:]
while unprocessed:
unsorted.append([leaf for leaf in unprocessed if leaf.is_root(unprocessed)])
unprocessed = [leaf for leaf in unprocessed if not leaf.is_root(unprocessed)]
# additional workaround with partial in order to hide cell-var-from-loop pylint warning
predicate = functools.partial(Leaf.is_root, packages=unprocessed)
new_level, unprocessed = partition(unprocessed, predicate)
unsorted.append(new_level)
# move leaves to the end if they are not required at the next level
for current_num, current_level in enumerate(unsorted[:-1]):
@ -164,13 +252,47 @@ class Tree:
next_level = unsorted[next_num]
# change lists inside the collection
# additional workaround with partial in order to hide cell-var-from-loop pylint warning
predicate = functools.partial(Leaf.is_dependency, packages=next_level)
unsorted[current_num], to_be_moved = partition(current_level, predicate)
unsorted[next_num].extend(to_be_moved)
comparator: Callable[[Package], str] = lambda package: package.base
return [
sorted([leaf.package for leaf in level], key=comparator)
for level in unsorted if level
]
return self.sort(unsorted)
def partitions(self, *, count: int) -> list[list[Package]]:
    """
    partition tree into (more or less) equal chunks of packages which don't depend on each other. The method
    builds chunks of connected leaves (a leaf together with everything which depends on it or is required by
    it), puts each chunk into the currently shortest partition and balances the result afterwards

    Args:
        count(int): maximal amount of partitions

    Returns:
        list[list[Package]]: sorted list of packages partitions
    """
    unsorted: list[list[Leaf]] = [[] for _ in range(count)]

    # in order to keep result stable we will need to sort packages all times
    unprocessed = sorted(self.leaves, key=lambda leaf: leaf.package.base)
    while unprocessed:
        # pick one and append it to the most free partition and build chunk
        leaf = unprocessed.pop()
        chunk = [leaf]

        # NOTE(review): partition is assumed to return (matching, non-matching) lists, verify against its docs
        # the chunk is extended until no more related leaves can be found in the unprocessed ones
        while True:  # python doesn't allow to use walrus operator to unpack tuples
            # get packages which depend on packages in chunk
            predicate = functools.partial(Leaf.is_root, packages=chunk)
            unprocessed, new_dependent = partition(unprocessed, predicate)
            chunk.extend(new_dependent)

            # get packages which are dependency of packages in chunk
            # the predicate is built after the chunk has been extended, so new dependent leaves are included
            predicate = functools.partial(Leaf.is_dependency, packages=chunk)
            new_dependencies, unprocessed = partition(unprocessed, predicate)
            chunk.extend(new_dependencies)

            if not new_dependent and not new_dependencies:
                break

        # the whole chunk goes to the shortest partition, because its leaves must not be separated
        part = min(unsorted, key=len)
        part.extend(chunk)

    balanced = self.balance(unsorted)
    return self.sort(balanced)

View File

@ -46,6 +46,7 @@ __all__ = [
"extract_user",
"filter_json",
"full_version",
"minmax",
"package_like",
"parse_version",
"partition",
@ -263,6 +264,22 @@ def full_version(epoch: str | int | None, pkgver: str, pkgrel: str) -> str:
return f"{prefix}{pkgver}-{pkgrel}"
def minmax(source: Iterable[T], *, key: Callable[[T], Any] | None = None) -> tuple[T, T]:
    """
    get min and max value from iterable

    Args:
        source(Iterable[T]): source list to find min and max values
        key(Callable[[T], Any] | None, optional): key to sort (Default value = None)

    Returns:
        tuple[T, T]: min and max values for sequence

    Raises:
        ValueError: in case if source is empty
    """
    # source has to be traversed twice, thus materialize it once in order to support one-shot iterators.
    # The first traversal consumes all the data before the second one starts, so list is preferred to tee
    items = list(source)
    # typing doesn't expose SupportLessThan, so we just ignore this in typecheck
    return min(items, key=key), max(items, key=key)  # type: ignore
def package_like(filename: Path) -> bool:
"""
check if file looks like package
@ -296,12 +313,12 @@ def parse_version(version: str) -> tuple[str | None, str, str]:
return epoch, pkgver, pkgrel
def partition(source: list[T], predicate: Callable[[T], bool]) -> tuple[list[T], list[T]]:
def partition(source: Iterable[T], predicate: Callable[[T], bool]) -> tuple[list[T], list[T]]:
"""
partition list into two based on predicate, based on https://docs.python.org/dev/library/itertools.html#itertools-recipes
Args:
source(list[T]): source list to be partitioned
source(Iterable[T]): source list to be partitioned
predicate(Callable[[T], bool]): filter function
Returns: