feat: remove excess dependencies leaves (#128)

This merge request improves the processing of implicit dependencies by reducing the number of leaves in the dependency tree, using the following algorithm:

* remove paths which belong to any base package
* remove packages which are (optional) dependencies of one of the other packages providing the same path. It also tries to handle circular dependencies by excluding them from being treated as "satisfied"
* remove packages which are already satisfied by any child path
This commit is contained in:
2024-08-06 18:00:53 +03:00
parent a01b090c2b
commit 434057ec49
11 changed files with 425 additions and 44 deletions

View File

@ -80,7 +80,7 @@ class Executor(PackageInfo, Cleaner):
# clear changes and update commit hash
self.reporter.package_changes_update(single.base, Changes(last_commit_sha))
# update dependencies list
dependencies = PackageArchive(self.paths.build_directory, single).depends_on()
dependencies = PackageArchive(self.paths.build_directory, single, self.pacman).depends_on()
self.reporter.package_dependencies_update(single.base, dependencies)
# update result set
result.add_updated(single)

View File

@ -57,6 +57,7 @@ class AURPackage:
provides(list[str]): list of packages which this package provides
license(list[str]): list of package licenses
keywords(list[str]): list of package keywords
groups(list[str]): list of package groups
Examples:
Mainly this class must be used from class methods instead of default :func:`__init__()`::
@ -100,6 +101,7 @@ class AURPackage:
provides: list[str] = field(default_factory=list)
license: list[str] = field(default_factory=list)
keywords: list[str] = field(default_factory=list)
groups: list[str] = field(default_factory=list)
@classmethod
def from_json(cls, dump: dict[str, Any]) -> Self:
@ -153,6 +155,7 @@ class AURPackage:
provides=package.provides,
license=package.licenses,
keywords=[],
groups=package.groups,
)
@classmethod
@ -191,6 +194,7 @@ class AURPackage:
provides=dump["provides"],
license=dump["licenses"],
keywords=[],
groups=dump["groups"],
)
@staticmethod

View File

@ -34,6 +34,13 @@ class Dependencies:
paths: dict[str, list[str]] = field(default_factory=dict)
def __post_init__(self) -> None:
    """
    drop every path which has no packages assigned to it
    """
    non_empty = {}
    for location, owners in self.paths.items():
        if owners:  # an empty package list carries no dependency information
            non_empty[location] = owners
    # bypass the regular attribute assignment and replace the mapping directly
    object.__setattr__(self, "paths", non_empty)
@classmethod
def from_json(cls, dump: dict[str, Any]) -> Self:
"""

View File

@ -0,0 +1,90 @@
#
# Copyright (c) 2021-2024 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass, field
from pathlib import Path
from ahriman.core.util import trim_package
@dataclass(frozen=True, kw_only=True)
class FilesystemPackage:
"""
class representing a simplified model for the package installed to filesystem
Attributes:
package_name(str): package name
depends(set[str]): list of package dependencies
directories(set[Path]): list of directories this package contains
files(list[Path]): list of files this package contains
opt_depends(set[str]): list of package optional dependencies
"""
package_name: str
depends: set[str]
opt_depends: set[str]
directories: list[Path] = field(default_factory=list)
files: list[Path] = field(default_factory=list)
def __post_init__(self) -> None:
"""
update dependencies list accordingly
"""
object.__setattr__(self, "depends", {trim_package(package) for package in self.depends})
object.__setattr__(self, "opt_depends", {trim_package(package) for package in self.opt_depends})
def depends_on(self, package_name: str, *, include_optional: bool) -> bool:
"""
check if package depends on given package name
Args:
package_name(str): package name to check dependencies
include_optional(bool): include optional dependencies to check
Returns:
bool: ``True`` in case if the given package in the dependencies lists
"""
if package_name in self.depends:
return True
if include_optional and package_name in self.opt_depends:
return True
return False
def is_root_package(self, packages: Iterable[FilesystemPackage], *, include_optional: bool) -> bool:
"""
check if the package is the one of the root packages. This method checks if there are any packages which are
dependency of the package and - to avoid circular dependencies - does not depend on the package. In addition,
if ``include_optional`` is set to ``True``, then it will also check optional dependencies of the package
Args:
packages(Iterable[FilesystemPackage]): list of packages in which we need to search
include_optional(bool): include optional dependencies to check
Returns:
bool: whether this package depends on any other package in the list of packages
"""
return not any(
package
for package in packages
if self.depends_on(package.package_name, include_optional=include_optional)
and not package.depends_on(self.package_name, include_optional=False)
)

View File

@ -23,8 +23,12 @@ from elftools.elf.elffile import ELFFile
from pathlib import Path
from typing import IO
from ahriman.core.alpm.pacman import Pacman
from ahriman.core.alpm.remote import OfficialSyncdb
from ahriman.core.exceptions import UnknownPackageError
from ahriman.core.util import walk
from ahriman.models.dependencies import Dependencies
from ahriman.models.filesystem_package import FilesystemPackage
from ahriman.models.package import Package
@ -36,10 +40,12 @@ class PackageArchive:
Attributes:
package(Package): package descriptor
root(Path): path to root filesystem
pacman(Pacman): alpm wrapper instance
"""
root: Path
package: Package
pacman: Pacman
@staticmethod
def dynamic_needed(binary_path: Path) -> list[str]:
@ -80,7 +86,7 @@ class PackageArchive:
content(IO[bytes]): content of the file
Returns:
bool: True in case if file has elf header and False otherwise
bool: ``True`` in case if file has elf header and ``False`` otherwise
"""
expected = b"\x7fELF"
length = len(expected)
@ -90,6 +96,89 @@ class PackageArchive:
return magic_bytes == expected
def _load_pacman_package(self, path: Path) -> FilesystemPackage:
    """
    build package model for the package which owns the given files database

    Args:
        path(Path): path to package files database

    Returns:
        FilesystemPackage: generated pacman package model with empty paths
    """
    # the parent directory name with its last two dash-separated components dropped
    # (presumably version and release - verify against the pacman local database layout)
    name = path.parent.name.rsplit("-", 2)[0]

    try:
        description = OfficialSyncdb.info(name, pacman=self.pacman)
        explicit, optional = set(description.depends), set(description.opt_depends)
    except UnknownPackageError:
        # fall back to a package without any known dependencies
        explicit, optional = set(), set()

    return FilesystemPackage(package_name=name, depends=explicit, opt_depends=optional)
def _raw_dependencies_packages(self) -> dict[Path, list[FilesystemPackage]]:
    """
    collect, for every path this package depends on, all installed packages which contain this path

    Returns:
        dict[Path, list[FilesystemPackage]]: map of path to packages containing this path
    """
    linked_libraries, owned_roots = self.depends_on_paths()

    # keys are the matching directories and the libraries whose file name is in the linked libraries set
    owners: dict[Path, list[FilesystemPackage]] = {}
    for name, candidate in self.installed_packages().items():
        if name in self.package.packages:
            continue  # the package must not depend on itself

        matched_directories = [directory for directory in candidate.directories if directory in owned_roots]
        matched_libraries = [library for library in candidate.files if library.name in linked_libraries]

        for location in matched_directories + matched_libraries:
            if location not in owners:
                owners[location] = []
            owners[location].append(candidate)

    return owners
def _refine_dependencies(self, source: dict[Path, list[FilesystemPackage]]) -> dict[Path, list[FilesystemPackage]]:
    """
    shrink the initial dependency map by dropping packages which are already satisfied (e.g. by other path or by
    dependency list, or belonging to the base packages)

    Args:
        source(dict[Path, list[FilesystemPackage]]): the initial map of path to packages containing it

    Returns:
        dict[Path, list[FilesystemPackage]]: reduced source map of packages
    """
    # dependencies of the "base" package are never reported
    core_packages = OfficialSyncdb.info("base", pacman=self.pacman).depends

    refined: dict[Path, list[FilesystemPackage]] = {}
    # walk paths in descending order, so nested paths are processed before their parents
    for path in sorted(source, reverse=True):
        candidates = source[path]

        # the whole path is ignored as soon as any of the base packages contains it
        if any(candidate.package_name in core_packages for candidate in candidates):
            continue

        # first pass: drop packages which explicitly depend on another candidate
        explicit_roots = [
            candidate for candidate in candidates
            if candidate.is_root_package(candidates, include_optional=False)
        ]
        # second pass: the same check, but optional dependencies are also taken into account
        roots = [
            candidate for candidate in explicit_roots
            if candidate.is_root_package(explicit_roots, include_optional=True)
        ]

        # names of the packages which were already recorded for paths located under the current one
        satisfied: set[str] = set()
        for nested_path, nested_packages in refined.items():
            if nested_path.is_relative_to(path):
                satisfied.update(nested.package_name for nested in nested_packages)

        refined[path] = [candidate for candidate in roots if candidate.package_name not in satisfied]

    return refined
def depends_on(self) -> Dependencies:
"""
extract packages and paths which are required for this package
@ -97,20 +186,14 @@ class PackageArchive:
Returns:
Dependencies: map of the package name to set of paths used by this package
"""
dependencies, roots = self.depends_on_paths()
initial_packages = self._raw_dependencies_packages()
refined_packages = self._refine_dependencies(initial_packages)
result: dict[str, list[str]] = {}
for package, (directories, files) in self.installed_packages().items():
if package in self.package.packages:
continue # skip package itself
required_by = [directory for directory in directories if directory in roots]
required_by.extend(library for library in files if library.name in dependencies)
for path in required_by:
result.setdefault(str(path), []).append(package)
return Dependencies(result)
paths = {
str(path): [package.package_name for package in packages]
for path, packages in refined_packages.items()
}
return Dependencies(paths)
def depends_on_paths(self) -> tuple[set[str], set[Path]]:
"""
@ -130,36 +213,35 @@ class PackageArchive:
return dependencies, roots
def installed_packages(self) -> dict[str, tuple[list[Path], list[Path]]]:
def installed_packages(self) -> dict[str, FilesystemPackage]:
"""
extract list of the installed packages and their content
Returns:
dict[str, tuple[list[Path], list[Path]]]; map of package name to list of directories and files contained
dict[str, FilesystemPackage]; map of package name to list of directories and files contained
by this package
"""
result = {}
pacman_local_files = self.root / "var" / "lib" / "pacman" / "local"
for path in filter(lambda fn: fn.name == "files", walk(pacman_local_files)):
package, *_ = path.parent.name.rsplit("-", 2)
package = self._load_pacman_package(path)
directories, files = [], []
is_files = False
is_files_section = False
for line in path.read_text(encoding="utf8").splitlines():
if not line: # skip empty lines
continue
if line.startswith("%") and line.endswith("%"): # directive started
is_files = line == "%FILES%"
if not is_files: # not a files directive
is_files_section = line == "%FILES%"
if not is_files_section: # not a files directive
continue
entry = Path(line)
if line.endswith("/"): # simple check if it is directory
directories.append(entry)
package.directories.append(entry)
else:
files.append(entry)
package.files.append(entry)
result[package] = directories, files
result[package.package_name] = package
return result