store built packages in archive tree instead of repository

This commit is contained in:
2025-07-16 01:42:24 +03:00
parent 4fa5d55317
commit afd62e88f6
14 changed files with 439 additions and 75 deletions

View File

@@ -21,6 +21,7 @@ import argparse
from ahriman.application.handlers.handler import Handler, SubParserAction
from ahriman.core.configuration import Configuration
from ahriman.core.utils import walk
from ahriman.models.repository_id import RepositoryId
from ahriman.models.repository_paths import RepositoryPaths
@@ -49,6 +50,7 @@ class TreeMigrate(Handler):
target_tree.tree_create()
# perform migration
TreeMigrate.tree_move(current_tree, target_tree)
TreeMigrate.fix_symlinks(current_tree)
@staticmethod
def _set_service_tree_migrate_parser(root: SubParserAction) -> argparse.ArgumentParser:
@@ -66,6 +68,22 @@ class TreeMigrate(Handler):
parser.set_defaults(lock=None, quiet=True, report=False)
return parser
@staticmethod
def fix_symlinks(paths: RepositoryPaths) -> None:
    """
    re-point package symlinks which do not resolve anymore to the files stored in the archive tree

    Args:
        paths(RepositoryPaths): repository paths whose ``repository`` and ``archive`` trees are processed
    """
    # NOTE(review): the previous docstring described ``paths`` as the new tree, while the visible caller
    # passes ``current_tree`` - confirm which tree is expected here

    # index every entry found in the archive tree by its file name; the last one wins on duplicates
    archives = {}
    for archive in walk(paths.archive):
        archives[archive.name] = archive

    for link in walk(paths.repository):
        # Path.exists() follows symlinks, so only entries which do not resolve reach the code below
        if link.exists():
            continue
        target = archives.get(link.name)
        if target is None:
            continue  # nothing in the archive carries this name, leave the entry untouched
        link.unlink()
        link.symlink_to(target.relative_to(link.parent, walk_up=True))
@staticmethod
def tree_move(from_tree: RepositoryPaths, to_tree: RepositoryPaths) -> None:
"""
@@ -77,6 +95,7 @@ class TreeMigrate(Handler):
"""
# we don't care about devtools chroot
for attribute in (
RepositoryPaths.archive,
RepositoryPaths.packages,
RepositoryPaths.pacman,
RepositoryPaths.repository,

View File

@@ -31,20 +31,21 @@ class Repo(LazyLogging):
Attributes:
name(str): repository name
paths(RepositoryPaths): repository paths instance
root(Path): repository root
sign_args(list[str]): additional args which have to be used to sign repository archive
uid(int): uid of the repository owner user
"""
def __init__(self, name: str, paths: RepositoryPaths, sign_args: list[str]) -> None:
def __init__(self, name: str, paths: RepositoryPaths, sign_args: list[str], root: Path | None = None) -> None:
"""
Args:
name(str): repository name
paths(RepositoryPaths): repository paths instance
sign_args(list[str]): additional args which have to be used to sign repository archive
root(Path | None, optional): repository root. If none set, the default will be used (Default value = None)
"""
self.name = name
self.paths = paths
self.root = root or paths.repository
self.uid, _ = paths.root_owner
self.sign_args = sign_args
@@ -56,28 +57,36 @@ class Repo(LazyLogging):
Returns:
Path: path to repository database
"""
return self.paths.repository / f"{self.name}.db.tar.gz"
return self.root / f"{self.name}.db.tar.gz"
def add(self, path: Path) -> None:
def add(self, path: Path, remove: bool = True) -> None:
"""
add new package to repository
Args:
path(Path): path to archive to add
remove(bool, optional): whether to remove old packages or not (Default value = True)
"""
command = ["repo-add", *self.sign_args]
if remove:
command.extend(["--remove"])
command.extend([str(self.repo_path), str(path)])
# add to repository
check_output(
"repo-add", *self.sign_args, "-R", str(self.repo_path), str(path),
*command,
exception=BuildError.from_process(path.name),
cwd=self.paths.repository,
cwd=self.root,
logger=self.logger,
user=self.uid)
user=self.uid,
)
def init(self) -> None:
"""
create empty repository database. It just calls add with empty arguments
"""
check_output("repo-add", *self.sign_args, str(self.repo_path),
cwd=self.paths.repository, logger=self.logger, user=self.uid)
cwd=self.root, logger=self.logger, user=self.uid)
def remove(self, package: str, filename: Path) -> None:
"""
@@ -88,13 +97,14 @@ class Repo(LazyLogging):
filename(Path): package filename to remove
"""
# remove package and signature (if any) from filesystem
for full_path in self.paths.repository.glob(f"{filename}*"):
for full_path in self.root.glob(f"**/{filename}*"):
full_path.unlink()
# remove package from registry
check_output(
"repo-remove", *self.sign_args, str(self.repo_path), package,
exception=BuildError.from_process(package),
cwd=self.paths.repository,
cwd=self.root,
logger=self.logger,
user=self.uid)
user=self.uid,
)

View File

@@ -0,0 +1,84 @@
#
# Copyright (c) 2021-2025 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import argparse
from dataclasses import replace
from sqlite3 import Connection
from ahriman.application.handlers.handler import Handler
from ahriman.core.alpm.pacman import Pacman
from ahriman.core.configuration import Configuration
from ahriman.models.package import Package
from ahriman.models.pacman_synchronization import PacmanSynchronization
from ahriman.models.repository_paths import RepositoryPaths
__all__ = ["migrate_data"]
def migrate_data(connection: Connection, configuration: Configuration) -> None:
    """
    move built packages of every extracted repository into the archive tree

    Args:
        connection(Connection): database connection, not used by this migration
        configuration(Configuration): configuration instance
    """
    del connection  # only the filesystem is touched here

    config_path, _ = configuration.check_loaded()
    # no architecture or repository filters are set for the repositories lookup
    args = argparse.Namespace(configuration=config_path, architecture=None, repository=None, repository_id=None)

    for repository_id in Handler.repositories_extract(args):
        paths = replace(configuration.repository_paths, repository_id=repository_id)
        pacman = Pacman(repository_id, configuration, refresh_database=PacmanSynchronization.Disabled)

        # create archive directory if required
        archive_missing = not paths.archive.is_dir()
        if archive_missing:
            with paths.preserve_owner(paths.root / "archive"):
                paths.archive.mkdir(mode=0o755, parents=True)

        move_packages(paths, pacman)
def move_packages(repository_paths: RepositoryPaths, pacman: Pacman) -> None:
    """
    move packages from repository to archive and create symbolic links

    Package archives are resolved to their package base through :func:`Package.from_archive()`. A detached
    signature (``<archive>.sig``) is not a package archive itself, so it is never loaded as a package; instead
    it is moved together with the archive it belongs to. Signatures without an archive next to them are left
    in the repository directory untouched.

    Args:
        repository_paths(RepositoryPaths): repository paths instance
        pacman(Pacman): alpm wrapper instance
    """
    # take a snapshot of the listing, because entries of this directory are replaced while processing
    for source in list(repository_paths.repository.iterdir()):
        if not source.is_file(follow_symlinks=False):
            continue  # skip symbolic links if any

        filename = source.name
        if filename.startswith(".") or ".pkg." not in filename:
            continue
        if filename.endswith(".sig"):
            continue  # signatures are handled together with the archive they sign

        package = Package.from_archive(source, pacman)
        archive_directory = repository_paths.archive_for(package.base)

        for artifact in (source, source.with_name(f"{filename}.sig")):
            if not artifact.is_file(follow_symlinks=False):
                continue  # no signature for this archive (or it has been already linked)
            # move file to the archive directory
            target = archive_directory / artifact.name
            artifact.rename(target)
            # create symlink to the archive
            artifact.symlink_to(target.relative_to(artifact.parent, walk_up=True))

View File

@@ -17,4 +17,5 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from ahriman.core.housekeeping.archive_rotation_trigger import ArchiveRotationTrigger
from ahriman.core.housekeeping.logs_rotation_trigger import LogsRotationTrigger

View File

@@ -0,0 +1,115 @@
#
# Copyright (c) 2021-2025 ahriman team.
#
# This file is part of ahriman
# (see https://github.com/arcan1s/ahriman).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from collections.abc import Callable
from functools import cmp_to_key
from ahriman.core import context
from ahriman.core.alpm.pacman import Pacman
from ahriman.core.configuration import Configuration
from ahriman.core.triggers import Trigger
from ahriman.core.utils import package_like
from ahriman.models.package import Package
from ahriman.models.repository_id import RepositoryId
from ahriman.models.result import Result
class ArchiveRotationTrigger(Trigger):
    """
    trigger which removes outdated package versions from the archive tree

    Attributes:
        keep_built_packages(int): number of last packages to keep
        paths(RepositoryPaths): repository paths instance
    """

    CONFIGURATION_SCHEMA = {
        "archive": {
            "type": "dict",
            "schema": {
                "keep_built_packages": {
                    "type": "integer",
                    "required": True,
                    "coerce": "integer",
                    "min": 0,
                },
            },
        },
    }

    def __init__(self, repository_id: RepositoryId, configuration: Configuration) -> None:
        """
        Args:
            repository_id(RepositoryId): repository unique identifier
            configuration(Configuration): configuration instance
        """
        Trigger.__init__(self, repository_id, configuration)

        section = next(iter(self.configuration_sections(configuration)))
        configured = configuration.getint(section, "keep_built_packages")
        # negative values are clamped to zero
        self.keep_built_packages = configured if configured > 0 else 0
        self.paths = configuration.repository_paths

    @classmethod
    def configuration_sections(cls, configuration: Configuration) -> list[str]:
        """
        list configuration sections used by this trigger

        Args:
            configuration(Configuration): configuration instance

        Returns:
            list[str]: top-level keys of the configuration schema of this trigger
        """
        return list(cls.CONFIGURATION_SCHEMA)

    def archives_remove(self, package: Package, pacman: Pacman) -> None:
        """
        remove older versions of the specified package from its archive directory

        Args:
            package(Package): package which has been updated to check for older versions
            pacman(Pacman): alpm wrapper instance
        """
        versions: dict[tuple[str, str], Package] = {}
        # we can't use here load_archives, because it ignores versions
        for full_path in self.paths.archive_for(package.base).iterdir():
            if not package_like(full_path):
                continue
            local = Package.from_archive(full_path, pacman)
            # archives which share base and version are merged into a single package object
            known = versions.setdefault((local.base, local.version), local)
            known.packages.update(local.packages)

        by_version: Callable[[Package, Package], int] = lambda left, right: left.vercmp(right.version)
        ordered = sorted(versions.values(), key=cmp_to_key(by_version))

        # with keep_built_packages == 0 the slice below is [:-0], i.e. [:0], so nothing is removed at all
        for outdated in ordered[:-self.keep_built_packages]:
            self.logger.info("removing version %s of package %s", outdated.version, outdated.base)
            for archive in outdated.packages.values():
                # the trailing wildcard also matches files which share the archive name as a prefix
                for path in self.paths.archive_for(outdated.base).glob(f"{archive.filename}*"):
                    path.unlink()

    def on_result(self, result: Result, packages: list[Package]) -> None:
        """
        run trigger

        Args:
            result(Result): build result
            packages(list[Package]): list of all available packages
        """
        pacman = context.get().get(Pacman)
        for package in result.success:
            self.archives_remove(package, pacman)

View File

@@ -17,7 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import shutil
import shutil # shutil.move is used here to ensure cross fs file movement
from collections.abc import Iterable
from pathlib import Path
@@ -41,6 +41,101 @@ class Executor(PackageInfo, Cleaner):
trait for common repository update processes
"""
def _archive_remove(self, description: PackageDescription, package_base: str) -> None:
    """
    rename built package archive inside the packages directory to its safe file name

    Args:
        description(PackageDescription): package description, its ``filename`` is updated in place
        package_base(str): package base name, used only for the log message
    """
    original = description.filename
    if original is None:
        self.logger.warning("received empty package name for base %s", package_base)
        return  # suppress type checking, it never can be none actually

    safe = safe_filename(original)
    if safe == original:
        return  # file name does not contain anything to replace

    (self.paths.packages / original).rename(self.paths.packages / safe)
    description.filename = safe
def _package_build(self, package: Package, path: Path, packager: str | None,
                   local_version: str | None) -> str | None:
    """
    build single package and move the built archives into the packages directory

    Args:
        package(Package): package to build
        path(Path): path to directory with package files
        packager(str | None): packager identifier used for this package
        local_version(str | None): local version of the package

    Returns:
        str | None: value returned by the task initialization (current commit sha if available)
    """
    self.reporter.set_building(package.base)

    task = Task(package, self.configuration, self.architecture, self.paths)
    patches = self.reporter.package_patches_get(package.base, None)
    commit_sha = task.init(path, patches, local_version)
    archives = task.build(path, PACKAGER=packager)

    # register built archives in the package object before they leave the build directory
    package.with_packages(archives, self.pacman)
    for archive in archives:
        shutil.move(archive, self.paths.packages / archive.name)

    return commit_sha
def _package_remove(self, package_name: str, path: Path) -> None:
"""
remove single package from repository
Args:
package_name(str): package name
path(Path): path to package archive
"""
try:
self.repo.remove(package_name, path)
except Exception:
self.logger.exception("could not remove %s", package_name)
def _package_remove_base(self, package_base: str) -> None:
    """
    remove package base from the reporter, logging a failure instead of raising it

    Args:
        package_base(str): package base name
    """
    try:
        # the removal is wrapped into the package removed event
        with self.in_event(package_base, EventType.PackageRemoved):
            self.reporter.package_remove(package_base)
    except Exception:
        # best effort: one failed removal must not interrupt the caller
        self.logger.exception("could not remove base %s", package_base)
def _package_update(self, filename: str | None, package_base: str, packager_key: str | None) -> None:
"""
update built package in repository database
Args:
filename(str | None): archive filename
package_base(str): package base name
packager_key(str | None): packager key identifier
"""
if filename is None:
self.logger.warning("received empty package name for base %s", package_base)
return # suppress type checking, it never can be none actually
# in theory, it might be NOT packages directory, but we suppose it is
full_path = self.paths.packages / filename
files = self.sign.process_sign_package(full_path, packager_key)
for src in files:
dst = self.paths.archive_for(package_base) / src.name
src.rename(dst) # move package to archive directory
if not (symlink := self.paths.repository / dst.name).exists():
symlink.symlink_to(dst.relative_to(symlink.parent, walk_up=True)) # create link to archive
self.repo.add(self.paths.repository / filename)
def process_build(self, updates: Iterable[Package], packagers: Packagers | None = None, *,
bump_pkgrel: bool = False) -> Result:
"""
@@ -55,21 +150,6 @@ class Executor(PackageInfo, Cleaner):
Returns:
Result: build result
"""
def build_single(package: Package, local_path: Path, packager_id: str | None) -> str | None:
self.reporter.set_building(package.base)
task = Task(package, self.configuration, self.architecture, self.paths)
local_version = local_versions.get(package.base) if bump_pkgrel else None
patches = self.reporter.package_patches_get(package.base, None)
commit_sha = task.init(local_path, patches, local_version)
built = task.build(local_path, PACKAGER=packager_id)
package.with_packages(built, self.pacman)
for src in built:
dst = self.paths.packages / src.name
shutil.move(src, dst)
return commit_sha
packagers = packagers or Packagers()
local_versions = {package.base: package.version for package in self.packages()}
@@ -80,16 +160,21 @@ class Executor(PackageInfo, Cleaner):
try:
with self.in_event(single.base, EventType.PackageUpdated, failure=EventType.PackageUpdateFailed):
packager = self.packager(packagers, single.base)
last_commit_sha = build_single(single, Path(dir_name), packager.packager_id)
local_version = local_versions.get(single.base) if bump_pkgrel else None
commit_sha = self._package_build(single, Path(dir_name), packager.packager_id, local_version)
# update commit hash for changes keeping current diff if there is any
changes = self.reporter.package_changes_get(single.base)
self.reporter.package_changes_update(single.base, Changes(last_commit_sha, changes.changes))
self.reporter.package_changes_update(single.base, Changes(commit_sha, changes.changes))
# update dependencies list
package_archive = PackageArchive(self.paths.build_root, single, self.pacman, self.scan_paths)
dependencies = package_archive.depends_on()
self.reporter.package_dependencies_update(single.base, dependencies)
# update result set
result.add_updated(single)
except Exception:
self.reporter.set_failed(single.base)
result.add_failed(single)
@@ -107,19 +192,6 @@ class Executor(PackageInfo, Cleaner):
Returns:
Result: remove result
"""
def remove_base(package_base: str) -> None:
try:
with self.in_event(package_base, EventType.PackageRemoved):
self.reporter.package_remove(package_base)
except Exception:
self.logger.exception("could not remove base %s", package_base)
def remove_package(package: str, archive_path: Path) -> None:
try:
self.repo.remove(package, archive_path) # remove the package itself
except Exception:
self.logger.exception("could not remove %s", package)
packages_to_remove: dict[str, Path] = {}
bases_to_remove: list[str] = []
@@ -136,6 +208,7 @@ class Executor(PackageInfo, Cleaner):
})
bases_to_remove.append(local.base)
result.add_removed(local)
elif requested.intersection(local.packages.keys()):
packages_to_remove.update({
package: properties.filepath
@@ -152,11 +225,11 @@ class Executor(PackageInfo, Cleaner):
# remove packages from repository files
for package, filename in packages_to_remove.items():
remove_package(package, filename)
self._package_remove(package, filename)
# remove bases from registered
for package in bases_to_remove:
remove_base(package)
self._package_remove_base(package)
return result
@@ -172,27 +245,6 @@ class Executor(PackageInfo, Cleaner):
Returns:
Result: path to repository database
"""
def rename(archive: PackageDescription, package_base: str) -> None:
if archive.filename is None:
self.logger.warning("received empty package name for base %s", package_base)
return # suppress type checking, it never can be none actually
if (safe := safe_filename(archive.filename)) != archive.filename:
shutil.move(self.paths.packages / archive.filename, self.paths.packages / safe)
archive.filename = safe
def update_single(name: str | None, package_base: str, packager_key: str | None) -> None:
if name is None:
self.logger.warning("received empty package name for base %s", package_base)
return # suppress type checking, it never can be none actually
# in theory, it might be NOT packages directory, but we suppose it is
full_path = self.paths.packages / name
files = self.sign.process_sign_package(full_path, packager_key)
for src in files:
dst = self.paths.repository / safe_filename(src.name)
shutil.move(src, dst)
package_path = self.paths.repository / safe_filename(name)
self.repo.add(package_path)
current_packages = {package.base: package for package in self.packages()}
local_versions = {package_base: package.version for package_base, package in current_packages.items()}
@@ -207,8 +259,8 @@ class Executor(PackageInfo, Cleaner):
packager = self.packager(packagers, local.base)
for description in local.packages.values():
rename(description, local.base)
update_single(description.filename, local.base, packager.key)
self._archive_remove(description, local.base)
self._package_update(description.filename, local.base, packager.key)
self.reporter.set_success(local)
result.add_updated(local)
@@ -216,12 +268,13 @@ class Executor(PackageInfo, Cleaner):
if local.base in current_packages:
current_package_archives = set(current_packages[local.base].packages.keys())
removed_packages.extend(current_package_archives.difference(local.packages))
except Exception:
self.reporter.set_failed(local.base)
result.add_failed(local)
self.logger.exception("could not process %s", local.base)
self.clear_packages()
self.clear_packages()
self.process_remove(removed_packages)
return result

View File

@@ -520,8 +520,7 @@ class Package(LazyLogging):
else:
remote_version = remote.version
result: int = vercmp(self.version, remote_version)
return result < 0
return self.vercmp(remote_version) < 0
def next_pkgrel(self, local_version: str | None) -> str | None:
"""
@@ -540,7 +539,7 @@ class Package(LazyLogging):
if local_version is None:
return None # local version not found, keep upstream pkgrel
if vercmp(self.version, local_version) > 0:
if self.vercmp(local_version) > 0:
return None # upstream version is newer than local one, keep upstream pkgrel
*_, local_pkgrel = parse_version(local_version)
@@ -561,6 +560,19 @@ class Package(LazyLogging):
details = "" if self.is_single_package else f""" ({" ".join(sorted(self.packages.keys()))})"""
return f"{self.base}{details}"
def vercmp(self, version: str) -> int:
    """
    compare version of this package with the specified one, typed wrapper around :func:`pyalpm.vercmp()`

    Args:
        version(str): version to compare

    Returns:
        int: negative if current version is less than provided, positive if greater than and zero if equals
    """
    # the annotated local exists only to give the untyped call result a type
    comparison: int = vercmp(self.version, version)
    return comparison
def view(self) -> dict[str, Any]:
"""
generate json package view

View File

@@ -85,6 +85,16 @@ class RepositoryPaths(LazyLogging):
return Path(self.repository_id.architecture) # legacy tree suffix
return Path(self.repository_id.name) / self.repository_id.architecture
@property
def archive(self) -> Path:
    """
    root of the archive tree for this repository

    Returns:
        Path: ``archive`` directory under the root, followed by the repository tree suffix
    """
    return self.root.joinpath("archive", self._suffix)
@property
def build_root(self) -> Path:
"""
@@ -227,7 +237,7 @@ class RepositoryPaths(LazyLogging):
set owner of path recursively (from root) to root owner
Notes:
More likely you don't want to call this method explicitly, consider using :func:`preserve_owner`
More likely you don't want to call this method explicitly, consider using :func:`preserve_owner()`
as context manager instead
Args:
@@ -249,6 +259,23 @@ class RepositoryPaths(LazyLogging):
set_owner(path)
path = path.parent
def archive_for(self, package_base: str) -> Path:
    """
    get path to the archive directory of the package base, creating it if it does not exist yet

    Args:
        package_base(str): package base name

    Returns:
        Path: path to archive directory for package base
    """
    # directories are grouped by the first character of the package base
    directory = self.archive / "packages" / package_base[0] / package_base
    if directory.is_dir():
        return directory

    # create if not exists
    with self.preserve_owner(self.archive):
        directory.mkdir(mode=0o755, parents=True)
    return directory
def cache_for(self, package_base: str) -> Path:
"""
get path to cached PKGBUILD and package sources for the package base
@@ -320,6 +347,7 @@ class RepositoryPaths(LazyLogging):
with self.preserve_owner():
for directory in (
self.archive,
self.cache,
self.chroot,
self.packages,