From 7b647a9b5a980407560d1f679eb171c1200c7794 Mon Sep 17 00:00:00 2001 From: Evgeniy Alekseev Date: Mon, 27 Jun 2022 18:53:48 +0300 Subject: [PATCH] fix case with package name which cannot be downloaded (without special settings) The issue appears if file or its version contains one of special URI characters, e.g. +. Theu will be interpreted as query parameters by (some) servers (e.g. S3 works in this way). In this commit we rename archive to the one with safe name. --- src/ahriman/application/handlers/rebuild.py | 2 +- src/ahriman/core/repository/executor.py | 26 +++++++++++++------ src/ahriman/core/repository/repository.py | 2 +- src/ahriman/core/util.py | 24 ++++++++++++++++- .../handlers/test_handler_rebuild.py | 12 ++++----- .../ahriman/core/repository/test_executor.py | 21 +++++++++++++++ .../core/repository/test_repository.py | 12 ++++----- tests/ahriman/core/test_util.py | 15 ++++++++++- 8 files changed, 90 insertions(+), 24 deletions(-) diff --git a/src/ahriman/application/handlers/rebuild.py b/src/ahriman/application/handlers/rebuild.py index 3362a7b5..7220042e 100644 --- a/src/ahriman/application/handlers/rebuild.py +++ b/src/ahriman/application/handlers/rebuild.py @@ -52,7 +52,7 @@ class Rebuild(Handler): if args.from_database: updates = Rebuild.extract_packages(application) else: - updates = application.repository.packages_depends_on(depends_on) + updates = application.repository.packages_depend_on(depends_on) Rebuild.check_if_empty(args.exit_code, not updates) if args.dry_run: diff --git a/src/ahriman/core/repository/executor.py b/src/ahriman/core/repository/executor.py index cb935a42..4c3c1bb6 100644 --- a/src/ahriman/core/repository/executor.py +++ b/src/ahriman/core/repository/executor.py @@ -24,8 +24,9 @@ from typing import Iterable, List, Optional, Set from ahriman.core.build_tools.task import Task from ahriman.core.repository.cleaner import Cleaner -from ahriman.core.util import tmpdir +from ahriman.core.util import safe_filename, tmpdir from ahriman.models.package import Package +from ahriman.models.package_description import PackageDescription from ahriman.models.result import Result @@ -122,16 +123,16 @@ class Executor(Cleaner): for local in self.packages(): if local.base in packages or all(package in requested for package in local.packages): to_remove = { - package: Path(properties.filename) + package: properties.filepath for package, properties in local.packages.items() - if properties.filename is not None + if properties.filepath is not None } remove_base(local.base) elif requested.intersection(local.packages.keys()): to_remove = { - package: Path(properties.filename) + package: properties.filepath for package, properties in local.packages.items() - if package in requested and properties.filename is not None + if package in requested and properties.filepath is not None } else: to_remove = {} @@ -160,17 +161,25 @@ class Executor(Cleaner): Returns: Result: path to repository database """ + def rename(archive: PackageDescription, base: str) -> None: + if archive.filename is None: + self.logger.warning("received empty package name for base %s", base) + return # suppress type checking, it never can be none actually + if (safe := safe_filename(archive.filename)) != archive.filename: + shutil.move(self.paths.packages / archive.filename, self.paths.packages / safe) + archive.filename = safe + def update_single(name: Optional[str], base: str) -> None: if name is None: self.logger.warning("received empty package name for base %s", base) return # suppress type checking, it never can be none actually - # in theory it might be NOT packages directory, but we suppose it is + # in theory, it might be NOT packages directory, but we suppose it is full_path = self.paths.packages / name files = self.sign.process_sign_package(full_path, base) for src in files: - dst = self.paths.repository / src.name + dst = self.paths.repository / safe_filename(src.name) shutil.move(src, dst) - package_path = self.paths.repository / name + package_path = self.paths.repository / safe_filename(name) self.repo.add(package_path) current_packages = self.packages() @@ -181,6 +190,7 @@ class Executor(Cleaner): for local in updates: try: for description in local.packages.values(): + rename(description, local.base) update_single(description.filename, local.base) self.reporter.set_success(local) result.add_success(local) diff --git a/src/ahriman/core/repository/repository.py b/src/ahriman/core/repository/repository.py index fbe20063..c5548e25 100644 --- a/src/ahriman/core/repository/repository.py +++ b/src/ahriman/core/repository/repository.py @@ -99,7 +99,7 @@ class Repository(Executor, UpdateHandler): """ return list(filter(package_like, self.paths.packages.iterdir())) - def packages_depends_on(self, depends_on: Optional[Iterable[str]]) -> List[Package]: + def packages_depend_on(self, depends_on: Optional[Iterable[str]]) -> List[Package]: """ extract list of packages which depends on specified package diff --git a/src/ahriman/core/util.py b/src/ahriman/core/util.py index 2d0e0f8f..060ebe4b 100644 --- a/src/ahriman/core/util.py +++ b/src/ahriman/core/util.py @@ -20,6 +20,7 @@ import datetime import io import os +import re import requests import shutil import subprocess @@ -36,7 +37,7 @@ from ahriman.models.repository_paths import RepositoryPaths __all__ = ["check_output", "check_user", "exception_response_text", "filter_json", "full_version", "enum_values", - "package_like", "pretty_datetime", "pretty_size", "tmpdir", "walk"] + "package_like", "pretty_datetime", "pretty_size", "safe_filename", "tmpdir", "walk"] def check_output(*args: str, exception: Optional[Exception], cwd: Optional[Path] = None, @@ -272,6 +273,27 @@ def pretty_size(size: Optional[float], level: int = 0) -> str: return pretty_size(size / 1024, level + 1) +def safe_filename(source: str) -> str: + """ + convert source string to its safe representation + + Args: + source(str): string to convert + + Returns: + str: result string in which all unsafe characters are replaced by dash + """ + # RFC-3986 https://datatracker.ietf.org/doc/html/rfc3986 states that unreserved characters are + # https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + # however we would like to allow some gen-delims characters in filename, because those characters are used + # as delimiter in other URI parts. The ones we allow are + # ":" - used as separator in schema and userinfo + # "[" and "]" - used for host part + # "@" - used as separator between host and userinfo + return re.sub(r"[^A-Za-z\d\-._~:\[\]@]", "-", source) + + @contextmanager def tmpdir() -> Generator[Path, None, None]: """ diff --git a/tests/ahriman/application/handlers/test_handler_rebuild.py b/tests/ahriman/application/handlers/test_handler_rebuild.py index db725ec9..024753a2 100644 --- a/tests/ahriman/application/handlers/test_handler_rebuild.py +++ b/tests/ahriman/application/handlers/test_handler_rebuild.py @@ -37,7 +37,7 @@ def test_run(args: argparse.Namespace, package_ahriman: Package, result = Result() result.add_success(package_ahriman) mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on", + application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on", return_value=[package_ahriman]) application_mock = mocker.patch("ahriman.application.application.Application.update", return_value=result) check_mock = mocker.patch("ahriman.application.handlers.Handler.check_if_empty") @@ -71,7 +71,7 @@ def test_run_dry_run(args: argparse.Namespace, configuration: Configuration, args = _default_args(args) args.dry_run = True mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on", return_value=[package_ahriman]) + mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on", return_value=[package_ahriman]) application_mock = mocker.patch("ahriman.application.application.Application.update") check_mock = mocker.patch("ahriman.application.handlers.Handler.check_if_empty") @@ -88,7 +88,7 @@ def test_run_filter(args: argparse.Namespace, configuration: Configuration, mock args.depends_on = ["python-aur"] mocker.patch("ahriman.application.application.Application.update") mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on") + application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on") Rebuild.run(args, "x86_64", configuration, True, False) application_packages_mock.assert_called_once_with({"python-aur"}) @@ -101,7 +101,7 @@ def test_run_without_filter(args: argparse.Namespace, configuration: Configurati args = _default_args(args) mocker.patch("ahriman.application.application.Application.update") mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on") + application_packages_mock = mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on") Rebuild.run(args, "x86_64", configuration, True, False) application_packages_mock.assert_called_once_with(None) @@ -116,7 +116,7 @@ def test_run_update_empty_exception(args: argparse.Namespace, configuration: Con args.exit_code = True args.dry_run = True mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on", return_value=[]) + mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on", return_value=[]) check_mock = mocker.patch("ahriman.application.handlers.Handler.check_if_empty") Rebuild.run(args, "x86_64", configuration, True, False) @@ -131,7 +131,7 @@ def test_run_build_empty_exception(args: argparse.Namespace, configuration: Conf args = _default_args(args) args.exit_code = True mocker.patch("ahriman.models.repository_paths.RepositoryPaths.tree_create") - mocker.patch("ahriman.core.repository.repository.Repository.packages_depends_on", return_value=[package_ahriman]) + mocker.patch("ahriman.core.repository.repository.Repository.packages_depend_on", return_value=[package_ahriman]) mocker.patch("ahriman.application.application.Application.update", return_value=Result()) check_mock = mocker.patch("ahriman.application.handlers.Handler.check_if_empty") diff --git a/tests/ahriman/core/repository/test_executor.py b/tests/ahriman/core/repository/test_executor.py index ad8d05da..b530770e 100644 --- a/tests/ahriman/core/repository/test_executor.py +++ b/tests/ahriman/core/repository/test_executor.py @@ -206,6 +206,27 @@ def test_process_update_group(executor: Executor, package_python_schedule: Packa remove_mock.assert_called_once_with([]) +def test_process_update_unsafe(executor: Executor, package_ahriman: Package, mocker: MockerFixture) -> None: + """ + must encode file name + """ + path = "gconf-3.2.6+11+g07808097-10-x86_64.pkg.tar.zst" + safe_path = "gconf-3.2.6-11-g07808097-10-x86_64.pkg.tar.zst" + package_ahriman.packages[package_ahriman.base].filename = path + + mocker.patch("ahriman.core.repository.executor.Executor.load_archives", return_value=[package_ahriman]) + mocker.patch("ahriman.core.repository.executor.Executor.packages", return_value=[package_ahriman]) + move_mock = mocker.patch("shutil.move") + repo_add_mock = mocker.patch("ahriman.core.alpm.repo.Repo.add") + + executor.process_update([Path(path)]) + move_mock.assert_has_calls([ + mock.call(executor.paths.packages / path, executor.paths.packages / safe_path), + mock.call(executor.paths.packages / safe_path, executor.paths.repository / safe_path) + ]) + repo_add_mock.assert_called_once_with(executor.paths.repository / safe_path) + + def test_process_empty_filename(executor: Executor, package_ahriman: Package, mocker: MockerFixture) -> None: """ must skip update for package which does not have path diff --git a/tests/ahriman/core/repository/test_repository.py b/tests/ahriman/core/repository/test_repository.py index 45c44b78..1f1f704a 100644 --- a/tests/ahriman/core/repository/test_repository.py +++ b/tests/ahriman/core/repository/test_repository.py @@ -85,21 +85,21 @@ def test_packages_built(repository: Repository, mocker: MockerFixture) -> None: assert repository.packages_built() == [Path("b.pkg.tar.xz")] -def test_packages_depends_on(repository: Repository, package_ahriman: Package, package_python_schedule: Package, - mocker: MockerFixture) -> None: +def test_packages_depend_on(repository: Repository, package_ahriman: Package, package_python_schedule: Package, + mocker: MockerFixture) -> None: """ must filter packages by depends list """ mocker.patch("ahriman.core.repository.repository.Repository.packages", return_value=[package_ahriman, package_python_schedule]) - assert repository.packages_depends_on(["python-aur"]) == [package_ahriman] + assert repository.packages_depend_on(["python-aur"]) == [package_ahriman] -def test_packages_depends_on_empty(repository: Repository, package_ahriman: Package, package_python_schedule: Package, - mocker: MockerFixture) -> None: +def test_packages_depend_on_empty(repository: Repository, package_ahriman: Package, package_python_schedule: Package, + mocker: MockerFixture) -> None: """ must return all packages in case if no filter is provided """ mocker.patch("ahriman.core.repository.repository.Repository.packages", return_value=[package_ahriman, package_python_schedule]) - assert repository.packages_depends_on(None) == [package_ahriman, package_python_schedule] + assert repository.packages_depend_on(None) == [package_ahriman, package_python_schedule] diff --git a/tests/ahriman/core/test_util.py b/tests/ahriman/core/test_util.py index 1a9c4ddf..5d1dda11 100644 --- a/tests/ahriman/core/test_util.py +++ b/tests/ahriman/core/test_util.py @@ -10,7 +10,7 @@ from unittest.mock import MagicMock from ahriman.core.exceptions import BuildFailed, InvalidOption, UnsafeRun from ahriman.core.util import check_output, check_user, exception_response_text, filter_json, full_version, \ - enum_values, package_like, pretty_datetime, pretty_size, tmpdir, walk + enum_values, package_like, pretty_datetime, pretty_size, safe_filename, tmpdir, walk from ahriman.models.package import Package from ahriman.models.package_source import PackageSource from ahriman.models.repository_paths import RepositoryPaths @@ -294,6 +294,19 @@ def test_pretty_size_empty() -> None: assert pretty_size(None) == "" +def test_safe_filename() -> None: + """ + must replace unsafe characters by dashes + """ + # so far I found only plus sign + assert safe_filename( + "gconf-3.2.6+11+g07808097-10-x86_64.pkg.tar.zst") == "gconf-3.2.6-11-g07808097-10-x86_64.pkg.tar.zst" + assert safe_filename( + "netkit-telnet-ssl-0.17.41+0.2-6-x86_64.pkg.tar.zst") == "netkit-telnet-ssl-0.17.41-0.2-6-x86_64.pkg.tar.zst" + assert safe_filename("spotify-1:1.1.84.716-2-x86_64.pkg.tar.zst") == "spotify-1:1.1.84.716-2-x86_64.pkg.tar.zst" + assert safe_filename("tolua++-1.0.93-4-x86_64.pkg.tar.zst") == "tolua---1.0.93-4-x86_64.pkg.tar.zst" + + def test_tmpdir() -> None: """ must create temporary directory and remove it after