Compare commits

..

14 Commits

Author SHA1 Message Date
dbfb460557 feat: optimize archive reading
Instead of trying to load every database and look for files, this commit
introduces an optimization in which the service loads packages first,
groups them by database and loads files later.

In some cases it significantly decreases the time needed to load files
2024-08-14 17:07:10 +03:00
f7f76c4119 fix: explicitly process list of packages
Small workaround to remove debug packages from being processed
2024-08-14 17:07:10 +03:00
88ee300b9e fix: remove trailing slash when loading packages files from a database 2024-08-14 17:07:10 +03:00
6f30c687c2 fix: skip debug packages as well 2024-08-14 17:07:10 +03:00
c023ebe165 docs: update documentation for implicit dependencies resolution 2024-08-14 17:07:10 +03:00
54b99cacfd feat: remove excess dependencies leaves (#128)
This mr improves implicit dependencies processing by reducing tree leaves by using the following algorithm:

* remove paths which belong to any base package
* remove packages which are (opt)dependencies of one of the packages which provide the same path. It also tries to handle circular dependencies by excluding them from being "satisfied"
* remove packages which are already satisfied by any children path
2024-08-14 17:07:10 +03:00
4f5166ff25 feat: improve lock mechanisms
* improve lock mechanisms

* use /run/ahriman for socket

* better water
2024-08-14 17:07:10 +03:00
c8afcbf36a feat: implement local reporter mode (#126)
* implement local reporter mode

* simplify watcher class

* review changes

* do not update unknown status

* allow empty key patches via api

* fix some pylint warnings in tests
2024-08-14 17:07:10 +03:00
2b9880bd3c feat: allow to use simplified keys for context
Initial implementation requires explicit context key name to be set.
Though it is still useful sometimes (e.g. if there should be two
variables with the same type), in the most used scenarios internally
only type is required. This commit extends set and get methods to allow
to construct ContextKey from type directly

Also it breaks old keys, since - in order to reduce the number of possible
mistakes - internal classes use this generation method
2024-08-14 17:07:10 +03:00
3be5cdafe8 feat: add ability to check broken dependencies (#122)
* implement elf dynamic linking check

* load local database too in pacman wrapper
2024-08-14 17:07:10 +03:00
668be41c3e type: drop MiddlewareType in favour of Middleware builtin 2024-08-14 17:07:10 +03:00
3353daec6d type: fix mypy warn for fresh unixsocket release 2024-08-14 17:07:10 +03:00
eef4d2dd98 type: remove another unused mypy directive 2024-08-14 17:07:10 +03:00
b15161554e build: use requests-unixsocket2 fork
Since requests-2.32.0, the http+unix url scheme is broken, check
https://github.com/msabramo/requests-unixsocket/issues/73 for more
details
2024-08-14 17:07:10 +03:00
8 changed files with 100 additions and 87 deletions

View File

@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
import itertools
import shutil import shutil
import tarfile import tarfile
@ -177,39 +178,48 @@ class Pacman(LazyLogging):
PacmanDatabase(database, self.configuration).sync(force=force) PacmanDatabase(database, self.configuration).sync(force=force)
transaction.release() transaction.release()
def files(self, packages: Iterable[str] | None = None) -> dict[str, set[str]]: def files(self, packages: Iterable[str]) -> dict[str, set[str]]:
""" """
extract list of known packages from the databases extract list of known packages from the databases
Args: Args:
packages(Iterable[str] | None, optional): filter by package names (Default value = None) packages(Iterable[str]): filter by package names
Returns: Returns:
dict[str, set[str]]: map of package name to its list of files dict[str, set[str]]: map of package name to its list of files
""" """
packages = packages or [] def extract(tar: tarfile.TarFile, package_names: dict[str, str]) -> Generator[tuple[str, set[str]], None, None]:
for package_name, version in package_names.items():
def extract(tar: tarfile.TarFile) -> Generator[tuple[str, set[str]], None, None]: path = Path(f"{package_name}-{version}") / "files"
for descriptor in filter(lambda info: info.path.endswith("/files"), tar.getmembers()): try:
package, *_ = str(Path(descriptor.path).parent).rsplit("-", 2) content = tar.extractfile(str(path))
if packages and package not in packages: except KeyError:
continue # skip unused packages # in case if database and its files has been desync somehow, the extractfile will raise
content = tar.extractfile(descriptor) # KeyError because the entry doesn't exist
content = None
if content is None: if content is None:
continue continue
# this is just array of files, however, the directories are with trailing slash, # this is just array of files, however, the directories are with trailing slash,
# which previously has been removed by the conversion to ``pathlib.Path`` # which previously has been removed by the conversion to ``pathlib.Path``
files = {filename.decode("utf8").rstrip().removesuffix("/") for filename in content.readlines()} files = {filename.decode("utf8").rstrip().removesuffix("/") for filename in content.readlines()}
yield package_name, files
yield package, files # sort is required for the following group by operation
descriptors = sorted(
(package for package_name in packages for package in self.package(package_name)),
key=lambda package: package.db.name
)
result: dict[str, set[str]] = {} result: dict[str, set[str]] = {}
for database in self.handle.get_syncdbs(): for database_name, pacman_packages in itertools.groupby(descriptors, lambda package: package.db.name):
database_file = self.repository_paths.pacman / "sync" / f"{database.name}.files.tar.gz" database_file = self.repository_paths.pacman / "sync" / f"{database_name}.files.tar.gz"
if not database_file.is_file(): if not database_file.is_file():
continue # no database file found continue # no database file found
package_names = {package.name: package.version for package in pacman_packages}
with tarfile.open(database_file, "r:gz") as archive: with tarfile.open(database_file, "r:gz") as archive:
result.update(extract(archive)) result.update(extract(archive, package_names))
return result return result

View File

@ -22,4 +22,3 @@ from collections.abc import Awaitable, Callable
HandlerType = Callable[[Request], Awaitable[StreamResponse]] HandlerType = Callable[[Request], Awaitable[StreamResponse]]
MiddlewareType = Callable[[Request, HandlerType], Awaitable[StreamResponse]]

View File

@ -21,6 +21,7 @@ import aiohttp_security
import socket import socket
import types import types
from aiohttp.typedefs import Middleware
from aiohttp.web import Application, Request, StaticResource, StreamResponse, middleware from aiohttp.web import Application, Request, StaticResource, StreamResponse, middleware
from aiohttp_session import setup as setup_session from aiohttp_session import setup as setup_session
from aiohttp_session.cookie_storage import EncryptedCookieStorage from aiohttp_session.cookie_storage import EncryptedCookieStorage
@ -30,7 +31,7 @@ from enum import Enum
from ahriman.core.auth import Auth from ahriman.core.auth import Auth
from ahriman.core.configuration import Configuration from ahriman.core.configuration import Configuration
from ahriman.models.user_access import UserAccess from ahriman.models.user_access import UserAccess
from ahriman.web.middlewares import HandlerType, MiddlewareType from ahriman.web.middlewares import HandlerType
__all__ = ["setup_auth"] __all__ = ["setup_auth"]
@ -84,7 +85,7 @@ class _AuthorizationPolicy(aiohttp_security.AbstractAuthorizationPolicy):
return await self.validator.verify_access(identity, permission, context) return await self.validator.verify_access(identity, permission, context)
def _auth_handler(allow_read_only: bool) -> MiddlewareType: def _auth_handler(allow_read_only: bool) -> Middleware:
""" """
authorization and authentication middleware authorization and authentication middleware
@ -92,7 +93,7 @@ def _auth_handler(allow_read_only: bool) -> MiddlewareType:
allow_read_only: allow allow_read_only: allow
Returns: Returns:
MiddlewareType: built middleware Middleware: built middleware
""" """
@middleware @middleware
async def handle(request: Request, handler: HandlerType) -> StreamResponse: async def handle(request: Request, handler: HandlerType) -> StreamResponse:

View File

@ -20,10 +20,11 @@
import aiohttp_jinja2 import aiohttp_jinja2
import logging import logging
from aiohttp.typedefs import Middleware
from aiohttp.web import HTTPClientError, HTTPException, HTTPMethodNotAllowed, HTTPNoContent, HTTPServerError, \ from aiohttp.web import HTTPClientError, HTTPException, HTTPMethodNotAllowed, HTTPNoContent, HTTPServerError, \
HTTPUnauthorized, Request, StreamResponse, json_response, middleware HTTPUnauthorized, Request, StreamResponse, json_response, middleware
from ahriman.web.middlewares import HandlerType, MiddlewareType from ahriman.web.middlewares import HandlerType
__all__ = ["exception_handler"] __all__ = ["exception_handler"]
@ -43,7 +44,7 @@ def _is_templated_unauthorized(request: Request) -> bool:
and "application/json" not in request.headers.getall("accept", []) and "application/json" not in request.headers.getall("accept", [])
def exception_handler(logger: logging.Logger) -> MiddlewareType: def exception_handler(logger: logging.Logger) -> Middleware:
""" """
exception handler middleware. Just log any exception (except for client ones) exception handler middleware. Just log any exception (except for client ones)
@ -51,7 +52,7 @@ def exception_handler(logger: logging.Logger) -> MiddlewareType:
logger(logging.Logger): class logger logger(logging.Logger): class logger
Returns: Returns:
MiddlewareType: built middleware Middleware: built middleware
Raises: Raises:
HTTPNoContent: OPTIONS method response HTTPNoContent: OPTIONS method response

View File

@ -4,7 +4,7 @@ import pytest
from pathlib import Path from pathlib import Path
from pytest_mock import MockerFixture from pytest_mock import MockerFixture
from typing import Any, TypeVar from typing import Any, TypeVar
from unittest.mock import MagicMock from unittest.mock import MagicMock, PropertyMock
from ahriman.core.alpm.pacman import Pacman from ahriman.core.alpm.pacman import Pacman
from ahriman.core.alpm.remote import AUR from ahriman.core.alpm.remote import AUR
@ -476,6 +476,41 @@ def passwd() -> MagicMock:
return passwd return passwd
@pytest.fixture
def pyalpm_package_ahriman(aur_package_ahriman: AURPackage) -> MagicMock:
"""
mock object for pyalpm package
Args:
aur_package_ahriman(AURPackage): package fixture
Returns:
MagicMock: pyalpm package mock
"""
mock = MagicMock()
db = type(mock).db = MagicMock()
type(mock).base = PropertyMock(return_value=aur_package_ahriman.package_base)
type(mock).builddate = PropertyMock(
return_value=aur_package_ahriman.last_modified.replace(tzinfo=datetime.timezone.utc).timestamp())
type(mock).conflicts = PropertyMock(return_value=aur_package_ahriman.conflicts)
type(db).name = PropertyMock(return_value="aur")
type(mock).depends = PropertyMock(return_value=aur_package_ahriman.depends)
type(mock).desc = PropertyMock(return_value=aur_package_ahriman.description)
type(mock).licenses = PropertyMock(return_value=aur_package_ahriman.license)
type(mock).makedepends = PropertyMock(return_value=aur_package_ahriman.make_depends)
type(mock).name = PropertyMock(return_value=aur_package_ahriman.name)
type(mock).optdepends = PropertyMock(return_value=aur_package_ahriman.opt_depends)
type(mock).checkdepends = PropertyMock(return_value=aur_package_ahriman.check_depends)
type(mock).packager = PropertyMock(return_value="packager")
type(mock).provides = PropertyMock(return_value=aur_package_ahriman.provides)
type(mock).version = PropertyMock(return_value=aur_package_ahriman.version)
type(mock).url = PropertyMock(return_value=aur_package_ahriman.url)
type(mock).groups = PropertyMock(return_value=aur_package_ahriman.groups)
return mock
@pytest.fixture @pytest.fixture
def remote_source() -> RemoteSource: def remote_source() -> RemoteSource:
""" """

View File

@ -1,3 +1,4 @@
import pyalpm
import pytest import pytest
import tarfile import tarfile
@ -175,31 +176,12 @@ def test_database_sync_forced(pacman: Pacman, mocker: MockerFixture) -> None:
sync_mock.assert_called_once_with(force=True) sync_mock.assert_called_once_with(force=True)
def test_files(pacman: Pacman, package_ahriman: Package, mocker: MockerFixture, resource_path_root: Path) -> None: def test_files_package(pacman: Pacman, package_ahriman: Package, pyalpm_package_ahriman: pyalpm.Package,
""" mocker: MockerFixture, resource_path_root: Path) -> None:
must load files from databases
"""
handle_mock = MagicMock()
handle_mock.get_syncdbs.return_value = [MagicMock()]
pacman.handle = handle_mock
tarball = resource_path_root / "core" / "arcanisrepo.files.tar.gz"
with tarfile.open(tarball, "r:gz") as fd:
mocker.patch("pathlib.Path.is_file", return_value=True)
open_mock = mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd)
files = pacman.files()
assert len(files) == 2
assert package_ahriman.base in files
assert "usr/bin/ahriman" in files[package_ahriman.base]
open_mock.assert_called_once_with(pytest.helpers.anyvar(int), "r:gz")
def test_files_package(pacman: Pacman, package_ahriman: Package, mocker: MockerFixture,
resource_path_root: Path) -> None:
""" """
must load files only for the specified package must load files only for the specified package
""" """
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
handle_mock = MagicMock() handle_mock = MagicMock()
handle_mock.get_syncdbs.return_value = [MagicMock()] handle_mock.get_syncdbs.return_value = [MagicMock()]
pacman.handle = handle_mock pacman.handle = handle_mock
@ -210,34 +192,35 @@ def test_files_package(pacman: Pacman, package_ahriman: Package, mocker: MockerF
mocker.patch("pathlib.Path.is_file", return_value=True) mocker.patch("pathlib.Path.is_file", return_value=True)
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd) mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd)
files = pacman.files(package_ahriman.base) files = pacman.files([package_ahriman.base])
assert len(files) == 1 assert len(files) == 1
assert package_ahriman.base in files assert package_ahriman.base in files
def test_files_skip(pacman: Pacman, mocker: MockerFixture) -> None: def test_files_skip(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
""" """
must return empty list if no database found must return empty list if no database found
""" """
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
handle_mock = MagicMock() handle_mock = MagicMock()
handle_mock.get_syncdbs.return_value = [MagicMock()] handle_mock.get_syncdbs.return_value = [MagicMock()]
pacman.handle = handle_mock pacman.handle = handle_mock
mocker.patch("pathlib.Path.is_file", return_value=False) mocker.patch("pathlib.Path.is_file", return_value=False)
assert not pacman.files() assert not pacman.files([pyalpm_package_ahriman.name])
def test_files_no_content(pacman: Pacman, mocker: MockerFixture) -> None: def test_files_no_content(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
""" """
must skip package if no content can be loaded must skip package if no content can be loaded
""" """
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
handle_mock = MagicMock() handle_mock = MagicMock()
handle_mock.get_syncdbs.return_value = [MagicMock()] handle_mock.get_syncdbs.return_value = [MagicMock()]
pacman.handle = handle_mock pacman.handle = handle_mock
tar_mock = MagicMock() tar_mock = MagicMock()
tar_mock.getmembers.return_value = [MagicMock()]
tar_mock.extractfile.return_value = None tar_mock.extractfile.return_value = None
open_mock = MagicMock() open_mock = MagicMock()
@ -246,7 +229,28 @@ def test_files_no_content(pacman: Pacman, mocker: MockerFixture) -> None:
mocker.patch("pathlib.Path.is_file", return_value=True) mocker.patch("pathlib.Path.is_file", return_value=True)
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock) mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock)
assert not pacman.files() assert not pacman.files([pyalpm_package_ahriman.name])
def test_files_no_entry(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
"""
must skip package if it wasn't found in the archive
"""
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
handle_mock = MagicMock()
handle_mock.get_syncdbs.return_value = [MagicMock()]
pacman.handle = handle_mock
tar_mock = MagicMock()
tar_mock.extractfile.side_effect = KeyError()
open_mock = MagicMock()
open_mock.__enter__.return_value = tar_mock
mocker.patch("pathlib.Path.is_file", return_value=True)
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock)
assert not pacman.files([pyalpm_package_ahriman.name])
def test_package(pacman: Pacman) -> None: def test_package(pacman: Pacman) -> None:

View File

@ -1,4 +1,3 @@
import datetime
import pytest import pytest
from typing import Any from typing import Any
@ -8,7 +7,6 @@ from pytest_mock import MockerFixture
from ahriman import __version__ from ahriman import __version__
from ahriman.core.alpm.pacman import Pacman from ahriman.core.alpm.pacman import Pacman
from ahriman.core.alpm.remote import AUR from ahriman.core.alpm.remote import AUR
from ahriman.models.aur_package import AURPackage
from ahriman.models.build_status import BuildStatus, BuildStatusEnum from ahriman.models.build_status import BuildStatus, BuildStatusEnum
from ahriman.models.counters import Counters from ahriman.models.counters import Counters
from ahriman.models.filesystem_package import FilesystemPackage from ahriman.models.filesystem_package import FilesystemPackage
@ -134,41 +132,6 @@ def pyalpm_handle(pyalpm_package_ahriman: MagicMock) -> MagicMock:
return mock return mock
@pytest.fixture
def pyalpm_package_ahriman(aur_package_ahriman: AURPackage) -> MagicMock:
"""
mock object for pyalpm package
Args:
aur_package_ahriman(AURPackage): package fixture
Returns:
MagicMock: pyalpm package mock
"""
mock = MagicMock()
db = type(mock).db = MagicMock()
type(mock).base = PropertyMock(return_value=aur_package_ahriman.package_base)
type(mock).builddate = PropertyMock(
return_value=aur_package_ahriman.last_modified.replace(tzinfo=datetime.timezone.utc).timestamp())
type(mock).conflicts = PropertyMock(return_value=aur_package_ahriman.conflicts)
type(db).name = PropertyMock(return_value="aur")
type(mock).depends = PropertyMock(return_value=aur_package_ahriman.depends)
type(mock).desc = PropertyMock(return_value=aur_package_ahriman.description)
type(mock).licenses = PropertyMock(return_value=aur_package_ahriman.license)
type(mock).makedepends = PropertyMock(return_value=aur_package_ahriman.make_depends)
type(mock).name = PropertyMock(return_value=aur_package_ahriman.name)
type(mock).optdepends = PropertyMock(return_value=aur_package_ahriman.opt_depends)
type(mock).checkdepends = PropertyMock(return_value=aur_package_ahriman.check_depends)
type(mock).packager = PropertyMock(return_value="packager")
type(mock).provides = PropertyMock(return_value=aur_package_ahriman.provides)
type(mock).version = PropertyMock(return_value=aur_package_ahriman.version)
type(mock).url = PropertyMock(return_value=aur_package_ahriman.url)
type(mock).groups = PropertyMock(return_value=aur_package_ahriman.groups)
return mock
@pytest.fixture @pytest.fixture
def pyalpm_package_description_ahriman(package_description_ahriman: PackageDescription) -> MagicMock: def pyalpm_package_description_ahriman(package_description_ahriman: PackageDescription) -> MagicMock:
""" """