mirror of
https://github.com/arcan1s/ahriman.git
synced 2025-04-24 15:27:17 +00:00
feat: optimize archive reading
Instead of trying to load every database and look for files, this commit introduces an optimization in which the service loads packages first, groups them by database, and loads files later. In some cases this significantly decreases the time needed to load files.
This commit is contained in:
parent
fd3c6343f1
commit
f44fa19c42
@ -17,6 +17,7 @@
|
|||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
import itertools
|
||||||
import shutil
|
import shutil
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
@ -177,39 +178,48 @@ class Pacman(LazyLogging):
|
|||||||
PacmanDatabase(database, self.configuration).sync(force=force)
|
PacmanDatabase(database, self.configuration).sync(force=force)
|
||||||
transaction.release()
|
transaction.release()
|
||||||
|
|
||||||
def files(self, packages: Iterable[str] | None = None) -> dict[str, set[str]]:
|
def files(self, packages: Iterable[str]) -> dict[str, set[str]]:
|
||||||
"""
|
"""
|
||||||
extract list of known packages from the databases
|
extract list of known packages from the databases
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
packages(Iterable[str] | None, optional): filter by package names (Default value = None)
|
packages(Iterable[str]): filter by package names
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict[str, set[str]]: map of package name to its list of files
|
dict[str, set[str]]: map of package name to its list of files
|
||||||
"""
|
"""
|
||||||
packages = packages or []
|
def extract(tar: tarfile.TarFile, package_names: dict[str, str]) -> Generator[tuple[str, set[str]], None, None]:
|
||||||
|
for package_name, version in package_names.items():
|
||||||
def extract(tar: tarfile.TarFile) -> Generator[tuple[str, set[str]], None, None]:
|
path = Path(f"{package_name}-{version}") / "files"
|
||||||
for descriptor in filter(lambda info: info.path.endswith("/files"), tar.getmembers()):
|
try:
|
||||||
package, *_ = str(Path(descriptor.path).parent).rsplit("-", 2)
|
content = tar.extractfile(str(path))
|
||||||
if packages and package not in packages:
|
except KeyError:
|
||||||
continue # skip unused packages
|
# if the database and its files have somehow gone out of sync, extractfile will raise
|
||||||
content = tar.extractfile(descriptor)
|
# KeyError because the entry doesn't exist
|
||||||
|
content = None
|
||||||
if content is None:
|
if content is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# this is just array of files, however, the directories are with trailing slash,
|
# this is just array of files, however, the directories are with trailing slash,
|
||||||
# which previously has been removed by the conversion to ``pathlib.Path``
|
# which previously has been removed by the conversion to ``pathlib.Path``
|
||||||
files = {filename.decode("utf8").rstrip().removesuffix("/") for filename in content.readlines()}
|
files = {filename.decode("utf8").rstrip().removesuffix("/") for filename in content.readlines()}
|
||||||
|
yield package_name, files
|
||||||
|
|
||||||
yield package, files
|
# sort is required for the following group by operation
|
||||||
|
descriptors = sorted(
|
||||||
|
(package for package_name in packages for package in self.package(package_name)),
|
||||||
|
key=lambda package: package.db.name
|
||||||
|
)
|
||||||
|
|
||||||
result: dict[str, set[str]] = {}
|
result: dict[str, set[str]] = {}
|
||||||
for database in self.handle.get_syncdbs():
|
for database_name, pacman_packages in itertools.groupby(descriptors, lambda package: package.db.name):
|
||||||
database_file = self.repository_paths.pacman / "sync" / f"{database.name}.files.tar.gz"
|
database_file = self.repository_paths.pacman / "sync" / f"{database_name}.files.tar.gz"
|
||||||
if not database_file.is_file():
|
if not database_file.is_file():
|
||||||
continue # no database file found
|
continue # no database file found
|
||||||
|
|
||||||
|
package_names = {package.name: package.version for package in pacman_packages}
|
||||||
with tarfile.open(database_file, "r:gz") as archive:
|
with tarfile.open(database_file, "r:gz") as archive:
|
||||||
result.update(extract(archive))
|
result.update(extract(archive, package_names))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ import pytest
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
from typing import Any, TypeVar
|
from typing import Any, TypeVar
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock, PropertyMock
|
||||||
|
|
||||||
from ahriman.core.alpm.pacman import Pacman
|
from ahriman.core.alpm.pacman import Pacman
|
||||||
from ahriman.core.alpm.remote import AUR
|
from ahriman.core.alpm.remote import AUR
|
||||||
@ -476,6 +476,41 @@ def passwd() -> MagicMock:
|
|||||||
return passwd
|
return passwd
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def pyalpm_package_ahriman(aur_package_ahriman: AURPackage) -> MagicMock:
|
||||||
|
"""
|
||||||
|
mock object for pyalpm package
|
||||||
|
|
||||||
|
Args:
|
||||||
|
aur_package_ahriman(AURPackage): package fixture
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
MagicMock: pyalpm package mock
|
||||||
|
"""
|
||||||
|
mock = MagicMock()
|
||||||
|
db = type(mock).db = MagicMock()
|
||||||
|
|
||||||
|
type(mock).base = PropertyMock(return_value=aur_package_ahriman.package_base)
|
||||||
|
type(mock).builddate = PropertyMock(
|
||||||
|
return_value=aur_package_ahriman.last_modified.replace(tzinfo=datetime.timezone.utc).timestamp())
|
||||||
|
type(mock).conflicts = PropertyMock(return_value=aur_package_ahriman.conflicts)
|
||||||
|
type(db).name = PropertyMock(return_value="aur")
|
||||||
|
type(mock).depends = PropertyMock(return_value=aur_package_ahriman.depends)
|
||||||
|
type(mock).desc = PropertyMock(return_value=aur_package_ahriman.description)
|
||||||
|
type(mock).licenses = PropertyMock(return_value=aur_package_ahriman.license)
|
||||||
|
type(mock).makedepends = PropertyMock(return_value=aur_package_ahriman.make_depends)
|
||||||
|
type(mock).name = PropertyMock(return_value=aur_package_ahriman.name)
|
||||||
|
type(mock).optdepends = PropertyMock(return_value=aur_package_ahriman.opt_depends)
|
||||||
|
type(mock).checkdepends = PropertyMock(return_value=aur_package_ahriman.check_depends)
|
||||||
|
type(mock).packager = PropertyMock(return_value="packager")
|
||||||
|
type(mock).provides = PropertyMock(return_value=aur_package_ahriman.provides)
|
||||||
|
type(mock).version = PropertyMock(return_value=aur_package_ahriman.version)
|
||||||
|
type(mock).url = PropertyMock(return_value=aur_package_ahriman.url)
|
||||||
|
type(mock).groups = PropertyMock(return_value=aur_package_ahriman.groups)
|
||||||
|
|
||||||
|
return mock
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def remote_source() -> RemoteSource:
|
def remote_source() -> RemoteSource:
|
||||||
"""
|
"""
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import pyalpm
|
||||||
import pytest
|
import pytest
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
@ -175,31 +176,12 @@ def test_database_sync_forced(pacman: Pacman, mocker: MockerFixture) -> None:
|
|||||||
sync_mock.assert_called_once_with(force=True)
|
sync_mock.assert_called_once_with(force=True)
|
||||||
|
|
||||||
|
|
||||||
def test_files(pacman: Pacman, package_ahriman: Package, mocker: MockerFixture, resource_path_root: Path) -> None:
|
def test_files_package(pacman: Pacman, package_ahriman: Package, pyalpm_package_ahriman: pyalpm.Package,
|
||||||
"""
|
mocker: MockerFixture, resource_path_root: Path) -> None:
|
||||||
must load files from databases
|
|
||||||
"""
|
|
||||||
handle_mock = MagicMock()
|
|
||||||
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
|
||||||
pacman.handle = handle_mock
|
|
||||||
tarball = resource_path_root / "core" / "arcanisrepo.files.tar.gz"
|
|
||||||
|
|
||||||
with tarfile.open(tarball, "r:gz") as fd:
|
|
||||||
mocker.patch("pathlib.Path.is_file", return_value=True)
|
|
||||||
open_mock = mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd)
|
|
||||||
|
|
||||||
files = pacman.files()
|
|
||||||
assert len(files) == 2
|
|
||||||
assert package_ahriman.base in files
|
|
||||||
assert "usr/bin/ahriman" in files[package_ahriman.base]
|
|
||||||
open_mock.assert_called_once_with(pytest.helpers.anyvar(int), "r:gz")
|
|
||||||
|
|
||||||
|
|
||||||
def test_files_package(pacman: Pacman, package_ahriman: Package, mocker: MockerFixture,
|
|
||||||
resource_path_root: Path) -> None:
|
|
||||||
"""
|
"""
|
||||||
must load files only for the specified package
|
must load files only for the specified package
|
||||||
"""
|
"""
|
||||||
|
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
|
||||||
handle_mock = MagicMock()
|
handle_mock = MagicMock()
|
||||||
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
||||||
pacman.handle = handle_mock
|
pacman.handle = handle_mock
|
||||||
@ -210,34 +192,35 @@ def test_files_package(pacman: Pacman, package_ahriman: Package, mocker: MockerF
|
|||||||
mocker.patch("pathlib.Path.is_file", return_value=True)
|
mocker.patch("pathlib.Path.is_file", return_value=True)
|
||||||
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd)
|
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=fd)
|
||||||
|
|
||||||
files = pacman.files(package_ahriman.base)
|
files = pacman.files([package_ahriman.base])
|
||||||
assert len(files) == 1
|
assert len(files) == 1
|
||||||
assert package_ahriman.base in files
|
assert package_ahriman.base in files
|
||||||
|
|
||||||
|
|
||||||
def test_files_skip(pacman: Pacman, mocker: MockerFixture) -> None:
|
def test_files_skip(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
|
||||||
"""
|
"""
|
||||||
must return empty list if no database found
|
must return empty list if no database found
|
||||||
"""
|
"""
|
||||||
|
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
|
||||||
handle_mock = MagicMock()
|
handle_mock = MagicMock()
|
||||||
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
||||||
pacman.handle = handle_mock
|
pacman.handle = handle_mock
|
||||||
|
|
||||||
mocker.patch("pathlib.Path.is_file", return_value=False)
|
mocker.patch("pathlib.Path.is_file", return_value=False)
|
||||||
|
|
||||||
assert not pacman.files()
|
assert not pacman.files([pyalpm_package_ahriman.name])
|
||||||
|
|
||||||
|
|
||||||
def test_files_no_content(pacman: Pacman, mocker: MockerFixture) -> None:
|
def test_files_no_content(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
|
||||||
"""
|
"""
|
||||||
must skip package if no content can be loaded
|
must skip package if no content can be loaded
|
||||||
"""
|
"""
|
||||||
|
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
|
||||||
handle_mock = MagicMock()
|
handle_mock = MagicMock()
|
||||||
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
||||||
pacman.handle = handle_mock
|
pacman.handle = handle_mock
|
||||||
|
|
||||||
tar_mock = MagicMock()
|
tar_mock = MagicMock()
|
||||||
tar_mock.getmembers.return_value = [MagicMock()]
|
|
||||||
tar_mock.extractfile.return_value = None
|
tar_mock.extractfile.return_value = None
|
||||||
|
|
||||||
open_mock = MagicMock()
|
open_mock = MagicMock()
|
||||||
@ -246,7 +229,28 @@ def test_files_no_content(pacman: Pacman, mocker: MockerFixture) -> None:
|
|||||||
mocker.patch("pathlib.Path.is_file", return_value=True)
|
mocker.patch("pathlib.Path.is_file", return_value=True)
|
||||||
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock)
|
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock)
|
||||||
|
|
||||||
assert not pacman.files()
|
assert not pacman.files([pyalpm_package_ahriman.name])
|
||||||
|
|
||||||
|
|
||||||
|
def test_files_no_entry(pacman: Pacman, pyalpm_package_ahriman: pyalpm.Package, mocker: MockerFixture) -> None:
|
||||||
|
"""
|
||||||
|
must skip package if it wasn't found in the archive
|
||||||
|
"""
|
||||||
|
mocker.patch("ahriman.core.alpm.pacman.Pacman.package", return_value=[pyalpm_package_ahriman])
|
||||||
|
handle_mock = MagicMock()
|
||||||
|
handle_mock.get_syncdbs.return_value = [MagicMock()]
|
||||||
|
pacman.handle = handle_mock
|
||||||
|
|
||||||
|
tar_mock = MagicMock()
|
||||||
|
tar_mock.extractfile.side_effect = KeyError()
|
||||||
|
|
||||||
|
open_mock = MagicMock()
|
||||||
|
open_mock.__enter__.return_value = tar_mock
|
||||||
|
|
||||||
|
mocker.patch("pathlib.Path.is_file", return_value=True)
|
||||||
|
mocker.patch("ahriman.core.alpm.pacman.tarfile.open", return_value=open_mock)
|
||||||
|
|
||||||
|
assert not pacman.files([pyalpm_package_ahriman.name])
|
||||||
|
|
||||||
|
|
||||||
def test_package(pacman: Pacman) -> None:
|
def test_package(pacman: Pacman) -> None:
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import datetime
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -8,7 +7,6 @@ from pytest_mock import MockerFixture
|
|||||||
from ahriman import __version__
|
from ahriman import __version__
|
||||||
from ahriman.core.alpm.pacman import Pacman
|
from ahriman.core.alpm.pacman import Pacman
|
||||||
from ahriman.core.alpm.remote import AUR
|
from ahriman.core.alpm.remote import AUR
|
||||||
from ahriman.models.aur_package import AURPackage
|
|
||||||
from ahriman.models.build_status import BuildStatus, BuildStatusEnum
|
from ahriman.models.build_status import BuildStatus, BuildStatusEnum
|
||||||
from ahriman.models.counters import Counters
|
from ahriman.models.counters import Counters
|
||||||
from ahriman.models.filesystem_package import FilesystemPackage
|
from ahriman.models.filesystem_package import FilesystemPackage
|
||||||
@ -134,41 +132,6 @@ def pyalpm_handle(pyalpm_package_ahriman: MagicMock) -> MagicMock:
|
|||||||
return mock
|
return mock
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def pyalpm_package_ahriman(aur_package_ahriman: AURPackage) -> MagicMock:
|
|
||||||
"""
|
|
||||||
mock object for pyalpm package
|
|
||||||
|
|
||||||
Args:
|
|
||||||
aur_package_ahriman(AURPackage): package fixture
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
MagicMock: pyalpm package mock
|
|
||||||
"""
|
|
||||||
mock = MagicMock()
|
|
||||||
db = type(mock).db = MagicMock()
|
|
||||||
|
|
||||||
type(mock).base = PropertyMock(return_value=aur_package_ahriman.package_base)
|
|
||||||
type(mock).builddate = PropertyMock(
|
|
||||||
return_value=aur_package_ahriman.last_modified.replace(tzinfo=datetime.timezone.utc).timestamp())
|
|
||||||
type(mock).conflicts = PropertyMock(return_value=aur_package_ahriman.conflicts)
|
|
||||||
type(db).name = PropertyMock(return_value="aur")
|
|
||||||
type(mock).depends = PropertyMock(return_value=aur_package_ahriman.depends)
|
|
||||||
type(mock).desc = PropertyMock(return_value=aur_package_ahriman.description)
|
|
||||||
type(mock).licenses = PropertyMock(return_value=aur_package_ahriman.license)
|
|
||||||
type(mock).makedepends = PropertyMock(return_value=aur_package_ahriman.make_depends)
|
|
||||||
type(mock).name = PropertyMock(return_value=aur_package_ahriman.name)
|
|
||||||
type(mock).optdepends = PropertyMock(return_value=aur_package_ahriman.opt_depends)
|
|
||||||
type(mock).checkdepends = PropertyMock(return_value=aur_package_ahriman.check_depends)
|
|
||||||
type(mock).packager = PropertyMock(return_value="packager")
|
|
||||||
type(mock).provides = PropertyMock(return_value=aur_package_ahriman.provides)
|
|
||||||
type(mock).version = PropertyMock(return_value=aur_package_ahriman.version)
|
|
||||||
type(mock).url = PropertyMock(return_value=aur_package_ahriman.url)
|
|
||||||
type(mock).groups = PropertyMock(return_value=aur_package_ahriman.groups)
|
|
||||||
|
|
||||||
return mock
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def pyalpm_package_description_ahriman(package_description_ahriman: PackageDescription) -> MagicMock:
|
def pyalpm_package_description_ahriman(package_description_ahriman: PackageDescription) -> MagicMock:
|
||||||
"""
|
"""
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user