multipart upload with signatures as well as safe file save

This commit is contained in:
Evgenii Alekseev 2023-08-17 04:00:28 +03:00
parent d918022840
commit 37e870920b
13 changed files with 318 additions and 91 deletions

View File

@ -20,6 +20,14 @@ ahriman.core.upload.http\_upload module
:no-undoc-members:
:show-inheritance:
ahriman.core.upload.remote\_service module
------------------------------------------
.. automodule:: ahriman.core.upload.remote_service
:members:
:no-undoc-members:
:show-inheritance:
ahriman.core.upload.rsync module
--------------------------------

View File

@ -36,6 +36,14 @@ ahriman.web.schemas.error\_schema module
:no-undoc-members:
:show-inheritance:
ahriman.web.schemas.file\_schema module
---------------------------------------
.. automodule:: ahriman.web.schemas.file_schema
:members:
:no-undoc-members:
:show-inheritance:
ahriman.web.schemas.internal\_status\_schema module
---------------------------------------------------

View File

@ -68,6 +68,14 @@ ahriman.web.views.service.update module
:no-undoc-members:
:show-inheritance:
ahriman.web.views.service.upload module
---------------------------------------
.. automodule:: ahriman.web.views.service.upload
:members:
:no-undoc-members:
:show-inheritance:
Module contents
---------------

View File

@ -855,6 +855,27 @@ In addition, the following settings are recommended for workers:
[remote-call]
wait_timeout = 0
Repository and packages signing
"""""""""""""""""""""""""""""""
You can sign packages on worker nodes and then signatures will be synced to master node. In order to do so, you need to configure worker node as following, e.g.:
.. code-block:: ini
[sign]
target = package
key = 8BE91E5A773FB48AC05CC1EDBED105AED6246B39
Note, however, that in this case, signatures will not be validated on master node and just will be copied to repository tree.
If you would like to sign only database files (aka repository sign), it has to be configured on master node only as usual, e.g.:
.. code-block:: ini
[sign]
target = repository
key = 8BE91E5A773FB48AC05CC1EDBED105AED6246B39
Double node minimal docker example
""""""""""""""""""""""""""""""""""

View File

@ -101,6 +101,19 @@ class GPG(LazyLogging):
default_key = configuration.get("sign", "key") if targets else None
return targets, default_key
@staticmethod
def signature(filepath: Path) -> Path:
"""
generate signature name for the file
Args:
filepath(Path): path to the file which will be signed
Returns:
str: path to signature file
"""
return filepath.parent / f"{filepath.name}.sig"
def key_download(self, server: str, key: str) -> str:
"""
download key from public PGP server
@ -179,11 +192,11 @@ class GPG(LazyLogging):
*GPG.sign_command(path, key),
exception=BuildError(path.name),
logger=self.logger)
return [path, path.parent / f"{path.name}.sig"]
return [path, self.signature(path)]
def process_sign_package(self, path: Path, packager_key: str | None) -> list[Path]:
"""
sign package if required by configuration
sign package if required by configuration and signature doesn't exist
Args:
path(Path): path to file to sign
@ -192,6 +205,10 @@ class GPG(LazyLogging):
Returns:
list[Path]: list of generated files including original file
"""
if (signature := self.signature(path)).is_file():
# the file was already signed before, just use its signature
return [path, signature]
if SignSettings.Packages not in self.targets:
return [path]

View File

@ -37,6 +37,10 @@ from ahriman.models.package import Package
from ahriman.models.user import User
# filename, file, content-type, headers
MultipartType = tuple[str, IO[bytes], str, dict[str, str]]
class WebClient(Client, LazyLogging):
"""
build status reporter web client
@ -188,9 +192,10 @@ class WebClient(Client, LazyLogging):
}
self.make_request("POST", self._login_url, json=payload, session=session)
def make_request(self, method: Literal["DELETE", "GET", "POST"], url: str,
params: list[tuple[str, str]] | None = None, json: dict[str, Any] | None = None,
files: dict[str, tuple[str, IO[bytes], str, dict[str, str]]] | None = None,
def make_request(self, method: Literal["DELETE", "GET", "POST"], url: str, *,
params: list[tuple[str, str]] | None = None,
json: dict[str, Any] | None = None,
files: dict[str, MultipartType] | None = None,
session: requests.Session | None = None) -> requests.Response | None:
"""
perform request with specified parameters
@ -200,8 +205,7 @@ class WebClient(Client, LazyLogging):
url(str): remote url to call
params(list[tuple[str, str]] | None, optional): request query parameters (Default value = None)
json(dict[str, Any] | None, optional): request json parameters (Default value = None)
files(dict[str, tuple[str, IO, str, dict[str, str]]] | None, optional): multipart upload
(Default value = None)
files(dict[str, MultipartType] | None, optional): multipart upload (Default value = None)
session(requests.Session | None, optional): session object if any (Default value = None)
Returns:

View File

@ -21,10 +21,10 @@ import requests
from functools import cached_property
from pathlib import Path
from typing import IO
from ahriman.core.configuration import Configuration
from ahriman.core.status.web_client import WebClient
from ahriman.core.sign.gpg import GPG
from ahriman.core.status.web_client import MultipartType, WebClient
from ahriman.core.upload.http_upload import HttpUpload
from ahriman.models.package import Package
@ -67,17 +67,31 @@ class RemoteService(HttpUpload):
path(Path): local path to sync
package(Package): package to upload
"""
def upload(package_path: Path, signature_path: Path | None) -> None:
files: dict[str, MultipartType] = {}
try:
# package part always persists
files["package"] = package_path.name, package_path.open("rb"), "application/octet-stream", {}
# signature part is optional
if signature_path is not None:
files["signature"] = signature_path.name, signature_path.open("rb"), "application/octet-stream", {}
self._request("POST", f"{self.client.address}/api/v1/service/upload", files=files)
finally:
for _, fd, _, _ in files.values():
fd.close()
for key, descriptor in package.packages.items():
if descriptor.filename is None:
self.logger.warning("package %s of %s doesn't have filename set", key, package.base)
continue
with (path / descriptor.filename).open("rb") as archive:
# filename, file, content-type, headers
part: tuple[str, IO[bytes], str, dict[str, str]] = (
descriptor.filename, archive, "application/octet-stream", {}
)
self._request("POST", f"{self.client.address}/api/v1/service/upload", files={"archive": part})
archive = path / descriptor.filename
maybe_signature_path = GPG.signature(archive)
signature = maybe_signature_path if maybe_signature_path.is_file() else None
upload(archive, signature)
def sync(self, path: Path, built_packages: list[Package]) -> None:
"""

View File

@ -279,7 +279,7 @@ def package_like(filename: Path) -> bool:
bool: True in case if name contains ``.pkg.`` and not signature, False otherwise
"""
name = filename.name
return ".pkg." in name and not name.endswith(".sig")
return not name.startswith(".") and ".pkg." in name and not name.endswith(".sig")
def parse_version(version: str) -> tuple[str | None, str, str]:

View File

@ -40,6 +40,56 @@ class UploadView(BaseView):
POST_PERMISSION = UserAccess.Full
@staticmethod
async def save_file(part: BodyPartReader, target: Path, *, max_body_size: int | None = None) -> tuple[str, Path]:
"""
save file to local cache
Args:
part(BodyPartReader): multipart part to be saved
target(Path): path to directory to which file should be saved
max_body_size(int | None, optional): max body size in bytes (Default value = None)
Returns:
tuple[str, Path]: map of received filename to its local path
Raises:
HTTPBadRequest: if bad data is supplied
"""
archive_name = part.filename
if archive_name is None:
raise HTTPBadRequest(reason="Filename must be set")
# some magic inside. We would like to make sure that passed filename is filename
# without slashes, dots, etc
if Path(archive_name).resolve().name != archive_name:
raise HTTPBadRequest(reason="Filename must be valid archive name")
current_size = 0
# in order to handle errors automatically we create temporary file for long operation (transfer)
# and then copy it to valid location
with tempfile.NamedTemporaryFile() as cache:
while True:
chunk = await part.read_chunk()
if not chunk:
break
current_size += len(chunk)
if max_body_size is not None and current_size > max_body_size:
raise HTTPBadRequest(reason="Body part is too large")
cache.write(chunk)
cache.seek(0) # reset file position
# and now copy temporary file to target location as hidden file
# we put it as hidden in order to make sure that it will not be handled during some random process
temporary_output = target / f".{archive_name}"
with temporary_output.open("wb") as archive:
shutil.copyfileobj(cache, archive)
return archive_name, temporary_output
@aiohttp_apispec.docs(
tags=["Actions"],
summary="Upload package",
@ -72,42 +122,23 @@ class UploadView(BaseView):
except Exception as e:
raise HTTPBadRequest(reason=str(e))
part = await reader.next()
max_body_size = self.configuration.getint("web", "max_body_size", fallback=None)
target = self.configuration.repository_paths.packages
files = []
while (part := await reader.next()) is not None:
if not isinstance(part, BodyPartReader):
raise HTTPBadRequest(reason="Invalid multipart message received")
if part.name != "archive":
raise HTTPBadRequest(reason="Multipart field isn't archive")
if part.name not in ("package", "signature"):
raise HTTPBadRequest(reason="Multipart field isn't package or signature")
archive_name = part.filename
if archive_name is None:
raise HTTPBadRequest(reason="Filename must be set") # pragma: no cover
# some magic inside. We would like to make sure that passed filename is filename
# without slashes, dots, etc
if Path(archive_name).resolve().name != archive_name:
raise HTTPBadRequest(reason="Filename must be valid archive name")
files.append(await self.save_file(part, target, max_body_size=max_body_size))
max_body_size = self.configuration.getint("web", "max_body_size", fallback=None)
current_size = 0
# in order to handle errors automatically we create temporary file for long operation (transfer) and then copy
# it to valid location
with tempfile.NamedTemporaryFile() as cache:
while True:
chunk = await part.read_chunk()
if not chunk:
break
current_size += len(chunk)
if max_body_size is not None and current_size > max_body_size:
raise HTTPBadRequest(reason="Body part is too large")
cache.write(chunk)
# and now copy temporary file to correct location
cache.seek(0) # reset file position
output = self.configuration.repository_paths.packages / archive_name
with output.open("wb") as archive:
shutil.copyfileobj(cache, archive)
# and now we can rename files, which is relatively fast operation
# it is probably good way to call lock here, however
for filename, current_location in files:
target_location = current_location.parent / filename
current_location.rename(target_location)
raise HTTPCreated()

View File

@ -76,6 +76,13 @@ def test_sign_options(configuration: Configuration) -> None:
assert default_key == "default-key"
def test_signature() -> None:
"""
must correctly generate the signature path
"""
assert GPG.signature(Path("path") / "to" / "package.tar.xz") == Path("path") / "to" / "package.tar.xz.sig"
def test_key_download(gpg: GPG, mocker: MockerFixture) -> None:
"""
must download the key from public server
@ -222,6 +229,18 @@ def test_process_sign_package_skip_4(gpg: GPG, mocker: MockerFixture) -> None:
process_mock.assert_not_called()
def test_process_sign_package_skip_already_signed(gpg_with_key: GPG, mocker: MockerFixture) -> None:
"""
must not sign package if it was already signed
"""
result = [Path("a"), Path("a.sig")]
mocker.patch("pathlib.Path.is_file", return_value=True)
process_mock = mocker.patch("ahriman.core.sign.gpg.GPG.process")
assert gpg_with_key.process_sign_package(Path("a"), gpg_with_key.default_key) == result
process_mock.assert_not_called()
def test_process_sign_repository_1(gpg_with_key: GPG, mocker: MockerFixture) -> None:
"""
must sign repository

View File

@ -273,6 +273,15 @@ def test_package_like(package_ahriman: Package) -> None:
assert package_like(package_ahriman.packages[package_ahriman.base].filepath)
def test_package_like_hidden(package_ahriman: Package) -> None:
"""
package_like must return false for hidden files
"""
package_file = package_ahriman.packages[package_ahriman.base].filepath
hidden_file = package_file.parent / f".{package_file.name}"
assert not package_like(hidden_file)
def test_package_like_sig(package_ahriman: Package) -> None:
"""
package_like must return false for signature files

View File

@ -2,6 +2,7 @@ import pytest
from pathlib import Path
from pytest_mock import MockerFixture
from unittest.mock import MagicMock, call as MockCall
from ahriman.core.upload.remote_service import RemoteService
from ahriman.models.package import Package
@ -20,20 +21,43 @@ def test_package_upload(remote_service: RemoteService, package_ahriman: Package,
"""
must upload package to remote host
"""
open_mock = mocker.patch("pathlib.Path.open")
mocker.patch("pathlib.Path.is_file", return_value=False)
file_mock = MagicMock()
open_mock = mocker.patch("pathlib.Path.open", return_value=file_mock)
upload_mock = mocker.patch("ahriman.core.upload.http_upload.HttpUpload._request")
filename = package_ahriman.packages[package_ahriman.base].filename
remote_service.sync(Path("local"), [package_ahriman])
open_mock.assert_called_once_with("rb")
file_mock.close.assert_called_once()
upload_mock.assert_called_once_with("POST", f"{remote_service.client.address}/api/v1/service/upload", files={
"archive": (filename, pytest.helpers.anyvar(int), "application/octet-stream", {})
"package": (filename, pytest.helpers.anyvar(int), "application/octet-stream", {})
})
def test_package_upload_no_filename(
remote_service: RemoteService,
package_ahriman: Package,
def test_package_upload_with_signature(remote_service: RemoteService, package_ahriman: Package,
mocker: MockerFixture) -> None:
"""
must upload package to remote host with signatures
"""
mocker.patch("pathlib.Path.is_file", return_value=True)
file_mock = MagicMock()
open_mock = mocker.patch("pathlib.Path.open", return_value=file_mock)
upload_mock = mocker.patch("ahriman.core.upload.http_upload.HttpUpload._request")
filename = package_ahriman.packages[package_ahriman.base].filename
remote_service.sync(Path("local"), [package_ahriman])
open_mock.assert_has_calls([MockCall("rb"), MockCall("rb")])
file_mock.close.assert_has_calls([MockCall(), MockCall()])
upload_mock.assert_called_once_with(
"POST", f"{remote_service.client.address}/api/v1/service/upload", files={
"package": (filename, pytest.helpers.anyvar(int), "application/octet-stream", {}),
"signature": (f"{filename}.sig", pytest.helpers.anyvar(int), "application/octet-stream", {})
}
)
def test_package_upload_no_filename(remote_service: RemoteService, package_ahriman: Package,
mocker: MockerFixture) -> None:
"""
must skip upload if no filename set

View File

@ -2,9 +2,13 @@ import pytest
from aiohttp import FormData
from aiohttp.test_utils import TestClient
from aiohttp.web import HTTPBadRequest
from io import BytesIO
from pathlib import Path
from pytest_mock import MockerFixture
from unittest.mock import AsyncMock, MagicMock, call as MockCall
from ahriman.models.repository_paths import RepositoryPaths
from ahriman.models.user_access import UserAccess
from ahriman.web.views.service.upload import UploadView
@ -18,21 +22,109 @@ async def test_get_permission() -> None:
assert await UploadView.get_permission(request) == UserAccess.Full
async def test_post(client: TestClient, mocker: MockerFixture) -> None:
async def test_save_file(mocker: MockerFixture) -> None:
"""
must correctly save file
"""
part_mock = MagicMock()
part_mock.filename = "filename"
part_mock.read_chunk = AsyncMock(side_effect=[b"content", None])
tempfile_mock = mocker.patch("tempfile.NamedTemporaryFile")
file_mock = MagicMock()
tempfile_mock.return_value.__enter__.return_value = file_mock
open_mock = mocker.patch("pathlib.Path.open")
copy_mock = mocker.patch("shutil.copyfileobj")
local = Path("local")
assert await UploadView.save_file(part_mock, local, max_body_size=None) == \
(part_mock.filename, local / f".{part_mock.filename}")
file_mock.write.assert_called_once_with(b"content")
open_mock.assert_called_once_with("wb")
copy_mock.assert_called_once_with(pytest.helpers.anyvar(int), pytest.helpers.anyvar(int))
async def test_save_file_no_filename() -> None:
"""
must raise exception on missing filename
"""
part_mock = MagicMock()
part_mock.filename = None
with pytest.raises(HTTPBadRequest):
await UploadView.save_file(part_mock, Path("local"), max_body_size=None)
async def test_save_file_invalid_filename() -> None:
"""
must raise exception on invalid filename
"""
part_mock = MagicMock()
part_mock.filename = ".."
with pytest.raises(HTTPBadRequest):
await UploadView.save_file(part_mock, Path("local"), max_body_size=None)
async def test_save_file_too_big() -> None:
"""
must raise exception on too big file
"""
part_mock = MagicMock()
part_mock.filename = "filename"
part_mock.read_chunk = AsyncMock(side_effect=[b"content", None])
with pytest.raises(HTTPBadRequest):
await UploadView.save_file(part_mock, Path("local"), max_body_size=0)
async def test_post(client: TestClient, repository_paths: RepositoryPaths, mocker: MockerFixture) -> None:
"""
must process file upload via http
"""
open_mock = mocker.patch("pathlib.Path.open")
copy_mock = mocker.patch("shutil.copyfileobj")
local = Path("local")
save_mock = mocker.patch("ahriman.web.views.service.upload.UploadView.save_file",
side_effect=AsyncMock(return_value=("filename", local / ".filename")))
rename_mock = mocker.patch("pathlib.Path.rename")
# no content validation here because it has invalid schema
data = FormData()
data.add_field("archive", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
data.add_field("package", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
response = await client.post("/api/v1/service/upload", data=data)
assert response.ok
open_mock.assert_called_once_with("wb")
copy_mock.assert_called_once_with(pytest.helpers.anyvar(int), pytest.helpers.anyvar(int))
save_mock.assert_called_once_with(pytest.helpers.anyvar(int), repository_paths.packages, max_body_size=None)
rename_mock.assert_called_once_with(local / "filename")
async def test_post_with_sig(client: TestClient, repository_paths: RepositoryPaths, mocker: MockerFixture) -> None:
"""
must process file upload with signature via http
"""
local = Path("local")
save_mock = mocker.patch("ahriman.web.views.service.upload.UploadView.save_file",
side_effect=AsyncMock(side_effect=[
("filename", local / ".filename"),
("filename.sig", local / ".filename.sig"),
]))
rename_mock = mocker.patch("pathlib.Path.rename")
# no content validation here because it has invalid schema
data = FormData()
data.add_field("package", BytesIO(b"content"), filename="filename")
data.add_field("signature", BytesIO(b"sig"), filename="filename.sig")
response = await client.post("/api/v1/service/upload", data=data)
assert response.ok
save_mock.assert_has_calls([
MockCall(pytest.helpers.anyvar(int), repository_paths.packages, max_body_size=None),
MockCall(pytest.helpers.anyvar(int), repository_paths.packages, max_body_size=None),
])
rename_mock.assert_has_calls([
MockCall(local / "filename"),
MockCall(local / "filename.sig"),
])
async def test_post_not_found(client: TestClient, mocker: MockerFixture) -> None:
@ -41,7 +133,7 @@ async def test_post_not_found(client: TestClient, mocker: MockerFixture) -> None
"""
mocker.patch("ahriman.core.configuration.Configuration.getboolean", return_value=False)
data = FormData()
data.add_field("archive", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
data.add_field("package", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
response_schema = pytest.helpers.schema_response(UploadView.post, code=404)
response = await client.post("/api/v1/service/upload", data=data)
@ -67,14 +159,14 @@ async def test_post_not_body_part(client: TestClient, mocker: MockerFixture) ->
response_schema = pytest.helpers.schema_response(UploadView.post, code=400)
mocker.patch("aiohttp.MultipartReader.next", return_value=42) # surprise, motherfucker
data = FormData()
data.add_field("archive", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
data.add_field("package", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
response = await client.post("/api/v1/service/upload", data=data)
assert response.status == 400
assert not response_schema.validate(await response.json())
async def test_post_not_archive(client: TestClient) -> None:
async def test_post_not_package(client: TestClient) -> None:
"""
must return 400 on invalid multipart key
"""
@ -85,31 +177,3 @@ async def test_post_not_archive(client: TestClient) -> None:
response = await client.post("/api/v1/service/upload", data=data)
assert response.status == 400
assert not response_schema.validate(await response.json())
async def test_post_filename_invalid(client: TestClient) -> None:
"""
must return 400 if filename is invalid
"""
response_schema = pytest.helpers.schema_response(UploadView.post, code=400)
data = FormData()
data.add_field("archive", BytesIO(b"content"), filename="..", content_type="application/octet-stream")
response = await client.post("/api/v1/service/upload", data=data)
assert response.status == 400
assert not response_schema.validate(await response.json())
async def test_post_file_too_big(client: TestClient, mocker: MockerFixture) -> None:
"""
must return 400 if file is too big
"""
mocker.patch("pathlib.Path.open")
mocker.patch("ahriman.core.configuration.Configuration.getint", return_value=0)
data = FormData()
data.add_field("archive", BytesIO(b"content"), filename="filename", content_type="application/octet-stream")
response_schema = pytest.helpers.schema_response(UploadView.post, code=400)
response = await client.post("/api/v1/service/upload", data=data)
assert response.status == 400
assert not response_schema.validate(await response.json())