Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ options = KsefClientOptions(
base_url="https://api-test.ksef.mf.gov.pl",
timeout_seconds=30.0,
verify_ssl=True,
require_export_part_hash=True,
proxy=None,
custom_headers={"X-Custom-Header": "value"},
follow_redirects=False,
Expand Down Expand Up @@ -65,6 +66,15 @@ Opcja zwykle nie jest potrzebna. Włączenie ma uzasadnienie wyłącznie w środ

Domyślnie `True`. Wyłączenie ma uzasadnienie wyłącznie w specyficznych środowiskach testowych (np. z własnym MITM/proxy).

### `require_export_part_hash`

Domyślnie `True`. Dotyczy pobierania partów eksportu (`ExportWorkflow`, `AsyncExportWorkflow`):

- dla każdego pobranego, zaszyfrowanego partu biblioteka liczy `SHA-256` i porównuje z nagłówkiem `x-ms-meta-hash` (base64), jeśli nagłówek jest obecny;
- jeśli `x-ms-meta-hash` nie ma i opcja jest `True`, biblioteka zgłasza `ValueError`;
- jeśli hash się nie zgadza, biblioteka zgłasza `ValueError`;
- ustawienie `False` pozwala przejść dalej, gdy nagłówek hash nie został zwrócony (nadal występuje walidacja, gdy hash jest obecny).

### `strict_presigned_url_validation`

Domyślnie `True`. Dla absolutnych URL używanych z `skip_auth=True` wymusza `https`. Przy wyłączeniu możliwe są URL `http`, ale nadal działa walidacja hosta/IP.
Expand Down
4 changes: 4 additions & 0 deletions docs/workflows/export.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ print(len(result.metadata_summaries), len(result.invoice_xml_files))
## Uwagi

- Części paczki są dostępne pod `package.parts[].url` i są pobierane **bez Bearer tokena** (pre-signed URL).
- Dla każdego pobranego (zaszyfrowanego) partu workflow liczy hash `SHA-256` (base64) i porównuje z `x-ms-meta-hash`, jeśli nagłówek jest obecny.
- Domyślnie (`KsefClientOptions.require_export_part_hash=True`) brak `x-ms-meta-hash` powoduje `ValueError`.
- Niezgodność hash (`x-ms-meta-hash` vs. wyliczony hash) zawsze powoduje `ValueError`.
- Jeśli integracja wymaga tolerowania braku nagłówka, ustaw `require_export_part_hash=False` w `KsefClientOptions` lub podczas tworzenia workflow.
- Linki do partów wygasają; pobranie powinno nastąpić bez zbędnej zwłoki.
- Paczka eksportu zawiera `_metadata.json` (dla deduplikacji i synchronizacji przyrostowej).

Expand Down
1 change: 1 addition & 0 deletions src/ksef_client/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class KsefClientOptions:
custom_headers: dict[str, str] | None = None
follow_redirects: bool = False
verify_ssl: bool = True
require_export_part_hash: bool = True
strict_presigned_url_validation: bool = True
allowed_presigned_hosts: list[str] | None = None
allow_private_network_presigned_urls: bool = False
Expand Down
68 changes: 65 additions & 3 deletions src/ksef_client/services/workflows.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import asyncio
import base64
import hashlib
import time
from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor
Expand Down Expand Up @@ -777,17 +779,34 @@ class PackageProcessingResult:


class ExportWorkflow:
def __init__(self, invoices_client: InvoicesClient, http_client: _RequestHttpClient) -> None:
def __init__(
    self,
    invoices_client: InvoicesClient,
    http_client: _RequestHttpClient,
    require_export_part_hash: bool | None = None,
) -> None:
    """Wire up the export workflow.

    Args:
        invoices_client: Client stored on the workflow as ``_invoices``.
        http_client: HTTP client handed to ``ExportDownloadHelper``; it is
            also passed to the hash-requirement resolver.
        require_export_part_hash: Optional explicit setting forwarded to
            ``_resolve_export_part_hash_requirement`` as ``explicit_value``.
    """
    self._invoices = invoices_client
    self._download_helper = ExportDownloadHelper(http_client)
    # Resolve once at construction time so every download uses the same policy.
    hash_required = _resolve_export_part_hash_requirement(
        explicit_value=require_export_part_hash,
        http_client=http_client,
    )
    self._require_export_part_hash = hash_required

def download_and_process_package(
self,
package: dict[str, Any],
encryption_data: EncryptionData,
) -> PackageProcessingResult:
parts = package.get("parts") or []
encrypted_parts = self._download_helper.download_parts(parts)
encrypted_parts_with_hash = self._download_helper.download_parts_with_hash(parts)
for index, (part_bytes, part_hash) in enumerate(encrypted_parts_with_hash, start=1):
_validate_export_part_hash(
part_bytes,
part_hash,
require_export_part_hash=self._require_export_part_hash,
part_index=index,
)
encrypted_parts = [part_bytes for part_bytes, _ in encrypted_parts_with_hash]
decrypted_parts = [
decrypt_aes_cbc_pkcs7(part, encryption_data.key, encryption_data.iv)
for part in encrypted_parts
Expand Down Expand Up @@ -816,17 +835,30 @@ def __init__(
self,
invoices_client: AsyncInvoicesClient,
http_client: _AsyncRequestHttpClient,
require_export_part_hash: bool | None = None,
) -> None:
self._invoices = invoices_client
self._download_helper = AsyncExportDownloadHelper(http_client)
self._require_export_part_hash = _resolve_export_part_hash_requirement(
http_client=http_client,
explicit_value=require_export_part_hash,
)

async def download_and_process_package(
self,
package: dict[str, Any],
encryption_data: EncryptionData,
) -> PackageProcessingResult:
parts = package.get("parts") or []
encrypted_parts = await self._download_helper.download_parts(parts)
encrypted_parts_with_hash = await self._download_helper.download_parts_with_hash(parts)
for index, (part_bytes, part_hash) in enumerate(encrypted_parts_with_hash, start=1):
_validate_export_part_hash(
part_bytes,
part_hash,
require_export_part_hash=self._require_export_part_hash,
part_index=index,
)
encrypted_parts = [part_bytes for part_bytes, _ in encrypted_parts_with_hash]
decrypted_parts = [
decrypt_aes_cbc_pkcs7(part, encryption_data.key, encryption_data.iv)
for part in encrypted_parts
Expand All @@ -848,3 +880,33 @@ async def download_and_process_package(
invoice_xml_files[name] = content.decode("utf-8")

return PackageProcessingResult(metadata_summaries, invoice_xml_files)


def _resolve_export_part_hash_requirement(
    *, http_client: _RequestHttpClient | _AsyncRequestHttpClient, explicit_value: bool | None
) -> bool:
    """Decide whether a missing export part hash must be treated as an error.

    Resolution order:

    1. ``explicit_value`` when it is not ``None`` (returned unchanged);
    2. ``require_export_part_hash`` read from the client's ``_options``
       attribute, coerced to ``bool``;
    3. ``True`` when the client has no ``_options`` (or it is ``None``), or
       the options object lacks the attribute.
    """
    if explicit_value is None:
        client_options = getattr(http_client, "_options", None)
        if client_options is not None:
            return bool(getattr(client_options, "require_export_part_hash", True))
        # No options available on the client: fall back to the strict default.
        return True
    return explicit_value


def _validate_export_part_hash(
    part_bytes: bytes,
    expected_hash: str | None,
    *,
    require_export_part_hash: bool,
    part_index: int,
) -> None:
    """Check one downloaded export part against its expected hash.

    Args:
        part_bytes: Raw bytes of the part that are hashed.
        expected_hash: Base64 text of the expected SHA-256 digest, or ``None``
            when no hash is available for this part.
        require_export_part_hash: When true, a ``None`` ``expected_hash`` is an
            error; when false it is silently accepted.
        part_index: Part number used only in error messages.

    Raises:
        ValueError: If the hash is missing while required, or if the computed
            hash differs from ``expected_hash``.
    """
    if expected_hash is None:
        if not require_export_part_hash:
            return
        raise ValueError(f"Missing export part hash for part #{part_index}.")

    # A present hash is always verified, regardless of the "require" flag.
    digest = hashlib.sha256(part_bytes).digest()
    computed_hash = base64.b64encode(digest).decode("ascii")
    if expected_hash == computed_hash:
        return
    raise ValueError(
        f"Export part hash mismatch for part #{part_index}: "
        f"expected '{expected_hash}', got '{computed_hash}'."
    )
181 changes: 178 additions & 3 deletions tests/test_services_workflows.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import base64
import hashlib
import json
import unittest
from dataclasses import dataclass
Expand All @@ -7,6 +9,7 @@
import httpx

from ksef_client.clients.invoices import AsyncInvoicesClient, InvoicesClient
from ksef_client.config import KsefClientOptions
from ksef_client.http import HttpResponse
from ksef_client.services import workflows
from ksef_client.services.crypto import encrypt_aes_cbc_pkcs7, generate_iv, generate_symmetric_key
Expand All @@ -15,6 +18,10 @@
from tests.helpers import generate_rsa_cert


def _sha256_b64(payload: bytes) -> str:
    """Return the SHA-256 digest of ``payload`` as base64-encoded ASCII text."""
    digest = hashlib.sha256(payload).digest()
    encoded = base64.b64encode(digest)
    return encoded.decode("ascii")


class RecordingHttp:
def __init__(self, content: bytes = b"ok", headers: dict | None = None) -> None:
self.calls: list[tuple[tuple[Any, ...], dict[str, Any]]] = []
Expand Down Expand Up @@ -368,11 +375,106 @@ class DummyInvoices:
pass

workflow = workflows.ExportWorkflow(cast(InvoicesClient, DummyInvoices()), RecordingHttp())
with patch.object(workflow._download_helper, "download_parts", return_value=[encrypted]):
with patch.object(
workflow._download_helper,
"download_parts_with_hash",
return_value=[(encrypted, _sha256_b64(encrypted))],
):
result = workflow.download_and_process_package({"parts": [{"url": "u"}]}, encryption)
self.assertEqual(result.metadata_summaries[0]["ksefNumber"], "1")
self.assertIn("inv.xml", result.invoice_xml_files)

def test_export_workflow_rejects_missing_hash_by_default(self):
    """A part returned without a hash raises ValueError when no override is passed."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.ExportWorkflow(
        cast(InvoicesClient, DummyInvoices()), RecordingHttp()
    )
    package = {"parts": [{"url": "u"}]}

    # The download helper is stubbed to return one part with no hash.
    hashless_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        return_value=[(ciphertext, None)],
    )
    with hashless_download, self.assertRaises(ValueError) as raised:
        export_workflow.download_and_process_package(package, encryption)
    self.assertIn("Missing export part hash", str(raised.exception))

def test_export_workflow_allows_missing_hash_when_disabled(self):
    """With ``require_export_part_hash=False`` a hashless part is processed normally."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.ExportWorkflow(
        cast(InvoicesClient, DummyInvoices()),
        RecordingHttp(),
        require_export_part_hash=False,
    )
    package = {"parts": [{"url": "u"}]}

    # The download helper is stubbed to return one part with no hash.
    hashless_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        return_value=[(ciphertext, None)],
    )
    with hashless_download:
        processed = export_workflow.download_and_process_package(package, encryption)
    self.assertIn("inv.xml", processed.invoice_xml_files)

def test_export_workflow_rejects_hash_mismatch(self):
    """A part whose reported hash differs from the computed one raises ValueError."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.ExportWorkflow(
        cast(InvoicesClient, DummyInvoices()), RecordingHttp()
    )
    package = {"parts": [{"url": "u"}]}

    # "bad-hash" can never equal a real base64 SHA-256 digest.
    mismatched_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        return_value=[(ciphertext, "bad-hash")],
    )
    with mismatched_download, self.assertRaises(ValueError) as raised:
        export_workflow.download_and_process_package(package, encryption)
    self.assertIn("Export part hash mismatch", str(raised.exception))

def test_export_workflow_reads_hash_requirement_from_client_options(self):
    """The workflow picks up ``require_export_part_hash`` from the client's ``_options``."""

    class DummyInvoices:
        pass

    class RecordingHttpWithOptions(RecordingHttp):
        def __init__(self) -> None:
            super().__init__()
            # Options that relax the hash requirement; no explicit override is
            # passed to the workflow below.
            self._options = KsefClientOptions(
                base_url="https://api-test.ksef.mf.gov.pl",
                require_export_part_hash=False,
            )

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]

    client_with_options = RecordingHttpWithOptions()
    export_workflow = workflows.ExportWorkflow(
        cast(InvoicesClient, DummyInvoices()), client_with_options
    )
    package = {"parts": [{"url": "u"}]}

    hashless_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        return_value=[(ciphertext, None)],
    )
    with hashless_download:
        processed = export_workflow.download_and_process_package(package, encryption)
    self.assertIn("inv.xml", processed.invoice_xml_files)


class AsyncWorkflowsTests(unittest.IsolatedAsyncioTestCase):
async def test_async_batch_upload_helper(self):
Expand Down Expand Up @@ -565,14 +667,87 @@ class DummyInvoices:
)
with patch.object(
workflow._download_helper,
"download_parts",
AsyncMock(return_value=[encrypted]),
"download_parts_with_hash",
AsyncMock(return_value=[(encrypted, _sha256_b64(encrypted))]),
):
result = await workflow.download_and_process_package(
{"parts": [{"url": "u"}]}, encryption
)
self.assertEqual(result.metadata_summaries[0]["ksefNumber"], "1")

async def test_async_export_workflow_rejects_missing_hash_by_default(self):
    """Async variant: a hashless part raises ValueError when no override is passed."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.AsyncExportWorkflow(
        cast(AsyncInvoicesClient, DummyInvoices()),
        RecordingAsyncHttp(),
    )
    package = {"parts": [{"url": "u"}]}

    # The async download helper is stubbed to return one part with no hash.
    hashless_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        AsyncMock(return_value=[(ciphertext, None)]),
    )
    with hashless_download, self.assertRaises(ValueError) as raised:
        await export_workflow.download_and_process_package(package, encryption)
    self.assertIn("Missing export part hash", str(raised.exception))

async def test_async_export_workflow_allows_missing_hash_when_disabled(self):
    """Async variant: ``require_export_part_hash=False`` tolerates a hashless part."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.AsyncExportWorkflow(
        cast(AsyncInvoicesClient, DummyInvoices()),
        RecordingAsyncHttp(),
        require_export_part_hash=False,
    )
    package = {"parts": [{"url": "u"}]}

    # The async download helper is stubbed to return one part with no hash.
    hashless_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        AsyncMock(return_value=[(ciphertext, None)]),
    )
    with hashless_download:
        processed = await export_workflow.download_and_process_package(package, encryption)
    self.assertIn("inv.xml", processed.invoice_xml_files)

async def test_async_export_workflow_rejects_hash_mismatch(self):
    """Async variant: a reported hash that differs from the computed one raises ValueError."""

    class DummyInvoices:
        pass

    aes_key, aes_iv = generate_symmetric_key(), generate_iv()
    ciphertext = encrypt_aes_cbc_pkcs7(build_zip({"inv.xml": b"<xml/>"}), aes_key, aes_iv)
    encryption = workflows.EncryptionData(key=aes_key, iv=aes_iv, encryption_info=None)  # type: ignore[arg-type]
    export_workflow = workflows.AsyncExportWorkflow(
        cast(AsyncInvoicesClient, DummyInvoices()),
        RecordingAsyncHttp(),
    )
    package = {"parts": [{"url": "u"}]}

    # "bad-hash" can never equal a real base64 SHA-256 digest.
    mismatched_download = patch.object(
        export_workflow._download_helper,
        "download_parts_with_hash",
        AsyncMock(return_value=[(ciphertext, "bad-hash")]),
    )
    with mismatched_download, self.assertRaises(ValueError) as raised:
        await export_workflow.download_and_process_package(package, encryption)
    self.assertIn("Export part hash mismatch", str(raised.exception))


if __name__ == "__main__":
unittest.main()
Loading