From b25cd217a5b3ff0745a55a3430d26bb866e763cf Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Thu, 14 Aug 2025 09:22:19 +0200 Subject: [PATCH 01/13] Receipts module --- .../validator/organic_jobs/miner_driver.py | 14 +- .../validator/receipts/README.md | 147 +++--- .../validator/receipts/__init__.py | 9 +- .../validator/receipts/base.py | 115 ++++- .../validator/receipts/default.py | 352 +++++++++++--- .../validator/receipts/tasks.py | 81 ++++ .../validator/receipts/tests/test_receipts.py | 447 ++++++++++++++++++ .../validator/receipts/types.py | 18 - .../validator/routing/default.py | 17 +- .../validator/synthetic_jobs/batch_run.py | 7 +- 10 files changed, 985 insertions(+), 222 deletions(-) create mode 100644 validator/app/src/compute_horde_validator/validator/receipts/tasks.py create mode 100644 validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py diff --git a/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py b/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py index 4a365db7e..923d0348c 100644 --- a/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py +++ b/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py @@ -34,7 +34,6 @@ MinerToValidatorMessage, V0StreamingJobReadyRequest, ) -from compute_horde.receipts.models import JobStartedReceipt from compute_horde_core.executor_class import ExecutorClass from django.conf import settings from django.db.models import F @@ -51,6 +50,7 @@ SystemEvent, ) from compute_horde_validator.validator.organic_jobs.miner_client import MinerClient +from compute_horde_validator.validator.receipts.default import Receipts from compute_horde_validator.validator.routing.types import JobRoute from compute_horde_validator.validator.utils import TRUSTED_MINER_FAKE_KEY @@ -374,9 +374,15 @@ async def streaming_ready_callback(msg: V0StreamingJobReadyRequest) -> None: ) # As far as the validator 
is concerned, the job is as good as failed system_event_subtype = SystemEvent.EventSubType.JOB_REJECTED else: # rejection.msg.reason == JobRejectionReason.BUSY - job_request_time = ( - await JobStartedReceipt.objects.aget(job_uuid=job.job_uuid) - ).timestamp + job_started_receipt = await Receipts().get_job_started_receipt_by_uuid( + str(job.job_uuid) + ) + if job_started_receipt is None: + logger.error(f"No job started receipt found for job {job.job_uuid}") + comment = "Miner failed to excuse job" + status = OrganicJob.Status.FAILED + system_event_subtype = SystemEvent.EventSubType.JOB_REJECTED + job_request_time = job_started_receipt.timestamp valid_excuses = await job_excuses.filter_valid_excuse_receipts( receipts_to_check=rejection.msg.receipts or [], check_time=job_request_time, diff --git a/validator/app/src/compute_horde_validator/validator/receipts/README.md b/validator/app/src/compute_horde_validator/validator/receipts/README.md index 1ac15a818..459e42189 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/README.md +++ b/validator/app/src/compute_horde_validator/validator/receipts/README.md @@ -2,122 +2,97 @@ This module provides an interface for managing receipts in the validator. 
-## Basic Usage +## Public Interface + +### Receipts + +The main service class `Receipts` implements the `ReceiptsBase` interface and provides these key methods: + +#### Core Methods + +- **`scrape_receipts_from_miners(miner_hotkeys, start_block, end_block)`** - Fetch receipts from miners for a block range +- **`create_job_finished_receipt(job_uuid, miner_hotkey, validator_hotkey, time_started, time_took_us, score_str)`** - Create a new job finished receipt +- **`create_job_started_receipt(job_uuid, miner_hotkey, validator_hotkey, executor_class, is_organic, ttl)`** - Create a new job started receipt +- **`get_job_started_receipt_by_uuid(job_uuid)`** - Retrieve a specific job started receipt +- **`get_valid_job_started_receipts_for_miner(miner_hotkey, at_time)`** - Get valid receipts for a miner at a specific time +- **`get_job_finished_receipts_for_miner(miner_hotkey, job_uuids)`** - Get finished receipts for specific jobs from a miner +- **`get_completed_job_receipts_for_block_range(start_block, end_block)`** - Get all completed job receipts within a block range + +## Usage Examples + +### Basic Receipt Creation ```python from compute_horde_validator.validator.receipts import Receipts -# Create receipts manager receipts = Receipts() -# Get completed job receipts for scoring -completed_receipts = receipts.get_completed_job_receipts_for_block_range( - start_block=1000, - end_block=2000 +# Create a job started receipt +payload, signature = await receipts.create_job_started_receipt( + job_uuid="job-123", + miner_hotkey="miner-key", + validator_hotkey="validator-key", + executor_class="spin_up-4min.gpu-24gb", + is_organic=True, + ttl=300 ) # Create a job finished receipt -receipt = receipts.create_job_finished_receipt( +finished_receipt = receipts.create_job_finished_receipt( job_uuid="job-123", - miner_hotkey="miner_hotkey", - validator_hotkey="validator_hotkey", - time_started=1640995200, + miner_hotkey="miner-key", + validator_hotkey="validator-key", + 
time_started=datetime.now(), time_took_us=5000000, score_str="0.85" ) - -# Save the receipt -receipts.save_receipt(receipt.to_receipt()) - -# Scrape receipts from miners -scraped_receipts = await receipts.scrape_receipts_from_miners(["miner1", "miner2"]) ``` -## Core Functionality - -### Receipts Retrieval - -The primary method for retrieve methods in given block range: +### Receipt Retrieval ```python -# Get completed job receipts for scoring -completed_receipts = manager.get_completed_job_receipts_for_block_range( - start_block=1000, - end_block=2000 +# Get a specific job started receipt +receipt = await receipts.get_job_started_receipt_by_uuid("job-123") +if receipt: + print(f"Job started at: {receipt.timestamp}") + print(f"Miner: {receipt.miner_hotkey}") + +# Get valid receipts for a miner +valid_receipts = await receipts.get_valid_job_started_receipts_for_miner( + miner_hotkey="miner-key", + at_time=datetime.now() ) -``` - -### Receipt Creation -The module can create receipts for completed jobs: - -```python -# Create job finished receipt -receipt = receipts.create_job_finished_receipt( - job_uuid="job-123", - miner_hotkey="miner_hotkey", - validator_hotkey="validator_hotkey", - time_started=1640995200, # Unix timestamp - time_took_us=5000000, # 5 seconds in microseconds - score_str="0.85" +# Get finished receipts for specific jobs +finished_receipts = await receipts.get_job_finished_receipts_for_miner( + miner_hotkey="miner-key", + job_uuids=["job-123", "job-456"] ) ``` ### Receipt Scraping -The module can scrape receipts from miners: - -```python -# Scrape receipts from specific miners -scraped_receipts = await receipts.scrape_receipts_from_miners([ - "miner_hotkey_1", - "miner_hotkey_2" -], start_block=1000, end_block=2000) -``` - -### Receipt Persistence - -The module provides methods to save and retrieve receipts: - ```python -# Save a receipt to the database -receipts.save_receipt(receipt) - -# Get a receipt by job UUID -receipt = 
receipts.get_receipt_by_job_uuid("job-123") +# Scrape receipts from miners for a block range +scraped_receipts = await receipts.scrape_receipts_from_miners( + miner_hotkeys=["miner1", "miner2"], + start_block=1000, + end_block=2000 +) ``` -## Integration with compute_horde - -The receipts module uses the `compute_horde.receipts` module internally for: -- Receipt models and schemas -- Receipt validation and serialization -- Receipt transfer functionality -- Database models and migrations +## Background Tasks -## Integration with Scoring +### Receipt Scraping Task -The receipts module is designed to work seamlessly with the scoring system: +The module includes a Celery task for periodic receipt scraping: -1. **Block-based filtering**: Provides method to get receipts for specific block ranges -2. **Completed job receipts**: Specialized method for getting receipts of completed jobs -3. **Scoring data extraction**: Receipts contain all necessary data for scoring calculations -4. **Performance metrics**: Job finished receipts include timing and score information +**Task Name**: `scrape_receipts_from_miners` -## Error Handling - -The module provides specific exceptions for different error scenarios: - -```python -from compute_horde_validator.validator.receipts.exceptions import ( - ReceiptsConfigurationError, - ReceiptsScrapingError, - ReceiptsGenerationError, -) +**Purpose**: Automatically fetch and process receipts from miners across the network -try: - receipt = receipts.create_job_finished_receipt(...) 
-except ReceiptsGenerationError as e: - # Handle generation error - pass +**Manual Execution**: +```bash +# Run the task manually (if needed) +celery -A compute_horde_validator call compute_horde_validator.validator.receipts.tasks.scrape_receipts_from_miners ``` diff --git a/validator/app/src/compute_horde_validator/validator/receipts/__init__.py b/validator/app/src/compute_horde_validator/validator/receipts/__init__.py index 3b8c52002..bfca5b69c 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/__init__.py @@ -4,14 +4,13 @@ This module provides receipts management functionality for the validator. """ -from .exceptions import ReceiptsConfigurationError, ReceiptsGenerationError, ReceiptsScrapingError -from .interface import ReceiptsBase -from .manager import Receipts +from . import tasks as _tasks # noqa: F401 +from .base import ReceiptsBase +from .default import Receipts +from .types import ReceiptsGenerationError __all__ = [ "ReceiptsBase", "Receipts", - "ReceiptsConfigurationError", "ReceiptsGenerationError", - "ReceiptsScrapingError", ] diff --git a/validator/app/src/compute_horde_validator/validator/receipts/base.py b/validator/app/src/compute_horde_validator/validator/receipts/base.py index 793e318f8..31962dc22 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/base.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/base.py @@ -1,7 +1,9 @@ +import datetime from abc import ABC, abstractmethod from compute_horde.receipts import Receipt -from compute_horde.receipts.models import JobFinishedReceipt +from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt +from compute_horde.receipts.schemas import JobStartedReceiptPayload class ReceiptsBase(ABC): @@ -11,28 +13,12 @@ class ReceiptsBase(ABC): This class defines the interface for managing receipts in the validator. 
""" - @abstractmethod - def get_completed_job_receipts_for_block_range( - self, start_block: int, end_block: int - ) -> list[Receipt]: - """ - Get all receipts for jobs that were completed between the specified blocks. - - Args: - start_block: Start block (inclusive) - end_block: End block (exclusive) - - Returns: - List of receipts for completed jobs in the block range - """ - pass - @abstractmethod async def scrape_receipts_from_miners( self, miner_hotkeys: list[str], start_block: int, end_block: int ) -> list[Receipt]: """ - Scrape receipts from specified miners within a block range. + Scrape receipts from miners for a block range. Args: miner_hotkeys: List of miner hotkeys to scrape receipts from @@ -50,7 +36,7 @@ def create_job_finished_receipt( job_uuid: str, miner_hotkey: str, validator_hotkey: str, - time_started: int, + time_started: datetime.datetime, time_took_us: int, score_str: str, ) -> JobFinishedReceipt: @@ -61,21 +47,98 @@ def create_job_finished_receipt( job_uuid: UUID of the job miner_hotkey: Hotkey of the miner validator_hotkey: Hotkey of the validator - time_started: Timestamp when job started - time_took_us: Time taken in microseconds - score_str: Score as string + time_started: Time the job started + time_took_us: Time the job took in microseconds + score_str: Score of the job + + Returns: + JobFinishedReceipt + """ + pass + + @abstractmethod + def create_job_started_receipt( + self, + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + executor_class: str, + is_organic: bool, + ttl: int, + ) -> tuple[JobStartedReceiptPayload, str]: + """ + Create a job started receipt payload. 
+ + Args: + job_uuid: UUID of the job + miner_hotkey: Hotkey of the miner + validator_hotkey: Hotkey of the validator + executor_class: Executor class for the job + is_organic: Whether this is an organic job + ttl: Time to live in seconds Returns: - Created job finished receipt + Tuple of (payload, signature_hex) """ pass @abstractmethod - def save_receipt(self, receipt: Receipt) -> None: + async def get_valid_job_started_receipts_for_miner( + self, miner_hotkey: str, at_time: datetime.datetime + ) -> list[JobStartedReceipt]: """ - Save a receipt to the database. + Get valid job started receipts for a miner at a specific time. Args: - receipt: Receipt to save + miner_hotkey: Miner's hotkey + at_time: Time to check validity against + + Returns: + List of valid receipts + """ + pass + + @abstractmethod + async def get_job_finished_receipts_for_miner( + self, miner_hotkey: str, job_uuids: list[str] + ) -> list[JobFinishedReceipt]: + """ + Get job finished receipts for specific jobs from a miner. + + Args: + miner_hotkey: Miner's hotkey + job_uuids: List of job UUIDs to get receipts for + + Returns: + List of receipts + """ + pass + + @abstractmethod + async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: + """ + Get a job started receipt by UUID. + + Args: + job_uuid: UUID of the job + + Returns: + Receipt if found, None otherwise + """ + pass + + @abstractmethod + async def get_completed_job_receipts_for_block_range( + self, start_block: int, end_block: int + ) -> list[Receipt]: + """ + Get all receipts for jobs that were completed between the specified blocks. 
+ + Args: + start_block: Start block (inclusive) + end_block: End block (exclusive) + + Returns: + List of receipts for completed jobs in the block range """ pass diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 48003a62e..5ddcf9977 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -1,14 +1,27 @@ +import asyncio import datetime import logging +import aiohttp +from asgiref.sync import sync_to_async from compute_horde.receipts import Receipt -from compute_horde.receipts.models import JobFinishedReceipt, receipt_to_django_model -from compute_horde.receipts.schemas import JobFinishedReceiptPayload +from compute_horde.receipts.models import ( + JobFinishedReceipt, + JobStartedReceipt, +) +from compute_horde.receipts.schemas import JobFinishedReceiptPayload, JobStartedReceiptPayload +from compute_horde.receipts.store.local import LocalFilesystemPagedReceiptStore +from compute_horde.receipts.transfer import ReceiptsTransfer +from compute_horde.utils import sign_blob +from django.conf import settings -from compute_horde_validator.validator.receipts.exceptions import ( +from compute_horde_validator.validator.allowance.utils.supertensor import supertensor +from compute_horde_validator.validator.models import Miner +from compute_horde_validator.validator.models.allowance.internal import Block +from compute_horde_validator.validator.receipts.base import ReceiptsBase +from compute_horde_validator.validator.receipts.types import ( ReceiptsGenerationError, ) -from compute_horde_validator.validator.receipts.interface import ReceiptsBase logger = logging.getLogger(__name__) @@ -18,99 +31,294 @@ class Receipts(ReceiptsBase): Default implementation of receipts manager. 
""" - def get_completed_job_receipts_for_block_range( - self, start_block: int, end_block: int + async def scrape_receipts_from_miners( + self, miner_hotkeys: list[str], start_block: int, end_block: int ) -> list[Receipt]: - """ - Get all receipts for jobs that were completed between the specified blocks. - - Args: - start_block: Start block (inclusive) - end_block: End block (exclusive) - - Returns: - List of receipts for completed jobs in the block range - """ - # TODO: Implement block-based filtering - # For now, return all job finished receipts - finished_receipts = list(JobFinishedReceipt.objects.all()) + if not miner_hotkeys: + logger.info("No miner hotkeys provided for scraping") + return [] + if start_block >= end_block: + logger.warning( + "Invalid block range provided: start_block (%s) >= end_block (%s)", + start_block, + end_block, + ) + return [] - # Convert to Receipt objects - receipts = [] - for receipt in finished_receipts: - receipts.append(receipt.to_receipt()) + try: + start_ts = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( + start_block + ) + end_ts = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( + end_block + ) - return receipts + start_page = LocalFilesystemPagedReceiptStore.current_page_at(start_ts) + end_page = LocalFilesystemPagedReceiptStore.current_page_at(end_ts) + if end_page < start_page: + logger.warning( + "Computed page range is empty: start_page=%s end_page=%s", + start_page, + end_page, + ) + return [] + pages = list(range(start_page, end_page + 1)) - async def scrape_receipts_from_miners( - self, miner_hotkeys: list[str], start_block: int, end_block: int - ) -> list[Receipt]: - """ - Scrape receipts from specified miners. 
+ miners = await self._fetch_miners(miner_hotkeys) + miner_infos: list[tuple[str, str, int]] = [ + (m[0], m[1], m[2]) for m in miners if m[1] and m[2] and m[1] != "0.0.0.0" + ] + if not miner_infos: + logger.info("No valid miner endpoints resolved for scraping") + return [] - Args: - miner_hotkeys: List of miner hotkeys to scrape receipts from + semaphore = asyncio.Semaphore(25) + async with aiohttp.ClientSession() as session: + result = await ReceiptsTransfer.transfer( + miners=miner_infos, + pages=pages, + session=session, + semaphore=semaphore, + request_timeout=3.0, + ) + logger.info( + "Scrape finished: receipts=%s successful_transfers=%s transfer_errors=%s line_errors=%s", + result.n_receipts, + result.n_successful_transfers, + len(result.transfer_errors), + len(result.line_errors), + ) - Returns: - List of scraped receipts - """ - # TODO: Implement actual scraping logic - logger.info(f"Scraping receipts from {len(miner_hotkeys)} miners") - return [] + receipts = await self._fetch_receipts_for_range(start_ts, end_ts, miner_hotkeys) + return receipts + except Exception as ex: + logger.error( + "Failed to scrape receipts for block range %s-%s: %s", + start_block, + end_block, + ex, + ) + return [] def create_job_finished_receipt( self, job_uuid: str, miner_hotkey: str, validator_hotkey: str, - time_started: int, + time_started: datetime.datetime, time_took_us: int, score_str: str, ) -> JobFinishedReceipt: - """ - Create a job finished receipt for a completed job. 
- - Args: - job_uuid: UUID of the job - miner_hotkey: Hotkey of the miner - validator_hotkey: Hotkey of the validator - time_started: Timestamp when job started - time_took_us: Time taken in microseconds - score_str: Score as string - - Returns: - Created job finished receipt - """ + payload = JobFinishedReceiptPayload( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + timestamp=datetime.datetime.now(datetime.UTC), + time_started=time_started, + time_took_us=time_took_us, + score_str=score_str, + ) + + validator_kp = settings.BITTENSOR_WALLET().get_hotkey() + validator_signature = sign_blob(validator_kp, payload.blob_for_signing()) + + return JobFinishedReceipt( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature=validator_signature, + timestamp=payload.timestamp, + time_started=time_started, + time_took_us=time_took_us, + score_str=score_str, + ) + + def create_job_started_receipt( + self, + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + executor_class: str, + is_organic: bool, + ttl: int, + ) -> tuple[JobStartedReceiptPayload, str]: try: - payload = JobFinishedReceiptPayload( + payload = JobStartedReceiptPayload( job_uuid=job_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - timestamp=datetime.datetime.now(), - time_started=datetime.datetime.fromtimestamp(time_started), - time_took_us=time_took_us, - score_str=score_str, + timestamp=datetime.datetime.now(datetime.UTC), + executor_class=executor_class, + is_organic=is_organic, + ttl=ttl, ) - # TODO: Add proper signature generation - validator_signature = "placeholder_signature" + validator_kp = settings.BITTENSOR_WALLET().get_hotkey() + validator_signature = sign_blob(validator_kp, payload.blob_for_signing()) + + logger.debug( + "Created JobStartedReceipt payload for job %s (miner: %s, validator: %s, organic: %s)", + job_uuid, + miner_hotkey, + validator_hotkey, + is_organic, + ) - 
receipt = JobFinishedReceipt.from_payload(payload, validator_signature) + return payload, validator_signature - return receipt except Exception as e: - raise ReceiptsGenerationError(f"Failed to create job finished receipt: {e}") from e + raise ReceiptsGenerationError(f"Failed to create job started receipt: {e}") from e + + async def get_valid_job_started_receipts_for_miner( + self, miner_hotkey: str, at_time: datetime.datetime + ) -> list[JobStartedReceipt]: + try: + qs = JobStartedReceipt.objects.valid_at(at_time).filter(miner_hotkey=miner_hotkey) + receipts: list[JobStartedReceipt] = [r async for r in qs.all()] + + logger.debug( + "Retrieved %s valid job started receipts for miner %s at %s", + len(receipts), + miner_hotkey, + at_time, + ) + + return receipts + + except Exception as e: + logger.error("Failed to get valid job started receipts for miner: %s", e) + return [] + + async def get_job_finished_receipts_for_miner( + self, miner_hotkey: str, job_uuids: list[str] + ) -> list[JobFinishedReceipt]: + try: + if not job_uuids: + return [] + qs = JobFinishedReceipt.objects.filter( + miner_hotkey=miner_hotkey, job_uuid__in=job_uuids + ) + receipts: list[JobFinishedReceipt] = [r async for r in qs.all()] + + logger.debug( + "Retrieved %s job finished receipts for miner %s (jobs: %s)", + len(receipts), + miner_hotkey, + len(job_uuids), + ) - def save_receipt(self, receipt: Receipt) -> None: - """ - Save a receipt to the database. 
+ return receipts + + except Exception as e: + logger.error("Failed to get job finished receipts for miner: %s", e) + return [] - Args: - receipt: Receipt to save - """ + async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: try: - django_model = receipt_to_django_model(receipt) - django_model.save() - logger.info(f"Saved receipt for job {receipt.payload.job_uuid}") + django_receipt = await JobStartedReceipt.objects.aget(job_uuid=job_uuid) + logger.debug( + "Retrieved JobStartedReceipt for job %s (miner: %s, validator: %s)", + job_uuid, + django_receipt.miner_hotkey, + django_receipt.validator_hotkey, + ) + return django_receipt + except JobStartedReceipt.DoesNotExist: + logger.debug("No JobStartedReceipt found for job %s", job_uuid) + return None except Exception as e: - raise ReceiptsGenerationError(f"Failed to save receipt: {e}") from e + logger.error("Failed to get JobStartedReceipt for job %s: %s", job_uuid, e) + return None + + async def get_completed_job_receipts_for_block_range( + self, start_block: int, end_block: int + ) -> list[Receipt]: + if start_block >= end_block: + logger.warning( + "Invalid block range provided: start_block (%s) >= end_block (%s)", + start_block, + end_block, + ) + return [] + + try: + start_timestamp = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( + start_block + ) + end_timestamp = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( + end_block + ) + + finished_receipts_qs = JobFinishedReceipt.objects.filter( + timestamp__gte=start_timestamp, + timestamp__lt=end_timestamp, + ) + receipts: list[Receipt] = [] + async for django_receipt in finished_receipts_qs: + receipts.append(django_receipt.to_receipt()) + + logger.info( + "Found %s completed job receipts for blocks %s-%s", + len(receipts), + start_block, + end_block, + ) + return receipts + except Exception as ex: + logger.error( + "Failed to list receipts for block range %s-%s: %s", + start_block, 
+ end_block, + ex, + ) + return [] + + async def _fetch_miners(self, hotkeys: list[str]) -> list[tuple[str, str, int]]: + """Fetch miner endpoints (hotkey, address, port) for given hotkeys.""" + + def _query() -> list[tuple[str, str, int]]: + return list( + Miner.objects.filter(hotkey__in=hotkeys).values_list("hotkey", "address", "port") + ) + + return await sync_to_async(_query, thread_sensitive=True)() + + async def _fetch_receipts_for_range( + self, start_ts: datetime.datetime, end_ts: datetime.datetime, hotkeys: list[str] + ) -> list[Receipt]: + """Fetch JobFinished receipts in [start_ts, end_ts) for given miner hotkeys and convert to Receipt objects.""" + + receipts_qs = JobFinishedReceipt.objects.filter( + timestamp__gte=start_ts, + timestamp__lt=end_ts, + miner_hotkey__in=hotkeys, + ) + receipts = [] + async for receipt_data in receipts_qs: + receipts.append(receipt_data.to_receipt()) + return receipts + + def _get_block_timestamp(self, block_number: int) -> datetime.datetime: + try: + block = Block.objects.get(block_number=block_number) + return block.creation_timestamp + except Exception as db_ex: + logger.debug( + "Block %s not found in DB or DB error occurred: %s", + block_number, + db_ex, + ) + + try: + ts = supertensor().get_block_timestamp(block_number) + if isinstance(ts, datetime.datetime): + return ts + else: + raise ValueError(f"Expected datetime, got {type(ts)}") + except Exception as chain_ex: # noqa: BLE001 - broad to surface upstream + logger.warning( + "Failed to resolve timestamp for block %s via chain: %s", + block_number, + chain_ex, + ) + raise diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tasks.py b/validator/app/src/compute_horde_validator/validator/receipts/tasks.py new file mode 100644 index 000000000..c14f7e3d2 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/receipts/tasks.py @@ -0,0 +1,81 @@ +import logging + +from asgiref.sync import async_to_sync +from 
compute_horde.receipts.models import JobFinishedReceipt +from compute_horde.subtensor import get_cycle_containing_block +from django.conf import settings + +from compute_horde_validator.celery import app +from compute_horde_validator.validator.models import MetagraphSnapshot +from compute_horde_validator.validator.models.allowance.internal import Block +from compute_horde_validator.validator.receipts.default import Receipts + +logger = logging.getLogger(__name__) + + +@app.task(name="compute_horde_validator.validator.receipts.scrape_receipts_from_miners") +def scrape_receipts_from_miners() -> None: + """ + Periodic receipts scraping task. + + - Determines serving miners from the latest metagraph snapshot + - Scrapes receipts for the current cycle up to the latest snapshot block + + Returns the number of receipts retrieved in the call. + """ + try: + metagraph = MetagraphSnapshot.get_latest() + except Exception: + logger.warning("No metagraph snapshot available for receipts scraping") + return + + miner_hotkeys = metagraph.get_serving_hotkeys() + if not miner_hotkeys: + logger.info("No serving miners found for receipts scraping") + return + + current_block = metagraph.block + current_cycle = get_cycle_containing_block( + block=current_block, netuid=settings.BITTENSOR_NETUID + ) + + latest_receipt = JobFinishedReceipt.objects.order_by("-timestamp").first() + + if latest_receipt: + try: + cycle_start_block = Block.objects.get(block_number=current_cycle.start) + current_cycle_start_timestamp = cycle_start_block.creation_timestamp + + # If the latest receipt is newer than the current cycle start, we've already scraped this cycle + if latest_receipt.timestamp >= current_cycle_start_timestamp: + logger.debug("Already scraped receipts for cycle %s, skipping", current_cycle.start) + return + except Block.DoesNotExist: + # If the cycle start block doesn't exist in our database, proceed with scraping + logger.debug( + "Cycle start block %s not found in database, proceeding 
with scraping", + current_cycle.start, + ) + + logger.info( + "New cycle detected or first run, scraping receipts for cycle %s-%s", + current_cycle.start, + current_cycle.stop, + ) + + try: + scraped = async_to_sync(Receipts().scrape_receipts_from_miners)( + miner_hotkeys=miner_hotkeys, + start_block=current_cycle.start, + end_block=current_block, + ) + logger.info( + "Scraped %d receipts for cycle %s-%s", + len(scraped), + current_cycle.start, + current_cycle.stop, + ) + + except Exception as e: + logger.error("Failed to scrape receipts: %s", e, exc_info=True) + return diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py new file mode 100644 index 000000000..f873e0911 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -0,0 +1,447 @@ +import datetime as dt +import uuid +from unittest.mock import AsyncMock, Mock, patch + +import bittensor_wallet +import pytest +from aiohttp import web +from asgiref.sync import sync_to_async +from compute_horde.receipts import Receipt +from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt +from compute_horde.receipts.schemas import JobFinishedReceiptPayload +from django.utils.timezone import make_aware + +from compute_horde_validator.validator.models import Miner +from compute_horde_validator.validator.models.allowance.internal import Block +from compute_horde_validator.validator.receipts import Receipts + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_scrape_receipts_from_miners_integration(): + with patch("compute_horde.receipts.transfer.checkpoint_backend") as mock_checkpoint: + mock_backend = Mock() + mock_backend.get = AsyncMock(return_value=0) + mock_backend.set = AsyncMock() + mock_checkpoint.return_value = mock_backend + + miner_keypair1 = 
bittensor_wallet.Keypair.create_from_mnemonic( + "almost fatigue race slim picnic mass better clog deal solve already champion" + ) + miner_keypair2 = bittensor_wallet.Keypair.create_from_mnemonic( + "edit evoke caught tunnel harsh plug august group enact cable govern immense" + ) + validator_keypair = bittensor_wallet.Keypair.create_from_mnemonic( + "slot excuse valid grief praise rifle spoil auction weasel glove pen share" + ) + + await sync_to_async(Miner.objects.create)( + hotkey=miner_keypair1.ss58_address, address="127.0.0.1", port=7001 + ) + await sync_to_async(Miner.objects.create)( + hotkey=miner_keypair2.ss58_address, address="127.0.0.1", port=7002 + ) + + # Use timestamps that will result in page numbers + # The page calculation is: int(timestamp // (60 * 5)) where 60*5 = 300 seconds = 5 minutes + # So we'll use timestamps that result in page numbers like 1, 2, 3 + t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) # timestamp 1735689600, page 5785632 + t1 = make_aware(dt.datetime(2025, 1, 1, 0, 5, 0)) # timestamp 1735689900, page 5785633 + + # Let's use much smaller timestamps to get reasonable page numbers + # Use a base timestamp that gives us small page numbers + base_timestamp = 1000 # This will give us page 3 + t0 = make_aware(dt.datetime.fromtimestamp(base_timestamp)) + t1 = make_aware(dt.datetime.fromtimestamp(base_timestamp + 300)) # +5 minutes, page 4 + + await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) + await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) + + test_receipts = [ + JobFinishedReceiptPayload( + job_uuid="00000000-0000-0000-0000-000000000001", + miner_hotkey=miner_keypair1.ss58_address, + validator_hotkey=validator_keypair.ss58_address, + timestamp=t0 + dt.timedelta(minutes=10), + time_started=t0 + dt.timedelta(minutes=5), + time_took_us=1_000_000, + score_str="0.5", + ), + JobFinishedReceiptPayload( + job_uuid="00000000-0000-0000-0000-000000000002", + 
miner_hotkey=miner_keypair2.ss58_address, + validator_hotkey=validator_keypair.ss58_address, + timestamp=t0 + dt.timedelta(minutes=15), + time_started=t0 + dt.timedelta(minutes=10), + time_took_us=2_000_000, + score_str="0.8", + ), + ] + + async def mock_receipts_handler(request): + # Extract page number from URL like /receipts/3.jsonl + path = request.path + if not path.startswith("/receipts/") or not path.endswith(".jsonl"): + return web.Response(status=404, text="Endpoint not found") + + try: + page = int(path[10:-6]) + except ValueError: + return web.Response(status=400, text="Invalid page number") + + if page not in [3, 4]: + return web.Response(status=404, text="Page not found") + + receipt_lines = [] + for receipt in test_receipts: + blob = receipt.blob_for_signing() + if receipt.miner_hotkey == miner_keypair1.ss58_address: + miner_signature = f"0x{miner_keypair1.sign(blob).hex()}" + else: + miner_signature = f"0x{miner_keypair2.sign(blob).hex()}" + validator_signature = f"0x{validator_keypair.sign(blob).hex()}" + + mock_receipt = Receipt( + payload=receipt, + validator_signature=validator_signature, + miner_signature=miner_signature, + ) + receipt_lines.append(mock_receipt.model_dump_json()) + + response_text = "\n".join(receipt_lines) + return web.Response(text=response_text, content_type="application/json") + + app = web.Application() + app.router.add_get("/receipts/{page}.jsonl", mock_receipts_handler) + + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, "127.0.0.1", 7001) + await site.start() + + try: + await Receipts().scrape_receipts_from_miners( + miner_hotkeys=[miner_keypair1.ss58_address, miner_keypair2.ss58_address], + start_block=1000, + end_block=2000, + ) + stored_receipts_qs = await sync_to_async(JobFinishedReceipt.objects.filter)( + miner_hotkey__in=[miner_keypair1.ss58_address, miner_keypair2.ss58_address] + ) + + def convert_to_list(qs): + return list(qs) + + stored_receipts: list[JobFinishedReceipt] = await 
sync_to_async(convert_to_list)( + stored_receipts_qs + ) + + assert len(stored_receipts) == 2 + assert str(stored_receipts[0].job_uuid) == "00000000-0000-0000-0000-000000000001" + assert str(stored_receipts[1].job_uuid) == "00000000-0000-0000-0000-000000000002" + + finally: + await runner.cleanup() + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_scrape_receipts_network_failure_handling(): + await sync_to_async(Miner.objects.create)(hotkey="hk1", address="127.0.0.1", port=7004) + t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) + await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) + + async def mock_failing_handler(request): + """Mock handler that always raises an exception.""" + raise web.HTTPInternalServerError(text="Server error") + + app = web.Application() + app.router.add_get("/receipts", mock_failing_handler) + + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, "127.0.0.1", 7004) + await site.start() + + try: + result = await Receipts().scrape_receipts_from_miners( + miner_hotkeys=["hk1"], start_block=1000, end_block=2000 + ) + + assert result == [] + + finally: + await runner.cleanup() + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_scrape_receipts_invalid_block_range(): + result = await Receipts().scrape_receipts_from_miners( + miner_hotkeys=["hk1"], start_block=1000, end_block=1000 + ) + assert result == [] + + result = await Receipts().scrape_receipts_from_miners( + miner_hotkeys=["hk1"], start_block=2000, end_block=1000 + ) + assert result == [] + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_scrape_receipts_no_miners(): + result = await Receipts().scrape_receipts_from_miners( + miner_hotkeys=[], start_block=1000, end_block=2000 + ) + assert result == [] + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_scrape_receipts_invalid_miner_endpoints(): + 
await sync_to_async(Miner.objects.create)(hotkey="hk1", address="127.0.0.1", port=7005) + await sync_to_async(Miner.objects.create)(hotkey="hk2", address="127.0.0.1", port=7006) + await sync_to_async(Miner.objects.create)(hotkey="hk3", address="127.0.0.1", port=7007) + + t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) + t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) + await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) + await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) + + result = await Receipts().scrape_receipts_from_miners( + miner_hotkeys=["hk1", "hk2", "hk3"], + start_block=1000, + end_block=2000, + ) + + assert result == [] + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_get_valid_job_started_receipts_for_miner(): + miner_hotkey = "test_miner_hotkey" + validator_hotkey = "test_validator_hotkey" + + valid_receipt = await sync_to_async(JobStartedReceipt.objects.create)( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now()), + executor_class="spin_up-4min.gpu-24gb", + is_organic=False, + ttl=300, + ) + + await sync_to_async(JobStartedReceipt.objects.create)( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now() - dt.timedelta(minutes=10)), + executor_class="spin_up-4min.gpu-24gb", + is_organic=False, + ttl=300, + ) + + await sync_to_async(JobStartedReceipt.objects.create)( + job_uuid=str(uuid.uuid4()), + miner_hotkey="other_miner", + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now()), + executor_class="spin_up-4min.gpu-24gb", + is_organic=False, + ttl=300, + ) + + result = await 
Receipts().get_valid_job_started_receipts_for_miner( + miner_hotkey, make_aware(dt.datetime.now()) + ) + + assert len(result) == 1 + assert result[0].miner_hotkey == miner_hotkey + assert str(result[0].job_uuid) == str(valid_receipt.job_uuid) + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_get_job_finished_receipts_for_miner(): + job_uuid1 = str(uuid.uuid4()) + job_uuid2 = str(uuid.uuid4()) + job_uuid3 = str(uuid.uuid4()) + miner_hotkey = "test_miner_hotkey" + validator_hotkey = "test_validator_hotkey" + + await sync_to_async(JobFinishedReceipt.objects.create)( + job_uuid=job_uuid1, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now()), + time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=5)), + time_took_us=5_000_000, + score_str="0.8", + ) + + await sync_to_async(JobFinishedReceipt.objects.create)( + job_uuid=job_uuid2, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now()), + time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=3)), + time_took_us=3_000_000, + score_str="0.9", + ) + + await sync_to_async(JobFinishedReceipt.objects.create)( + job_uuid=job_uuid3, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", + timestamp=make_aware(dt.datetime.now()), + time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=2)), + time_took_us=2_000_000, + score_str="0.7", + ) + + requested_jobs = [job_uuid1, job_uuid2] + result = await Receipts().get_job_finished_receipts_for_miner(miner_hotkey, requested_jobs) + + assert len(result) == 2 + job_uuids = {str(r.job_uuid) for r in result} + assert job_uuids == {job_uuid1, job_uuid2} + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def 
test_get_job_started_receipt_by_uuid(): + job_uuid = str(uuid.uuid4()) + miner_hotkey = "test_miner_hotkey" + validator_hotkey = "test_validator_hotkey" + + await sync_to_async(JobStartedReceipt.objects.create)( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="0xv", + miner_signature="0xm", # Add miner signature + timestamp=make_aware(dt.datetime.now()), + executor_class="spin_up-4min.gpu-24gb", + is_organic=False, + ttl=300, + ) + + result = await Receipts().get_job_started_receipt_by_uuid(job_uuid) + + assert result is not None + assert str(result.job_uuid) == job_uuid + assert result.miner_hotkey == miner_hotkey + assert result.validator_hotkey == validator_hotkey + + non_existent_uuid = str(uuid.uuid4()) + result = await Receipts().get_job_started_receipt_by_uuid(non_existent_uuid) + assert result is None + + +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_get_completed_job_receipts_for_block_range(): + t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) + t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) + t2 = make_aware(dt.datetime(2025, 1, 1, 2, 0, 0)) + t3 = make_aware(dt.datetime(2025, 1, 1, 3, 0, 0)) + + await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) + await sync_to_async(Block.objects.create)(block_number=1500, creation_timestamp=t1) + await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t2) + await sync_to_async(Block.objects.create)(block_number=3000, creation_timestamp=t3) + + receipt1 = await sync_to_async(JobFinishedReceipt.objects.create)( + job_uuid=str(uuid.uuid4()), + miner_hotkey="miner1", + validator_hotkey="validator1", + validator_signature="0xv1", + miner_signature="0xm1", + timestamp=t0 + dt.timedelta(minutes=30), + time_started=t0 + dt.timedelta(minutes=25), + time_took_us=5_000_000, + score_str="0.8", + ) + + receipt2 = await sync_to_async(JobFinishedReceipt.objects.create)( + 
job_uuid=str(uuid.uuid4()), + miner_hotkey="miner2", + validator_hotkey="validator2", + validator_signature="0xv2", + miner_signature="0xm2", + timestamp=t2 + dt.timedelta(minutes=30), + time_started=t2 + dt.timedelta(minutes=25), + time_took_us=3_000_000, + score_str="0.9", + ) + + await sync_to_async(JobFinishedReceipt.objects.create)( + job_uuid=str(uuid.uuid4()), + miner_hotkey="miner3", + validator_hotkey="validator3", + validator_signature="0xv3", + miner_signature="0xm3", + timestamp=t0 - dt.timedelta(minutes=1), + time_started=t0 - dt.timedelta(minutes=2), + time_took_us=2_000_000, + score_str="0.7", + ) + + result = await Receipts().get_completed_job_receipts_for_block_range(1000, 1500) + + assert len(result) == 1 + assert str(result[0].payload.job_uuid) == str(receipt1.job_uuid) + + result = await Receipts().get_completed_job_receipts_for_block_range(2000, 3000) + + assert len(result) == 1 + assert str(result[0].payload.job_uuid) == str(receipt2.job_uuid) + + +@pytest.mark.django_db(transaction=True) +def test_create_job_finished_receipt_success(): + job_uuid = str(uuid.uuid4()) + miner_hotkey = "test_miner_hotkey" + validator_hotkey = "test_validator_hotkey" + time_started = dt.datetime.now(dt.UTC) + time_took_us = 5000000 + score_str = "0.85" + + receipt = Receipts().create_job_finished_receipt( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + time_started=time_started, + time_took_us=time_took_us, + score_str=score_str, + ) + + assert receipt is not None + assert isinstance(receipt, JobFinishedReceipt) + + assert receipt.job_uuid == job_uuid + assert receipt.miner_hotkey == miner_hotkey + assert receipt.validator_hotkey == validator_hotkey + assert receipt.time_started == time_started + assert receipt.time_took_us == time_took_us + assert receipt.score_str == score_str + assert receipt.validator_signature is not None + assert len(receipt.validator_signature) > 0 + + assert receipt.time_took() == 
dt.timedelta(microseconds=time_took_us) + assert receipt.score() == float(score_str) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/types.py b/validator/app/src/compute_horde_validator/validator/receipts/types.py index 8f9b8dd14..c2e94d09c 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/types.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/types.py @@ -1,22 +1,4 @@ -class ReceiptsConfigurationError(Exception): - """Raised when there is a configuration error in the receipts module.""" - - pass - - -class ReceiptsScrapingError(Exception): - """Raised when there is an error scraping receipts from miners.""" - - pass - - class ReceiptsGenerationError(Exception): """Raised when there is an error generating receipts.""" pass - - -class ReceiptsValidationError(Exception): - """Raised when there is an error validating receipts.""" - - pass diff --git a/validator/app/src/compute_horde_validator/validator/routing/default.py b/validator/app/src/compute_horde_validator/validator/routing/default.py index cbfb3c11f..503dfeb26 100644 --- a/validator/app/src/compute_horde_validator/validator/routing/default.py +++ b/validator/app/src/compute_horde_validator/validator/routing/default.py @@ -9,7 +9,6 @@ OrganicJobRequest, V2JobRequest, ) -from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt from compute_horde.subtensor import get_cycle_containing_block from compute_horde.utils import async_synchronized from django.conf import settings @@ -25,6 +24,7 @@ MinerManifest, MinerPreliminaryReservation, ) +from compute_horde_validator.validator.receipts.default import Receipts from compute_horde_validator.validator.routing.base import RoutingBase from compute_horde_validator.validator.routing.types import ( AllMinersBusy, @@ -220,17 +220,18 @@ async def _pick_miner_for_job_v2(request: V2JobRequest) -> JobRoute: known_started_jobs: set[str] = { str(job_uuid) - async for job_uuid in 
JobStartedReceipt.objects.valid_at(timezone.now()) - .filter(miner_hotkey=miner.hotkey) - .values_list("job_uuid", flat=True) + for receipt in await Receipts().get_valid_job_started_receipts_for_miner( + miner.hotkey, timezone.now() + ) + for job_uuid in [receipt.job_uuid] } known_finished_jobs: set[str] = { str(job_uuid) - async for job_uuid in JobFinishedReceipt.objects.filter( - job_uuid__in=known_started_jobs | preliminary_reservation_jobs, - miner_hotkey=miner.hotkey, - ).values_list("job_uuid", flat=True) + for receipt in await Receipts().get_job_finished_receipts_for_miner( + miner.hotkey, list(known_started_jobs | preliminary_reservation_jobs) + ) + for job_uuid in [receipt.job_uuid] } maybe_ongoing_jobs = ( diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index c1be4ff1f..f26b6a99d 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -83,6 +83,7 @@ SyntheticJobBatch, SystemEvent, ) +from compute_horde_validator.validator.receipts.default import Receipts from compute_horde_validator.validator.synthetic_jobs.generator import current from compute_horde_validator.validator.synthetic_jobs.generator.base import ( BaseSyntheticJobGenerator, @@ -926,16 +927,16 @@ def _generate_job_started_receipt(ctx: BatchContext, job: Job) -> None: ttl = job.get_spin_up_time() + job_timeout_seconds + spinup_leeway_seconds ttl_clamped = max(ttl_min, min(ttl_max, ttl)) - payload = JobStartedReceiptPayload( + receipts_service = Receipts() + payload, signature = receipts_service.create_job_started_receipt( job_uuid=job.uuid, miner_hotkey=job.miner_hotkey, validator_hotkey=ctx.own_keypair.ss58_address, - timestamp=datetime.now(tz=UTC), executor_class=ExecutorClass(job.executor_class), is_organic=False, ttl=ttl_clamped, ) - signature 
= f"0x{ctx.own_keypair.sign(payload.blob_for_signing()).hex()}" + job.job_started_receipt_payload = payload job.job_started_receipt_signature = signature From 444593b2890ca8dd99e8f945b102bd713898bd79 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Fri, 15 Aug 2025 12:36:53 +0200 Subject: [PATCH 02/13] Cleanup async stuff --- .../validator/receipts/base.py | 6 ++-- .../validator/receipts/default.py | 34 +++++++----------- .../validator/receipts/tests/test_receipts.py | 35 +++++++++---------- .../validator/routing/default.py | 5 +-- 4 files changed, 36 insertions(+), 44 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/base.py b/validator/app/src/compute_horde_validator/validator/receipts/base.py index 31962dc22..d0656af99 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/base.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/base.py @@ -83,7 +83,7 @@ def create_job_started_receipt( pass @abstractmethod - async def get_valid_job_started_receipts_for_miner( + def get_valid_job_started_receipts_for_miner( self, miner_hotkey: str, at_time: datetime.datetime ) -> list[JobStartedReceipt]: """ @@ -99,7 +99,7 @@ async def get_valid_job_started_receipts_for_miner( pass @abstractmethod - async def get_job_finished_receipts_for_miner( + def get_job_finished_receipts_for_miner( self, miner_hotkey: str, job_uuids: list[str] ) -> list[JobFinishedReceipt]: """ @@ -115,7 +115,7 @@ async def get_job_finished_receipts_for_miner( pass @abstractmethod - async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: + def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: """ Get a job started receipt by UUID. 
diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 5ddcf9977..3d26df0dd 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -46,12 +46,8 @@ async def scrape_receipts_from_miners( return [] try: - start_ts = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( - start_block - ) - end_ts = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( - end_block - ) + start_ts = await self._get_block_timestamp(start_block) + end_ts = await self._get_block_timestamp(end_block) start_page = LocalFilesystemPagedReceiptStore.current_page_at(start_ts) end_page = LocalFilesystemPagedReceiptStore.current_page_at(end_ts) @@ -169,12 +165,12 @@ def create_job_started_receipt( except Exception as e: raise ReceiptsGenerationError(f"Failed to create job started receipt: {e}") from e - async def get_valid_job_started_receipts_for_miner( + def get_valid_job_started_receipts_for_miner( self, miner_hotkey: str, at_time: datetime.datetime ) -> list[JobStartedReceipt]: try: qs = JobStartedReceipt.objects.valid_at(at_time).filter(miner_hotkey=miner_hotkey) - receipts: list[JobStartedReceipt] = [r async for r in qs.all()] + receipts: list[JobStartedReceipt] = [r for r in qs.all()] logger.debug( "Retrieved %s valid job started receipts for miner %s at %s", @@ -189,7 +185,7 @@ async def get_valid_job_started_receipts_for_miner( logger.error("Failed to get valid job started receipts for miner: %s", e) return [] - async def get_job_finished_receipts_for_miner( + def get_job_finished_receipts_for_miner( self, miner_hotkey: str, job_uuids: list[str] ) -> list[JobFinishedReceipt]: try: @@ -198,7 +194,7 @@ async def get_job_finished_receipts_for_miner( qs = JobFinishedReceipt.objects.filter( miner_hotkey=miner_hotkey, job_uuid__in=job_uuids ) - 
receipts: list[JobFinishedReceipt] = [r async for r in qs.all()] + receipts: list[JobFinishedReceipt] = [r for r in qs.all()] logger.debug( "Retrieved %s job finished receipts for miner %s (jobs: %s)", @@ -213,9 +209,9 @@ async def get_job_finished_receipts_for_miner( logger.error("Failed to get job finished receipts for miner: %s", e) return [] - async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: + def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: try: - django_receipt = await JobStartedReceipt.objects.aget(job_uuid=job_uuid) + django_receipt = JobStartedReceipt.objects.get(job_uuid=job_uuid) logger.debug( "Retrieved JobStartedReceipt for job %s (miner: %s, validator: %s)", job_uuid, @@ -242,12 +238,8 @@ async def get_completed_job_receipts_for_block_range( return [] try: - start_timestamp = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( - start_block - ) - end_timestamp = await sync_to_async(self._get_block_timestamp, thread_sensitive=True)( - end_block - ) + start_timestamp = await self._get_block_timestamp(start_block) + end_timestamp = await self._get_block_timestamp(end_block) finished_receipts_qs = JobFinishedReceipt.objects.filter( timestamp__gte=start_timestamp, @@ -298,9 +290,9 @@ async def _fetch_receipts_for_range( receipts.append(receipt_data.to_receipt()) return receipts - def _get_block_timestamp(self, block_number: int) -> datetime.datetime: + async def _get_block_timestamp(self, block_number: int) -> datetime.datetime: try: - block = Block.objects.get(block_number=block_number) + block = await Block.objects.aget(block_number=block_number) return block.creation_timestamp except Exception as db_ex: logger.debug( @@ -310,7 +302,7 @@ def _get_block_timestamp(self, block_number: int) -> datetime.datetime: ) try: - ts = supertensor().get_block_timestamp(block_number) + ts = await supertensor().get_block_timestamp(block_number) if isinstance(ts, 
datetime.datetime): return ts else: diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index f873e0911..8ce1ec8b6 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -116,7 +116,7 @@ async def mock_receipts_handler(request): runner = web.AppRunner(app) await runner.setup() - site = web.TCPSite(runner, "127.0.0.1", 7001) + site = web.TCPSite(runner, "127.0.0.1", port=7001) await site.start() try: @@ -149,7 +149,9 @@ def convert_to_list(qs): async def test_scrape_receipts_network_failure_handling(): await sync_to_async(Miner.objects.create)(hotkey="hk1", address="127.0.0.1", port=7004) t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) + t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) + await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) async def mock_failing_handler(request): """Mock handler that always raises an exception.""" @@ -219,12 +221,11 @@ async def test_scrape_receipts_invalid_miner_endpoints(): @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_get_valid_job_started_receipts_for_miner(): +def test_get_valid_job_started_receipts_for_miner(): miner_hotkey = "test_miner_hotkey" validator_hotkey = "test_validator_hotkey" - valid_receipt = await sync_to_async(JobStartedReceipt.objects.create)( + valid_receipt = JobStartedReceipt.objects.create( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -236,7 +237,7 @@ async def test_get_valid_job_started_receipts_for_miner(): ttl=300, ) - await sync_to_async(JobStartedReceipt.objects.create)( + JobStartedReceipt.objects.create( job_uuid=str(uuid.uuid4()), 
miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -248,7 +249,7 @@ async def test_get_valid_job_started_receipts_for_miner(): ttl=300, ) - await sync_to_async(JobStartedReceipt.objects.create)( + JobStartedReceipt.objects.create( job_uuid=str(uuid.uuid4()), miner_hotkey="other_miner", validator_hotkey=validator_hotkey, @@ -260,7 +261,7 @@ async def test_get_valid_job_started_receipts_for_miner(): ttl=300, ) - result = await Receipts().get_valid_job_started_receipts_for_miner( + result = Receipts().get_valid_job_started_receipts_for_miner( miner_hotkey, make_aware(dt.datetime.now()) ) @@ -270,15 +271,14 @@ async def test_get_valid_job_started_receipts_for_miner(): @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_get_job_finished_receipts_for_miner(): +def test_get_job_finished_receipts_for_miner(): job_uuid1 = str(uuid.uuid4()) job_uuid2 = str(uuid.uuid4()) job_uuid3 = str(uuid.uuid4()) miner_hotkey = "test_miner_hotkey" validator_hotkey = "test_validator_hotkey" - await sync_to_async(JobFinishedReceipt.objects.create)( + JobFinishedReceipt.objects.create( job_uuid=job_uuid1, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -290,7 +290,7 @@ async def test_get_job_finished_receipts_for_miner(): score_str="0.8", ) - await sync_to_async(JobFinishedReceipt.objects.create)( + JobFinishedReceipt.objects.create( job_uuid=job_uuid2, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -302,7 +302,7 @@ async def test_get_job_finished_receipts_for_miner(): score_str="0.9", ) - await sync_to_async(JobFinishedReceipt.objects.create)( + JobFinishedReceipt.objects.create( job_uuid=job_uuid3, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -315,7 +315,7 @@ async def test_get_job_finished_receipts_for_miner(): ) requested_jobs = [job_uuid1, job_uuid2] - result = await Receipts().get_job_finished_receipts_for_miner(miner_hotkey, requested_jobs) + result = 
Receipts().get_job_finished_receipts_for_miner(miner_hotkey, requested_jobs) assert len(result) == 2 job_uuids = {str(r.job_uuid) for r in result} @@ -323,13 +323,12 @@ async def test_get_job_finished_receipts_for_miner(): @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_get_job_started_receipt_by_uuid(): +def test_get_job_started_receipt_by_uuid(): job_uuid = str(uuid.uuid4()) miner_hotkey = "test_miner_hotkey" validator_hotkey = "test_validator_hotkey" - await sync_to_async(JobStartedReceipt.objects.create)( + JobStartedReceipt.objects.create( job_uuid=job_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -341,7 +340,7 @@ async def test_get_job_started_receipt_by_uuid(): ttl=300, ) - result = await Receipts().get_job_started_receipt_by_uuid(job_uuid) + result = Receipts().get_job_started_receipt_by_uuid(job_uuid) assert result is not None assert str(result.job_uuid) == job_uuid @@ -349,7 +348,7 @@ async def test_get_job_started_receipt_by_uuid(): assert result.validator_hotkey == validator_hotkey non_existent_uuid = str(uuid.uuid4()) - result = await Receipts().get_job_started_receipt_by_uuid(non_existent_uuid) + result = Receipts().get_job_started_receipt_by_uuid(non_existent_uuid) assert result is None diff --git a/validator/app/src/compute_horde_validator/validator/routing/default.py b/validator/app/src/compute_horde_validator/validator/routing/default.py index 503dfeb26..efdad1f2f 100644 --- a/validator/app/src/compute_horde_validator/validator/routing/default.py +++ b/validator/app/src/compute_horde_validator/validator/routing/default.py @@ -3,6 +3,7 @@ from datetime import timedelta from typing import assert_never +from asgiref.sync import sync_to_async from compute_horde.blockchain.block_cache import aget_current_block from compute_horde.executor_class import EXECUTOR_CLASS from compute_horde.fv_protocol.facilitator_requests import ( @@ -220,7 +221,7 @@ async def _pick_miner_for_job_v2(request: 
V2JobRequest) -> JobRoute: known_started_jobs: set[str] = { str(job_uuid) - for receipt in await Receipts().get_valid_job_started_receipts_for_miner( + for receipt in await sync_to_async(Receipts().get_valid_job_started_receipts_for_miner)( miner.hotkey, timezone.now() ) for job_uuid in [receipt.job_uuid] @@ -228,7 +229,7 @@ async def _pick_miner_for_job_v2(request: V2JobRequest) -> JobRoute: known_finished_jobs: set[str] = { str(job_uuid) - for receipt in await Receipts().get_job_finished_receipts_for_miner( + for receipt in await sync_to_async(Receipts().get_job_finished_receipts_for_miner)( miner.hotkey, list(known_started_jobs | preliminary_reservation_jobs) ) for job_uuid in [receipt.job_uuid] From a50169b5169fbfe2649a815c8d5afd93f5a62407 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Mon, 18 Aug 2025 14:43:58 +0200 Subject: [PATCH 03/13] Extract receipts transfer logic to module --- .../management/commands/transfer_receipts.py | 279 +------------- .../validator/receipts/base.py | 205 +++++++---- .../validator/receipts/default.py | 339 +++++++++++++++--- .../validator/receipts/tasks.py | 81 ----- 4 files changed, 425 insertions(+), 479 deletions(-) delete mode 100644 validator/app/src/compute_horde_validator/validator/receipts/tasks.py diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py b/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py index bdd43d85d..d66825109 100644 --- a/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py @@ -1,31 +1,6 @@ -import asyncio -import logging -import time -from collections import defaultdict -from collections.abc import Awaitable, Callable, Sequence -from datetime import datetime, timedelta - -import aiohttp from asgiref.sync import async_to_sync -from compute_horde.receipts.store.local
import N_ACTIVE_PAGES, LocalFilesystemPagedReceiptStore -from compute_horde.receipts.transfer import ( - MinerInfo, - ReceiptsTransfer, - TransferResult, -) -from django.conf import settings from django.core.management import BaseCommand -from django.utils import timezone -from prometheus_client import Counter, Gauge, Histogram - -from compute_horde_validator.validator.dynamic_config import aget_config -from compute_horde_validator.validator.models import MetagraphSnapshot, Miner - -logger = logging.getLogger(__name__) - - -class TransferIsDisabled(Exception): - pass +from compute_horde_validator.validator.receipts.default import Receipts class Command(BaseCommand): @@ -34,37 +9,6 @@ class Command(BaseCommand): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.m_receipts = Counter( - "receipttransfer_receipts_total", - documentation="Number of transferred receipts", - ) - self.m_miners = Gauge( - "receipttransfer_miners", - documentation="Number of miners to transfer from", - ) - self.m_successful_transfers = Counter( - "receipttransfer_successful_transfers_total", - documentation="Number of transfers that didn't explicitly fail. 
(this includes 404s though)", - ) - self.m_line_errors = Counter( - "receipttransfer_line_errors_total", - labelnames=["exc_type"], - documentation="Number of invalid lines in received pages", - ) - self.m_transfer_errors = Counter( - "receipttransfer_transfer_errors_total", - labelnames=["exc_type"], - documentation="Number of completely failed page transfers", - ) - self.m_transfer_duration = Histogram( - "receipttransfer_transfer_duration", - documentation="Total time to transfer latest page deltas from all miners", - ) - self.m_catchup_pages_left = Gauge( - "receipttransfer_catchup_pages_left", - documentation="Pages waiting for catch-up", - ) - def add_arguments(self, parser): parser.add_argument( "--debug-miner-hotkey", @@ -97,217 +41,10 @@ async def handle( debug_miner_port: int | None, **kwargs, ): - if (debug_miner_hotkey, debug_miner_ip, debug_miner_port) != (None, None, None): - # 1st, use explicitly specified miner if available - if None in {debug_miner_hotkey, debug_miner_ip, debug_miner_port}: - raise ValueError("Either none or all of explicit miner details must be provided") - miner = [debug_miner_hotkey, debug_miner_ip, debug_miner_port] - logger.info(f"Will fetch receipts from explicit miner: {miner}") - - async def miners(): - return [miner] - - elif settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: - # 2nd, if debug miners are specified, they take precedence. - debug_miners = settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS - logger.info(f"Will fetch receipts from {len(debug_miners)} debug miners") - - async def miners(): - return debug_miners - - else: - # 3rd, if no specific miners were specified, get from metagraph snapshot. 
- logger.info("Will fetch receipts from metagraph snapshot miners") - - async def miners(): - snapshot = await MetagraphSnapshot.aget_latest() - serving_hotkeys = snapshot.serving_hotkeys - serving_miners = [m async for m in Miner.objects.filter(hotkey__in=serving_hotkeys)] - return [(m.hotkey, m.address, m.port) for m in serving_miners] - - # IMPORTANT: This encompasses at least the current and the previous cycle. - cutoff = timezone.now() - timedelta(hours=5) - - """ - General considerations: - - higher concurrency: - - higher bandwidth use - - more parallel CPU-heavy signature check tasks -> steal CPU time from asyncio thread (GIL) - - lower concurrency: - - slows down the process due to higher influence of network latency - - higher allowed request timeout: - - one slow miner may stall the whole process for longer - - less timeouts due to CPU time being stolen by CPU heavy tasks - """ - - if daemon: - while True: - try: - await self.run_in_loop(cutoff, miners) - except TransferIsDisabled: - # Sleep instead of exiting in case the transfer gets dynamically re-enabled. - logger.info("Transfer is currently disabled. 
Sleeping for a minute.") - await asyncio.sleep(60) - else: - await self.run_once(cutoff, miners) - - async def run_once( - self, cutoff: datetime, miners: Callable[[], Awaitable[list[MinerInfo]]] - ) -> None: - catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff) - current_page = LocalFilesystemPagedReceiptStore.current_page() - async with aiohttp.ClientSession() as session: - await self.catch_up( - # Pull all pages from newest to oldest - pages=list(reversed(range(catchup_cutoff_page, current_page + 1))), - miners=miners, - session=session, - semaphore=asyncio.Semaphore(50), - ) - - async def run_in_loop( - self, cutoff: datetime, miners: Callable[[], Awaitable[list[MinerInfo]]] - ) -> None: - """ - Do a full catch-up + listen for changes in latest 2 pages indefinitely - """ - catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff) - current_page = LocalFilesystemPagedReceiptStore.current_page() - - # TCP adds significant overhead - it's important to reuse connections. 
- async with aiohttp.ClientSession() as session: - # Catch-up with the latest pages so that the "keep up" loop has easier time later - await self.catch_up( - pages=list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))), - miners=miners, - session=session, - semaphore=asyncio.Semaphore(50), - ) - await asyncio.gather( - # Slowly catch up with non-active pages, newest first - self.catch_up( - pages=list( - reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1)) - ), - miners=miners, - session=session, - # Throttle this lower so that it doesn't choke the "keep up" loop - semaphore=asyncio.Semaphore(10), - ), - # Keep up with latest pages continuously in parallel - self.keep_up( - miners=miners, - session=session, - semaphore=asyncio.Semaphore(50), - ), - ) - - async def catch_up( - self, - pages: Sequence[int], - miners: Callable[[], Awaitable[list[MinerInfo]]], - session: aiohttp.ClientSession, - semaphore: asyncio.Semaphore, - ) -> None: - """ - Fetches new receipts on given pages one by one. - """ - for idx, page in enumerate(pages): - await self._throw_if_disabled() - - self.m_catchup_pages_left.set(len(pages) - idx) - start_time = time.monotonic() - current_loop_miners = await miners() - result = await ReceiptsTransfer.transfer( - miners=current_loop_miners, - pages=[page], - session=session, - semaphore=semaphore, - # We may need to download a lot of full pages, so the timeout is higher. 
- request_timeout=3.0, - ) - elapsed = time.monotonic() - start_time - - logger.info( - f"Catching up: " - f"{page=} ({idx + 1}/{len(pages)}) " - f"receipts={result.n_receipts} " - f"{elapsed=:.3f} " - f"successful_transfers={result.n_successful_transfers} " - f"transfer_errors={len(result.transfer_errors)} " - f"line_errors={len(result.line_errors)} " - ) - - self._push_common_metrics(result) - self.m_catchup_pages_left.set(0) - - async def keep_up( - self, - miners: Callable[[], Awaitable[list[MinerInfo]]], - session: aiohttp.ClientSession, - semaphore: asyncio.Semaphore, - ) -> None: - """ - Runs indefinitely and polls for changes in active pages every `interval`. - """ - while True: - await self._throw_if_disabled() - interval: int = await aget_config("DYNAMIC_RECEIPT_TRANSFER_INTERVAL") - - start_time = time.monotonic() - current_page = LocalFilesystemPagedReceiptStore.current_page() - pages = list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))) - current_loop_miners = await miners() - result = await ReceiptsTransfer.transfer( - miners=current_loop_miners, - pages=pages, - session=session, - semaphore=semaphore, - request_timeout=1.0, - ) - elapsed = time.monotonic() - start_time - - logger.info( - f"Keeping up: " - f"{pages=} " - f"receipts={result.n_receipts} " - f"{elapsed=:.3f} " - f"successful_transfers={result.n_successful_transfers} " - f"transfer_errors={len(result.transfer_errors)} " - f"line_errors={len(result.line_errors)} " - ) - - self._push_common_metrics(result) - self.m_miners.set(len(current_loop_miners)) - self.m_transfer_duration.observe(elapsed) - - # Sleep for the remainder of the time if any - if elapsed < interval: - time.sleep(interval - elapsed) - - def _push_common_metrics(self, result: TransferResult) -> None: - # Push line error counts grouped by the exception type - n_line_errors: defaultdict[type[Exception], int] = defaultdict(int) - for line_error in result.line_errors: - n_line_errors[type(line_error)] += 1 
- for exc_type, exc_count in n_line_errors.items(): - self.m_line_errors.labels(exc_type=exc_type.__name__).inc(exc_count) - - # Push transfer error counts grouped by the exception type - n_transfer_errors: defaultdict[type[Exception], int] = defaultdict(int) - for transfer_error in result.transfer_errors: - n_transfer_errors[type(transfer_error)] += 1 - for exc_type, exc_count in n_transfer_errors.items(): - self.m_transfer_errors.labels(exc_type=exc_type.__name__).inc(exc_count) - - self.m_receipts.inc(result.n_receipts) - self.m_successful_transfers.inc(result.n_successful_transfers) - - async def _throw_if_disabled(self): - try: - if await aget_config("DYNAMIC_RECEIPT_TRANSFER_ENABLED"): - return - except KeyError: - logger.warning("DYNAMIC_RECEIPT_TRANSFER_ENABLED dynamic config is not set up!") - - raise TransferIsDisabled + await Receipts().run_receipts_transfer( + daemon=daemon, + debug_miner_hotkey=debug_miner_hotkey, + debug_miner_ip=debug_miner_ip, + debug_miner_port=debug_miner_port, + ) + return diff --git a/validator/app/src/compute_horde_validator/validator/receipts/base.py b/validator/app/src/compute_horde_validator/validator/receipts/base.py index d0656af99..0dce4e466 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/base.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/base.py @@ -1,9 +1,11 @@ import datetime from abc import ABC, abstractmethod +from collections.abc import Callable from compute_horde.receipts import Receipt from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt from compute_horde.receipts.schemas import JobStartedReceiptPayload +from compute_horde.receipts.transfer import TransferResult class ReceiptsBase(ABC): @@ -14,116 +16,87 @@ class ReceiptsBase(ABC): """ @abstractmethod - async def scrape_receipts_from_miners( - self, miner_hotkeys: list[str], start_block: int, end_block: int - ) -> list[Receipt]: - """ - Scrape receipts from miners for a block range. 
- - Args: - miner_hotkeys: List of miner hotkeys to scrape receipts from - start_block: Start block (inclusive) - if None, scrapes all available receipts - end_block: End block (exclusive) - if None, scrapes all available receipts - - Returns: - List of scraped receipts - """ - pass - - @abstractmethod - def create_job_finished_receipt( + async def transfer_receipts_from_miners( self, - job_uuid: str, - miner_hotkey: str, - validator_hotkey: str, - time_started: datetime.datetime, - time_took_us: int, - score_str: str, - ) -> JobFinishedReceipt: + miner_hotkeys: list[str], + pages: list[int], + semaphore_limit: int = 50, + request_timeout: float = 3.0, + ) -> TransferResult: """ - Create a job finished receipt for a completed job. - + Transfer receipts from miners for specific pages. + Args: - job_uuid: UUID of the job - miner_hotkey: Hotkey of the miner - validator_hotkey: Hotkey of the validator - time_started: Time the job started - time_took_us: Time the job took in microseconds - score_str: Score of the job - + miner_hotkeys: List of miner hotkeys to transfer from + pages: List of page numbers to transfer + semaphore_limit: Maximum concurrent requests + request_timeout: Timeout for each request in seconds + Returns: - JobFinishedReceipt + TransferResult with transfer statistics """ pass @abstractmethod - def create_job_started_receipt( + async def run_full_transfer_cycle( self, - job_uuid: str, - miner_hotkey: str, - validator_hotkey: str, - executor_class: str, - is_organic: bool, - ttl: int, - ) -> tuple[JobStartedReceiptPayload, str]: + miner_hotkeys: list[str], + cutoff_hours: int = 5, + n_active_pages: int = 2, + active_semaphore_limit: int = 50, + catchup_semaphore_limit: int = 10, + active_timeout: float = 1.0, + catchup_timeout: float = 3.0, + ) -> tuple[TransferResult, TransferResult]: """ - Create a job started receipt payload. 
- - Args: - job_uuid: UUID of the job - miner_hotkey: Hotkey of the miner - validator_hotkey: Hotkey of the validator - executor_class: Executor class for the job - is_organic: Whether this is an organic job - ttl: Time to live in seconds - - Returns: - Tuple of (payload, signature_hex) + Run a full transfer cycle: catch up on active pages, then run catch-up and keep-up in parallel. """ pass @abstractmethod - def get_valid_job_started_receipts_for_miner( + async def get_valid_job_started_receipts_for_miner( self, miner_hotkey: str, at_time: datetime.datetime ) -> list[JobStartedReceipt]: """ Get valid job started receipts for a miner at a specific time. - + Args: - miner_hotkey: Miner's hotkey - at_time: Time to check validity against - + miner_hotkey: Hotkey of the miner + at_time: Time to check validity at + Returns: - List of valid receipts + List of valid JobStartedReceipt objects """ pass @abstractmethod - def get_job_finished_receipts_for_miner( + async def get_job_finished_receipts_for_miner( self, miner_hotkey: str, job_uuids: list[str] ) -> list[JobFinishedReceipt]: """ - Get job finished receipts for specific jobs from a miner. - + Get job finished receipts for a miner and specific job UUIDs. + Args: - miner_hotkey: Miner's hotkey + miner_hotkey: Hotkey of the miner job_uuids: List of job UUIDs to get receipts for - + Returns: - List of receipts + List of JobFinishedReceipt objects """ pass @abstractmethod - def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: + async def get_job_started_receipt_by_uuid( + self, job_uuid: str + ) -> JobStartedReceipt | None: """ Get a job started receipt by UUID. - + Args: job_uuid: UUID of the job - + Returns: - Receipt if found, None otherwise + JobStartedReceipt if found, None otherwise """ pass @@ -133,12 +106,100 @@ async def get_completed_job_receipts_for_block_range( ) -> list[Receipt]: """ Get all receipts for jobs that were completed between the specified blocks. 
+ + Args: + start_block: Start block (inclusive) + end_block: End block (exclusive) + + Returns: + List of receipts for completed jobs in the block range + """ + pass + @abstractmethod + async def _fetch_receipts_for_range( + self, start_block: int, end_block: int + ) -> list[Receipt]: + """ + Fetch receipts for a block range from local storage. + Args: start_block: Start block (inclusive) end_block: End block (exclusive) + + Returns: + List of receipts for the block range + """ + pass + + @abstractmethod + async def run_receipts_transfer( + self, + daemon: bool, + debug_miner_hotkey: str | None, + debug_miner_ip: str | None, + debug_miner_port: int | None, + ) -> None: + """ + Run the receipts transfer loop (or a single iteration) using the same logic + that was previously implemented in the management command. + + Args: + daemon: If True, run indefinitely; otherwise perform a single transfer + debug_miner_hotkey: Explicit miner hotkey to fetch from (debug only) + debug_miner_ip: Explicit miner IP to fetch from (debug only) + debug_miner_port: Explicit miner port to fetch from (debug only) + """ + pass + + @abstractmethod + def create_job_started_receipt( + self, + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + executor_class: str, + is_organic: bool, + ttl: int, + ) -> tuple[JobStartedReceiptPayload, str]: + """ + Create a job started receipt. + + Args: + job_uuid: UUID of the job + miner_hotkey: Hotkey of the miner + validator_hotkey: Hotkey of the validator + executor_class: Executor class for the job + is_organic: Whether the job is organic + ttl: Time to live for the receipt + + Returns: + Tuple of (payload, signature) + """ + pass + @abstractmethod + def create_job_finished_receipt( + self, + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + time_started: datetime.datetime, + time_took_us: int, + score_str: str, + ) -> JobFinishedReceipt: + """ + Create a job finished receipt. 
+ + Args: + job_uuid: UUID of the job + miner_hotkey: Hotkey of the miner + validator_hotkey: Hotkey of the validator + time_started: When the job started + time_took_us: How long the job took in microseconds + score_str: Score string for the job + Returns: - List of receipts for completed jobs in the block range + Created JobFinishedReceipt """ pass diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 3d26df0dd..67ca03e57 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -1,6 +1,7 @@ import asyncio import datetime import logging +import time import aiohttp from asgiref.sync import sync_to_async @@ -10,18 +11,21 @@ JobStartedReceipt, ) from compute_horde.receipts.schemas import JobFinishedReceiptPayload, JobStartedReceiptPayload -from compute_horde.receipts.store.local import LocalFilesystemPagedReceiptStore -from compute_horde.receipts.transfer import ReceiptsTransfer +from compute_horde.receipts.store.local import N_ACTIVE_PAGES, LocalFilesystemPagedReceiptStore +from compute_horde.receipts.transfer import ReceiptsTransfer, TransferResult from compute_horde.utils import sign_blob from django.conf import settings +from django.utils import timezone from compute_horde_validator.validator.allowance.utils.supertensor import supertensor -from compute_horde_validator.validator.models import Miner +from compute_horde_validator.validator.dynamic_config import aget_config +from compute_horde_validator.validator.models import MetagraphSnapshot, Miner from compute_horde_validator.validator.models.allowance.internal import Block from compute_horde_validator.validator.receipts.base import ReceiptsBase from compute_horde_validator.validator.receipts.types import ( ReceiptsGenerationError, ) +from prometheus_client import Counter, Gauge, Histogram logger 
= logging.getLogger(__name__) @@ -31,70 +35,295 @@ class Receipts(ReceiptsBase): Default implementation of receipts manager. """ - async def scrape_receipts_from_miners( - self, miner_hotkeys: list[str], start_block: int, end_block: int - ) -> list[Receipt]: - if not miner_hotkeys: - logger.info("No miner hotkeys provided for scraping") - return [] - if start_block >= end_block: - logger.warning( - "Invalid block range provided: start_block (%s) >= end_block (%s)", - start_block, - end_block, - ) - return [] + async def run_receipts_transfer( + self, + daemon: bool, + debug_miner_hotkey: str | None, + debug_miner_ip: str | None, + debug_miner_port: int | None, + ) -> None: + class TransferIsDisabled(Exception): + pass + + # Metrics mirror the management command ones (declared locally to avoid duplicate registration) + m_receipts = Counter( + "receipttransfer_receipts_total", + documentation="Number of transferred receipts", + ) + m_miners = Gauge( + "receipttransfer_miners", + documentation="Number of miners to transfer from", + ) + m_successful_transfers = Counter( + "receipttransfer_successful_transfers_total", + documentation="Number of transfers that didn't explicitly fail. 
(this includes 404s though)", + ) + m_line_errors = Counter( + "receipttransfer_line_errors_total", + labelnames=["exc_type"], + documentation="Number of invalid lines in received pages", + ) + m_transfer_errors = Counter( + "receipttransfer_transfer_errors_total", + labelnames=["exc_type"], + documentation="Number of completely failed page transfers", + ) + m_transfer_duration = Histogram( + "receipttransfer_transfer_duration", + documentation="Total time to transfer latest page deltas from all miners", + ) + m_catchup_pages_left = Gauge( + "receipttransfer_catchup_pages_left", + documentation="Pages waiting for catch-up", + ) - try: - start_ts = await self._get_block_timestamp(start_block) - end_ts = await self._get_block_timestamp(end_block) - - start_page = LocalFilesystemPagedReceiptStore.current_page_at(start_ts) - end_page = LocalFilesystemPagedReceiptStore.current_page_at(end_ts) - if end_page < start_page: - logger.warning( - "Computed page range is empty: start_page=%s end_page=%s", - start_page, - end_page, + # Select miners source identical to the command's logic + if (debug_miner_hotkey, debug_miner_ip, debug_miner_port) != (None, None, None): + if None in {debug_miner_hotkey, debug_miner_ip, debug_miner_port}: + raise ValueError("Either none or all of explicit miner details must be provided") + miner = [debug_miner_hotkey, debug_miner_ip, debug_miner_port] + logger.info(f"Will fetch receipts from explicit miner: {miner}") + + async def miners() -> list[tuple[str, str, int]]: + return [tuple(miner)] # type: ignore[return-value] + + elif settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: + debug_miners = settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS + logger.info(f"Will fetch receipts from {len(debug_miners)} debug miners") + + async def miners() -> list[tuple[str, str, int]]: + return debug_miners + + else: + logger.info("Will fetch receipts from metagraph snapshot miners") + + async def miners() -> list[tuple[str, str, int]]: + snapshot = await 
MetagraphSnapshot.aget_latest() + serving_hotkeys = snapshot.serving_hotkeys + serving_miners = [ + m async for m in Miner.objects.filter(hotkey__in=serving_hotkeys) + ] + return [(m.hotkey, m.address, m.port) for m in serving_miners] + + cutoff = timezone.now() - datetime.timedelta(hours=5) + + async def _throw_if_disabled() -> None: + try: + if await aget_config("DYNAMIC_RECEIPT_TRANSFER_ENABLED"): + return + except KeyError: + logger.warning("DYNAMIC_RECEIPT_TRANSFER_ENABLED dynamic config is not set up!") + raise TransferIsDisabled + + def _push_common_metrics(result: TransferResult) -> None: + from collections import defaultdict + + n_line_errors: defaultdict[type[Exception], int] = defaultdict(int) + for line_error in result.line_errors: + n_line_errors[type(line_error)] += 1 + for exc_type, exc_count in n_line_errors.items(): + m_line_errors.labels(exc_type=exc_type.__name__).inc(exc_count) + + n_transfer_errors: defaultdict[type[Exception], int] = defaultdict(int) + for transfer_error in result.transfer_errors: + n_transfer_errors[type(transfer_error)] += 1 + for exc_type, exc_count in n_transfer_errors.items(): + m_transfer_errors.labels(exc_type=exc_type.__name__).inc(exc_count) + + m_receipts.inc(result.n_receipts) + m_successful_transfers.inc(result.n_successful_transfers) + + async def catch_up( + pages: list[int], + miners_fn, + session: aiohttp.ClientSession, + semaphore: asyncio.Semaphore, + ) -> None: + for idx, page in enumerate(pages): + await _throw_if_disabled() + + m_catchup_pages_left.set(len(pages) - idx) + start_time = time.monotonic() + current_loop_miners = await miners_fn() + result = await ReceiptsTransfer.transfer( + miners=current_loop_miners, + pages=[page], + session=session, + semaphore=semaphore, + request_timeout=3.0, ) - return [] - pages = list(range(start_page, end_page + 1)) - - miners = await self._fetch_miners(miner_hotkeys) - miner_infos: list[tuple[str, str, int]] = [ - (m[0], m[1], m[2]) for m in miners if m[1] and m[2] 
and m[1] != "0.0.0.0" - ] - if not miner_infos: - logger.info("No valid miner endpoints resolved for scraping") - return [] + elapsed = time.monotonic() - start_time - semaphore = asyncio.Semaphore(25) - async with aiohttp.ClientSession() as session: + logger.info( + f"Catching up: " + f"{page=} ({idx + 1}/{len(pages)}) " + f"receipts={result.n_receipts} " + f"{elapsed=:.3f} " + f"successful_transfers={result.n_successful_transfers} " + f"transfer_errors={len(result.transfer_errors)} " + f"line_errors={len(result.line_errors)} " + ) + + _push_common_metrics(result) + m_catchup_pages_left.set(0) + + async def keep_up( + miners_fn, + session: aiohttp.ClientSession, + semaphore: asyncio.Semaphore, + ) -> None: + while True: + await _throw_if_disabled() + interval: int = await aget_config("DYNAMIC_RECEIPT_TRANSFER_INTERVAL") + + start_time = time.monotonic() + current_page = LocalFilesystemPagedReceiptStore.current_page() + pages = list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))) + current_loop_miners = await miners_fn() result = await ReceiptsTransfer.transfer( - miners=miner_infos, + miners=current_loop_miners, pages=pages, session=session, semaphore=semaphore, - request_timeout=3.0, + request_timeout=1.0, ) + elapsed = time.monotonic() - start_time + logger.info( - "Scrape finished: receipts=%s successful_transfers=%s transfer_errors=%s line_errors=%s", - result.n_receipts, - result.n_successful_transfers, - len(result.transfer_errors), - len(result.line_errors), + f"Keeping up: " + f"{pages=} " + f"receipts={result.n_receipts} " + f"{elapsed=:.3f} " + f"successful_transfers={result.n_successful_transfers} " + f"transfer_errors={len(result.transfer_errors)} " + f"line_errors={len(result.line_errors)} " ) - receipts = await self._fetch_receipts_for_range(start_ts, end_ts, miner_hotkeys) - return receipts - except Exception as ex: - logger.error( - "Failed to scrape receipts for block range %s-%s: %s", - start_block, - end_block, - ex, + 
_push_common_metrics(result) + m_miners.set(len(current_loop_miners)) + m_transfer_duration.observe(elapsed) + + if elapsed < interval: + time.sleep(interval - elapsed) + + async def run_once(cutoff_ts: datetime.datetime) -> None: + catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) + current_page = LocalFilesystemPagedReceiptStore.current_page() + async with aiohttp.ClientSession() as session: + await catch_up( + pages=list(reversed(range(catchup_cutoff_page, current_page + 1))), + miners_fn=miners, + session=session, + semaphore=asyncio.Semaphore(50), + ) + + async def run_in_loop(cutoff_ts: datetime.datetime) -> None: + catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) + current_page = LocalFilesystemPagedReceiptStore.current_page() + async with aiohttp.ClientSession() as session: + await catch_up( + pages=list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))), + miners_fn=miners, + session=session, + semaphore=asyncio.Semaphore(50), + ) + await asyncio.gather( + catch_up( + pages=list( + reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1)) + ), + miners_fn=miners, + session=session, + semaphore=asyncio.Semaphore(10), + ), + keep_up( + miners_fn=miners, + session=session, + semaphore=asyncio.Semaphore(50), + ), + ) + + if daemon: + while True: + try: + await run_in_loop(cutoff) + except TransferIsDisabled: + logger.info("Transfer is currently disabled. 
Sleeping for a minute.") + await asyncio.sleep(60) + else: + await run_once(cutoff) + + async def transfer_receipts_from_miners( + self, + miner_hotkeys: list[str], + pages: list[int], + semaphore_limit: int = 50, + request_timeout: float = 3.0, + ) -> TransferResult: + if not miner_hotkeys or not pages: + return TransferResult(0, 0, [], []) + + miners = await self._fetch_miners(miner_hotkeys) + miner_infos: list[tuple[str, str, int]] = [ + (m[0], m[1], m[2]) for m in miners if m[1] and m[2] and m[1] != "0.0.0.0" + ] + if not miner_infos: + return TransferResult(0, 0, [], []) + + semaphore = asyncio.Semaphore(semaphore_limit) + async with aiohttp.ClientSession() as session: + return await ReceiptsTransfer.transfer( + miners=miner_infos, + pages=pages, + session=session, + semaphore=semaphore, + request_timeout=request_timeout, ) - return [] + + async def run_full_transfer_cycle( + self, + miner_hotkeys: list[str], + cutoff_hours: int = 5, + n_active_pages: int = 2, + active_semaphore_limit: int = 50, + catchup_semaphore_limit: int = 10, + active_timeout: float = 1.0, + catchup_timeout: float = 3.0, + ) -> tuple[TransferResult, TransferResult]: + # Compute page windows + cutoff_ts = timezone.now() - datetime.timedelta(hours=cutoff_hours) + catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) + current_page = LocalFilesystemPagedReceiptStore.current_page() + + active_pages = list(reversed(range(current_page - n_active_pages + 1, current_page + 1))) + catchup_pages = list( + reversed(range(catchup_cutoff_page, max(catchup_cutoff_page, current_page - n_active_pages + 1))) + ) + + miners = await self._fetch_miners(miner_hotkeys) + miner_infos: list[tuple[str, str, int]] = [ + (m[0], m[1], m[2]) for m in miners if m[1] and m[2] and m[1] != "0.0.0.0" + ] + if not miner_infos: + return TransferResult(0, 0, [], []), TransferResult(0, 0, [], []) + + async with aiohttp.ClientSession() as session: + active_result = await ReceiptsTransfer.transfer( 
+ miners=miner_infos, + pages=active_pages, + session=session, + semaphore=asyncio.Semaphore(active_semaphore_limit), + request_timeout=active_timeout, + ) + catchup_result = await ReceiptsTransfer.transfer( + miners=miner_infos, + pages=catchup_pages, + session=session, + semaphore=asyncio.Semaphore(catchup_semaphore_limit), + request_timeout=catchup_timeout, + ) + + return active_result, catchup_result def create_job_finished_receipt( self, diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tasks.py b/validator/app/src/compute_horde_validator/validator/receipts/tasks.py deleted file mode 100644 index c14f7e3d2..000000000 --- a/validator/app/src/compute_horde_validator/validator/receipts/tasks.py +++ /dev/null @@ -1,81 +0,0 @@ -import logging - -from asgiref.sync import async_to_sync -from compute_horde.receipts.models import JobFinishedReceipt -from compute_horde.subtensor import get_cycle_containing_block -from django.conf import settings - -from compute_horde_validator.celery import app -from compute_horde_validator.validator.models import MetagraphSnapshot -from compute_horde_validator.validator.models.allowance.internal import Block -from compute_horde_validator.validator.receipts.default import Receipts - -logger = logging.getLogger(__name__) - - -@app.task(name="compute_horde_validator.validator.receipts.scrape_receipts_from_miners") -def scrape_receipts_from_miners() -> None: - """ - Periodic receipts scraping task. - - - Determines serving miners from the latest metagraph snapshot - - Scrapes receipts for the current cycle up to the latest snapshot block - - Returns the number of receipts retrieved in the call. 
- """ - try: - metagraph = MetagraphSnapshot.get_latest() - except Exception: - logger.warning("No metagraph snapshot available for receipts scraping") - return - - miner_hotkeys = metagraph.get_serving_hotkeys() - if not miner_hotkeys: - logger.info("No serving miners found for receipts scraping") - return - - current_block = metagraph.block - current_cycle = get_cycle_containing_block( - block=current_block, netuid=settings.BITTENSOR_NETUID - ) - - latest_receipt = JobFinishedReceipt.objects.order_by("-timestamp").first() - - if latest_receipt: - try: - cycle_start_block = Block.objects.get(block_number=current_cycle.start) - current_cycle_start_timestamp = cycle_start_block.creation_timestamp - - # If the latest receipt is newer than the current cycle start, we've already scraped this cycle - if latest_receipt.timestamp >= current_cycle_start_timestamp: - logger.debug("Already scraped receipts for cycle %s, skipping", current_cycle.start) - return - except Block.DoesNotExist: - # If the cycle start block doesn't exist in our database, proceed with scraping - logger.debug( - "Cycle start block %s not found in database, proceeding with scraping", - current_cycle.start, - ) - - logger.info( - "New cycle detected or first run, scraping receipts for cycle %s-%s", - current_cycle.start, - current_cycle.stop, - ) - - try: - scraped = async_to_sync(Receipts().scrape_receipts_from_miners)( - miner_hotkeys=miner_hotkeys, - start_block=current_cycle.start, - end_block=current_block, - ) - logger.info( - "Scraped %d receipts for cycle %s-%s", - len(scraped), - current_cycle.start, - current_cycle.stop, - ) - - except Exception as e: - logger.error("Failed to scrape receipts: %s", e, exc_info=True) - return From 02938b9731d89e6ccaf9f78268e17c6c2421dc59 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Tue, 19 Aug 2025 09:21:50 +0200 Subject: [PATCH 04/13] Refactor --- .../validator/allowance/tests/mockchain.py | 7 +- .../management/commands/transfer_receipts.py | 1 + 
.../validator/receipts/__init__.py | 1 - .../validator/receipts/base.py | 195 +++-- .../validator/receipts/default.py | 461 ++++++------ .../validator/receipts/tests/test_receipts.py | 693 +++++++++--------- .../validator/routing/default.py | 22 +- 7 files changed, 696 insertions(+), 684 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py b/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py index 75d4035ae..95c5d73ee 100644 --- a/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py +++ b/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py @@ -1,7 +1,9 @@ import asyncio import datetime +import tempfile from contextlib import contextmanager from functools import lru_cache +from pathlib import Path from unittest import mock from unittest.mock import patch @@ -57,7 +59,10 @@ def cmbm(block_number): @lru_cache def wallet(): - wallet_ = bittensor_wallet.Wallet(name="test_mock_validator") + # Use an isolated temp directory for test wallets to avoid reading any real/local keyfiles + wallets_root = Path(tempfile.gettempdir()) / "compute_horde_test_wallets" + wallets_root.mkdir(parents=True, exist_ok=True) + wallet_ = bittensor_wallet.Wallet(name="test_mock_validator", path=str(wallets_root)) wallet_.regenerate_coldkey( mnemonic="local ghost evil lizard decade own lecture absurd vote despair predict cage", use_password=False, diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py b/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py index d66825109..567fc33b4 100644 --- a/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/management/commands/transfer_receipts.py @@ -1,5 +1,6 @@ from asgiref.sync import async_to_sync from django.core.management 
import BaseCommand + from compute_horde_validator.validator.receipts.default import Receipts diff --git a/validator/app/src/compute_horde_validator/validator/receipts/__init__.py b/validator/app/src/compute_horde_validator/validator/receipts/__init__.py index bfca5b69c..48691a81b 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/__init__.py @@ -4,7 +4,6 @@ This module provides receipts management functionality for the validator. """ -from . import tasks as _tasks # noqa: F401 from .base import ReceiptsBase from .default import Receipts from .types import ReceiptsGenerationError diff --git a/validator/app/src/compute_horde_validator/validator/receipts/base.py b/validator/app/src/compute_horde_validator/validator/receipts/base.py index 0dce4e466..b402223c3 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/base.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/base.py @@ -1,6 +1,5 @@ import datetime from abc import ABC, abstractmethod -from collections.abc import Callable from compute_horde.receipts import Receipt from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt @@ -16,40 +15,73 @@ class ReceiptsBase(ABC): """ @abstractmethod - async def transfer_receipts_from_miners( + async def run_receipts_transfer( self, - miner_hotkeys: list[str], - pages: list[int], - semaphore_limit: int = 50, - request_timeout: float = 3.0, - ) -> TransferResult: + daemon: bool, + debug_miner_hotkey: str | None, + debug_miner_ip: str | None, + debug_miner_port: int | None, + ) -> None: """ - Transfer receipts from miners for specific pages. - + Run the receipts transfer loop (or a single iteration). 
+ Args: - miner_hotkeys: List of miner hotkeys to transfer from - pages: List of page numbers to transfer - semaphore_limit: Maximum concurrent requests - request_timeout: Timeout for each request in seconds - + daemon: If True, run indefinitely; otherwise perform a single transfer + debug_miner_hotkey: Explicit miner hotkey to fetch from (debug only) + debug_miner_ip: Explicit miner IP to fetch from (debug only) + debug_miner_port: Explicit miner port to fetch from (debug only) + """ + pass + + @abstractmethod + def create_job_started_receipt( + self, + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + executor_class: str, + is_organic: bool, + ttl: int, + ) -> tuple[JobStartedReceiptPayload, str]: + """ + Create a job started receipt. + + Args: + job_uuid: UUID of the job + miner_hotkey: Hotkey of the miner + validator_hotkey: Hotkey of the validator + executor_class: Executor class for the job + is_organic: Whether the job is organic + ttl: Time to live for the receipt + Returns: - TransferResult with transfer statistics + Tuple of (payload, signature) """ pass @abstractmethod - async def run_full_transfer_cycle( + def create_job_finished_receipt( self, - miner_hotkeys: list[str], - cutoff_hours: int = 5, - n_active_pages: int = 2, - active_semaphore_limit: int = 50, - catchup_semaphore_limit: int = 10, - active_timeout: float = 1.0, - catchup_timeout: float = 3.0, - ) -> tuple[TransferResult, TransferResult]: + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + time_started: datetime.datetime, + time_took_us: int, + score_str: str, + ) -> JobFinishedReceipt: """ - Run a full transfer cycle: catch up on active pages, then run catch-up and keep-up in parallel. + Create a job finished receipt. 
+ + Args: + job_uuid: UUID of the job + miner_hotkey: Hotkey of the miner + validator_hotkey: Hotkey of the validator + time_started: When the job started + time_took_us: How long the job took in microseconds + score_str: Score string for the job + + Returns: + Created JobFinishedReceipt """ pass @@ -59,11 +91,11 @@ async def get_valid_job_started_receipts_for_miner( ) -> list[JobStartedReceipt]: """ Get valid job started receipts for a miner at a specific time. - + Args: miner_hotkey: Hotkey of the miner at_time: Time to check validity at - + Returns: List of valid JobStartedReceipt objects """ @@ -75,26 +107,24 @@ async def get_job_finished_receipts_for_miner( ) -> list[JobFinishedReceipt]: """ Get job finished receipts for a miner and specific job UUIDs. - + Args: miner_hotkey: Hotkey of the miner job_uuids: List of job UUIDs to get receipts for - + Returns: List of JobFinishedReceipt objects """ pass @abstractmethod - async def get_job_started_receipt_by_uuid( - self, job_uuid: str - ) -> JobStartedReceipt | None: + async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: """ Get a job started receipt by UUID. - + Args: job_uuid: UUID of the job - + Returns: JobStartedReceipt if found, None otherwise """ @@ -106,100 +136,55 @@ async def get_completed_job_receipts_for_block_range( ) -> list[Receipt]: """ Get all receipts for jobs that were completed between the specified blocks. - - Args: - start_block: Start block (inclusive) - end_block: End block (exclusive) - - Returns: - List of receipts for completed jobs in the block range - """ - pass - @abstractmethod - async def _fetch_receipts_for_range( - self, start_block: int, end_block: int - ) -> list[Receipt]: - """ - Fetch receipts for a block range from local storage. 
- Args: start_block: Start block (inclusive) end_block: End block (exclusive) - + Returns: - List of receipts for the block range + List of receipts for completed jobs in the block range """ pass @abstractmethod - async def run_receipts_transfer( + async def _transfer_receipts_from_miners( self, - daemon: bool, - debug_miner_hotkey: str | None, - debug_miner_ip: str | None, - debug_miner_port: int | None, - ) -> None: + miner_hotkeys: list[str], + pages: list[int], + semaphore_limit: int = 50, + request_timeout: float = 3.0, + ) -> TransferResult: """ - Run the receipts transfer loop (or a single iteration) using the same logic - that was previously implemented in the management command. - - Args: - daemon: If True, run indefinitely; otherwise perform a single transfer - debug_miner_hotkey: Explicit miner hotkey to fetch from (debug only) - debug_miner_ip: Explicit miner IP to fetch from (debug only) - debug_miner_port: Explicit miner port to fetch from (debug only) + Private helper: transfer receipts from miners for specific pages. """ pass @abstractmethod - def create_job_started_receipt( + async def _run_full_transfer_cycle( self, - job_uuid: str, - miner_hotkey: str, - validator_hotkey: str, - executor_class: str, - is_organic: bool, - ttl: int, - ) -> tuple[JobStartedReceiptPayload, str]: + miner_hotkeys: list[str], + cutoff_hours: int = 5, + n_active_pages: int = 2, + active_semaphore_limit: int = 50, + catchup_semaphore_limit: int = 10, + active_timeout: float = 1.0, + catchup_timeout: float = 3.0, + ) -> tuple[TransferResult, TransferResult]: """ - Create a job started receipt. - - Args: - job_uuid: UUID of the job - miner_hotkey: Hotkey of the miner - validator_hotkey: Hotkey of the validator - executor_class: Executor class for the job - is_organic: Whether the job is organic - ttl: Time to live for the receipt - - Returns: - Tuple of (payload, signature) + Private helper: run a full transfer cycle for tests or internal orchestration. 
""" pass @abstractmethod - def create_job_finished_receipt( - self, - job_uuid: str, - miner_hotkey: str, - validator_hotkey: str, - time_started: datetime.datetime, - time_took_us: int, - score_str: str, - ) -> JobFinishedReceipt: + async def _fetch_receipts_for_range(self, start_block: int, end_block: int) -> list[Receipt]: """ - Create a job finished receipt. - + Fetch receipts for a block range from local storage. + Args: - job_uuid: UUID of the job - miner_hotkey: Hotkey of the miner - validator_hotkey: Hotkey of the validator - time_started: When the job started - time_took_us: How long the job took in microseconds - score_str: Score string for the job - + start_block: Start block (inclusive) + end_block: End block (exclusive) + Returns: - Created JobFinishedReceipt + List of receipts for the block range """ pass diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 67ca03e57..4b8893d02 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -2,6 +2,7 @@ import datetime import logging import time +from dataclasses import dataclass import aiohttp from asgiref.sync import sync_to_async @@ -30,6 +31,21 @@ logger = logging.getLogger(__name__) +class _TransferIsDisabled(Exception): + pass + + +@dataclass +class _Metrics: + receipts: Counter + miners: Gauge + successful_transfers: Counter + line_errors: Counter + transfer_errors: Counter + transfer_duration: Histogram + catchup_pages_left: Gauge + + class Receipts(ReceiptsBase): """ Default implementation of receipts manager. 
@@ -42,218 +58,238 @@ async def run_receipts_transfer( debug_miner_ip: str | None, debug_miner_port: int | None, ) -> None: - class TransferIsDisabled(Exception): - pass - - # Metrics mirror the management command ones (declared locally to avoid duplicate registration) - m_receipts = Counter( - "receipttransfer_receipts_total", - documentation="Number of transferred receipts", - ) - m_miners = Gauge( - "receipttransfer_miners", - documentation="Number of miners to transfer from", - ) - m_successful_transfers = Counter( - "receipttransfer_successful_transfers_total", - documentation="Number of transfers that didn't explicitly fail. (this includes 404s though)", - ) - m_line_errors = Counter( - "receipttransfer_line_errors_total", - labelnames=["exc_type"], - documentation="Number of invalid lines in received pages", - ) - m_transfer_errors = Counter( - "receipttransfer_transfer_errors_total", - labelnames=["exc_type"], - documentation="Number of completely failed page transfers", - ) - m_transfer_duration = Histogram( - "receipttransfer_transfer_duration", - documentation="Total time to transfer latest page deltas from all miners", - ) - m_catchup_pages_left = Gauge( - "receipttransfer_catchup_pages_left", - documentation="Pages waiting for catch-up", + metrics = _Metrics( + receipts=Counter("receipttransfer_receipts_total", documentation="Number of transferred receipts"), + miners=Gauge("receipttransfer_miners", documentation="Number of miners to transfer from"), + successful_transfers=Counter( + "receipttransfer_successful_transfers_total", + documentation="Number of transfers that didn't explicitly fail. 
(this includes 404s though)", + ), + line_errors=Counter( + "receipttransfer_line_errors_total", + labelnames=["exc_type"], + documentation="Number of invalid lines in received pages", + ), + transfer_errors=Counter( + "receipttransfer_transfer_errors_total", + labelnames=["exc_type"], + documentation="Number of completely failed page transfers", + ), + transfer_duration=Histogram( + "receipttransfer_transfer_duration", + documentation="Total time to transfer latest page deltas from all miners", + ), + catchup_pages_left=Gauge( + "receipttransfer_catchup_pages_left", + documentation="Pages waiting for catch-up", + ), ) - # Select miners source identical to the command's logic + mode, explicit_miner = await self._determine_miners_mode(debug_miner_hotkey, debug_miner_ip, debug_miner_port) + cutoff = timezone.now() - datetime.timedelta(hours=5) + + if daemon: + while True: + try: + await self._run_in_loop(cutoff, mode, explicit_miner, metrics) + except _TransferIsDisabled: + logger.info("Transfer is currently disabled. 
Sleeping for a minute.") + await asyncio.sleep(60) + else: + await self._run_once(cutoff, mode, explicit_miner, metrics) + + async def _determine_miners_mode( + self, + debug_miner_hotkey: str | None, + debug_miner_ip: str | None, + debug_miner_port: int | None, + ) -> tuple[str, tuple[str, str, int] | None]: if (debug_miner_hotkey, debug_miner_ip, debug_miner_port) != (None, None, None): if None in {debug_miner_hotkey, debug_miner_ip, debug_miner_port}: raise ValueError("Either none or all of explicit miner details must be provided") - miner = [debug_miner_hotkey, debug_miner_ip, debug_miner_port] - logger.info(f"Will fetch receipts from explicit miner: {miner}") - - async def miners() -> list[tuple[str, str, int]]: - return [tuple(miner)] # type: ignore[return-value] - - elif settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: + miner = (debug_miner_hotkey, debug_miner_ip, int(debug_miner_port)) # type: ignore[arg-type] + logger.info(f"Will fetch receipts from explicit miner: {list(miner)}") + return "explicit", miner + if settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: debug_miners = settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS logger.info(f"Will fetch receipts from {len(debug_miners)} debug miners") + return "debug_settings", None + logger.info("Will fetch receipts from metagraph snapshot miners") + return "metagraph", None + + async def _list_miners(self, mode: str, explicit_miner: tuple[str, str, int] | None) -> list[tuple[str, str, int]]: + if mode == "explicit": + assert explicit_miner is not None + return [explicit_miner] + if mode == "debug_settings": + return settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS + # metagraph mode + snapshot = await MetagraphSnapshot.aget_latest() + serving_hotkeys = snapshot.serving_hotkeys + serving_miners = [m async for m in Miner.objects.filter(hotkey__in=serving_hotkeys)] + return [(m.hotkey, m.address, m.port) for m in serving_miners] + + async def _throw_if_disabled(self) -> None: + try: + if await 
aget_config("DYNAMIC_RECEIPT_TRANSFER_ENABLED"): + return + except KeyError: + logger.warning("DYNAMIC_RECEIPT_TRANSFER_ENABLED dynamic config is not set up!") + raise _TransferIsDisabled + + def _push_common_metrics(self, result: TransferResult, metrics: _Metrics) -> None: + from collections import defaultdict + + n_line_errors: defaultdict[type[Exception], int] = defaultdict(int) + for line_error in result.line_errors: + n_line_errors[type(line_error)] += 1 + for exc_type, exc_count in n_line_errors.items(): + metrics.line_errors.labels(exc_type=exc_type.__name__).inc(exc_count) + + n_transfer_errors: defaultdict[type[Exception], int] = defaultdict(int) + for transfer_error in result.transfer_errors: + n_transfer_errors[type(transfer_error)] += 1 + for exc_type, exc_count in n_transfer_errors.items(): + metrics.transfer_errors.labels(exc_type=exc_type.__name__).inc(exc_count) + + metrics.receipts.inc(result.n_receipts) + metrics.successful_transfers.inc(result.n_successful_transfers) + + async def _catch_up( + self, + pages: list[int], + mode: str, + explicit_miner: tuple[str, str, int] | None, + session: aiohttp.ClientSession, + semaphore: asyncio.Semaphore, + metrics: _Metrics, + ) -> None: + for idx, page in enumerate(pages): + await self._throw_if_disabled() + + metrics.catchup_pages_left.set(len(pages) - idx) + start_time = time.monotonic() + current_loop_miners = await self._list_miners(mode, explicit_miner) + result = await ReceiptsTransfer.transfer( + miners=current_loop_miners, + pages=[page], + session=session, + semaphore=semaphore, + request_timeout=3.0, + ) + elapsed = time.monotonic() - start_time - async def miners() -> list[tuple[str, str, int]]: - return debug_miners - - else: - logger.info("Will fetch receipts from metagraph snapshot miners") + logger.info( + f"Catching up: " + f"{page=} ({idx + 1}/{len(pages)}) " + f"receipts={result.n_receipts} " + f"{elapsed=:.3f} " + f"successful_transfers={result.n_successful_transfers} " + 
f"transfer_errors={len(result.transfer_errors)} " + f"line_errors={len(result.line_errors)} " + ) - async def miners() -> list[tuple[str, str, int]]: - snapshot = await MetagraphSnapshot.aget_latest() - serving_hotkeys = snapshot.serving_hotkeys - serving_miners = [ - m async for m in Miner.objects.filter(hotkey__in=serving_hotkeys) - ] - return [(m.hotkey, m.address, m.port) for m in serving_miners] + self._push_common_metrics(result, metrics) + metrics.catchup_pages_left.set(0) - cutoff = timezone.now() - datetime.timedelta(hours=5) + async def _keep_up( + self, + mode: str, + explicit_miner: tuple[str, str, int] | None, + session: aiohttp.ClientSession, + semaphore: asyncio.Semaphore, + metrics: _Metrics, + ) -> None: + while True: + await self._throw_if_disabled() + interval: int = await aget_config("DYNAMIC_RECEIPT_TRANSFER_INTERVAL") - async def _throw_if_disabled() -> None: - try: - if await aget_config("DYNAMIC_RECEIPT_TRANSFER_ENABLED"): - return - except KeyError: - logger.warning("DYNAMIC_RECEIPT_TRANSFER_ENABLED dynamic config is not set up!") - raise TransferIsDisabled - - def _push_common_metrics(result: TransferResult) -> None: - from collections import defaultdict - - n_line_errors: defaultdict[type[Exception], int] = defaultdict(int) - for line_error in result.line_errors: - n_line_errors[type(line_error)] += 1 - for exc_type, exc_count in n_line_errors.items(): - m_line_errors.labels(exc_type=exc_type.__name__).inc(exc_count) - - n_transfer_errors: defaultdict[type[Exception], int] = defaultdict(int) - for transfer_error in result.transfer_errors: - n_transfer_errors[type(transfer_error)] += 1 - for exc_type, exc_count in n_transfer_errors.items(): - m_transfer_errors.labels(exc_type=exc_type.__name__).inc(exc_count) - - m_receipts.inc(result.n_receipts) - m_successful_transfers.inc(result.n_successful_transfers) - - async def catch_up( - pages: list[int], - miners_fn, - session: aiohttp.ClientSession, - semaphore: asyncio.Semaphore, - ) -> None: 
- for idx, page in enumerate(pages): - await _throw_if_disabled() - - m_catchup_pages_left.set(len(pages) - idx) - start_time = time.monotonic() - current_loop_miners = await miners_fn() - result = await ReceiptsTransfer.transfer( - miners=current_loop_miners, - pages=[page], - session=session, - semaphore=semaphore, - request_timeout=3.0, - ) - elapsed = time.monotonic() - start_time - - logger.info( - f"Catching up: " - f"{page=} ({idx + 1}/{len(pages)}) " - f"receipts={result.n_receipts} " - f"{elapsed=:.3f} " - f"successful_transfers={result.n_successful_transfers} " - f"transfer_errors={len(result.transfer_errors)} " - f"line_errors={len(result.line_errors)} " - ) + start_time = time.monotonic() + current_page = LocalFilesystemPagedReceiptStore.current_page() + pages = list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))) + current_loop_miners = await self._list_miners(mode, explicit_miner) + result = await ReceiptsTransfer.transfer( + miners=current_loop_miners, + pages=pages, + session=session, + semaphore=semaphore, + request_timeout=1.0, + ) + elapsed = time.monotonic() - start_time - _push_common_metrics(result) - m_catchup_pages_left.set(0) + logger.info( + f"Keeping up: " + f"{pages=} " + f"receipts={result.n_receipts} " + f"{elapsed=:.3f} " + f"successful_transfers={result.n_successful_transfers} " + f"transfer_errors={len(result.transfer_errors)} " + f"line_errors={len(result.line_errors)} " + ) - async def keep_up( - miners_fn, - session: aiohttp.ClientSession, - semaphore: asyncio.Semaphore, - ) -> None: - while True: - await _throw_if_disabled() - interval: int = await aget_config("DYNAMIC_RECEIPT_TRANSFER_INTERVAL") - - start_time = time.monotonic() - current_page = LocalFilesystemPagedReceiptStore.current_page() - pages = list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))) - current_loop_miners = await miners_fn() - result = await ReceiptsTransfer.transfer( - miners=current_loop_miners, - pages=pages, - 
session=session, - semaphore=semaphore, - request_timeout=1.0, - ) - elapsed = time.monotonic() - start_time - - logger.info( - f"Keeping up: " - f"{pages=} " - f"receipts={result.n_receipts} " - f"{elapsed=:.3f} " - f"successful_transfers={result.n_successful_transfers} " - f"transfer_errors={len(result.transfer_errors)} " - f"line_errors={len(result.line_errors)} " - ) + self._push_common_metrics(result, metrics) + metrics.miners.set(len(current_loop_miners)) + metrics.transfer_duration.observe(elapsed) - _push_common_metrics(result) - m_miners.set(len(current_loop_miners)) - m_transfer_duration.observe(elapsed) + if elapsed < interval: + time.sleep(interval - elapsed) - if elapsed < interval: - time.sleep(interval - elapsed) + async def _run_once( + self, + cutoff_ts: datetime.datetime, + mode: str, + explicit_miner: tuple[str, str, int] | None, + metrics: _Metrics, + ) -> None: + catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) + current_page = LocalFilesystemPagedReceiptStore.current_page() + async with aiohttp.ClientSession() as session: + await self._catch_up( + pages=list(reversed(range(catchup_cutoff_page, current_page + 1))), + mode=mode, + explicit_miner=explicit_miner, + session=session, + semaphore=asyncio.Semaphore(50), + metrics=metrics, + ) - async def run_once(cutoff_ts: datetime.datetime) -> None: - catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) - current_page = LocalFilesystemPagedReceiptStore.current_page() - async with aiohttp.ClientSession() as session: - await catch_up( - pages=list(reversed(range(catchup_cutoff_page, current_page + 1))), - miners_fn=miners, + async def _run_in_loop( + self, + cutoff_ts: datetime.datetime, + mode: str, + explicit_miner: tuple[str, str, int] | None, + metrics: _Metrics, + ) -> None: + catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) + current_page = LocalFilesystemPagedReceiptStore.current_page() + async with 
aiohttp.ClientSession() as session: + await self._catch_up( + pages=list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))), + mode=mode, + explicit_miner=explicit_miner, + session=session, + semaphore=asyncio.Semaphore(50), + metrics=metrics, + ) + await asyncio.gather( + self._catch_up( + pages=list(reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1))), + mode=mode, + explicit_miner=explicit_miner, session=session, - semaphore=asyncio.Semaphore(50), - ) - - async def run_in_loop(cutoff_ts: datetime.datetime) -> None: - catchup_cutoff_page = LocalFilesystemPagedReceiptStore.current_page_at(cutoff_ts) - current_page = LocalFilesystemPagedReceiptStore.current_page() - async with aiohttp.ClientSession() as session: - await catch_up( - pages=list(reversed(range(current_page - N_ACTIVE_PAGES + 1, current_page + 1))), - miners_fn=miners, + semaphore=asyncio.Semaphore(10), + metrics=metrics, + ), + self._keep_up( + mode=mode, + explicit_miner=explicit_miner, session=session, semaphore=asyncio.Semaphore(50), - ) - await asyncio.gather( - catch_up( - pages=list( - reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1)) - ), - miners_fn=miners, - session=session, - semaphore=asyncio.Semaphore(10), - ), - keep_up( - miners_fn=miners, - session=session, - semaphore=asyncio.Semaphore(50), - ), - ) - - if daemon: - while True: - try: - await run_in_loop(cutoff) - except TransferIsDisabled: - logger.info("Transfer is currently disabled. 
Sleeping for a minute.") - await asyncio.sleep(60) - else: - await run_once(cutoff) + metrics=metrics, + ), + ) - async def transfer_receipts_from_miners( + async def _transfer_receipts_from_miners( self, miner_hotkeys: list[str], pages: list[int], @@ -280,7 +316,7 @@ async def transfer_receipts_from_miners( request_timeout=request_timeout, ) - async def run_full_transfer_cycle( + async def _run_full_transfer_cycle( self, miner_hotkeys: list[str], cutoff_hours: int = 5, @@ -394,12 +430,15 @@ def create_job_started_receipt( except Exception as e: raise ReceiptsGenerationError(f"Failed to create job started receipt: {e}") from e - def get_valid_job_started_receipts_for_miner( + async def get_valid_job_started_receipts_for_miner( self, miner_hotkey: str, at_time: datetime.datetime ) -> list[JobStartedReceipt]: try: - qs = JobStartedReceipt.objects.valid_at(at_time).filter(miner_hotkey=miner_hotkey) - receipts: list[JobStartedReceipt] = [r for r in qs.all()] + def _query() -> list[JobStartedReceipt]: + qs = JobStartedReceipt.objects.valid_at(at_time).filter(miner_hotkey=miner_hotkey) + return list(qs.all()) + + receipts: list[JobStartedReceipt] = await sync_to_async(_query, thread_sensitive=True)() logger.debug( "Retrieved %s valid job started receipts for miner %s at %s", @@ -414,16 +453,20 @@ def get_valid_job_started_receipts_for_miner( logger.error("Failed to get valid job started receipts for miner: %s", e) return [] - def get_job_finished_receipts_for_miner( + async def get_job_finished_receipts_for_miner( self, miner_hotkey: str, job_uuids: list[str] ) -> list[JobFinishedReceipt]: try: if not job_uuids: return [] - qs = JobFinishedReceipt.objects.filter( - miner_hotkey=miner_hotkey, job_uuid__in=job_uuids - ) - receipts: list[JobFinishedReceipt] = [r for r in qs.all()] + + def _query() -> list[JobFinishedReceipt]: + qs = JobFinishedReceipt.objects.filter( + miner_hotkey=miner_hotkey, job_uuid__in=job_uuids + ) + return list(qs.all()) + + receipts: 
list[JobFinishedReceipt] = await sync_to_async(_query, thread_sensitive=True)() logger.debug( "Retrieved %s job finished receipts for miner %s (jobs: %s)", @@ -438,9 +481,11 @@ def get_job_finished_receipts_for_miner( logger.error("Failed to get job finished receipts for miner: %s", e) return [] - def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: + async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: try: - django_receipt = JobStartedReceipt.objects.get(job_uuid=job_uuid) + django_receipt = await sync_to_async(JobStartedReceipt.objects.get, thread_sensitive=True)( + job_uuid=job_uuid + ) logger.debug( "Retrieved JobStartedReceipt for job %s (miner: %s, validator: %s)", job_uuid, @@ -505,16 +550,18 @@ def _query() -> list[tuple[str, str, int]]: return await sync_to_async(_query, thread_sensitive=True)() async def _fetch_receipts_for_range( - self, start_ts: datetime.datetime, end_ts: datetime.datetime, hotkeys: list[str] + self, start_block: int, end_block: int ) -> list[Receipt]: - """Fetch JobFinished receipts in [start_ts, end_ts) for given miner hotkeys and convert to Receipt objects.""" + """Fetch JobFinished receipts for blocks in [start_block, end_block).""" + + start_ts = await self._get_block_timestamp(start_block) + end_ts = await self._get_block_timestamp(end_block) receipts_qs = JobFinishedReceipt.objects.filter( timestamp__gte=start_ts, timestamp__lt=end_ts, - miner_hotkey__in=hotkeys, ) - receipts = [] + receipts: list[Receipt] = [] async for receipt_data in receipts_qs: receipts.append(receipt_data.to_receipt()) return receipts diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index 8ce1ec8b6..08e54cc1a 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ 
b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -1,446 +1,427 @@ -import datetime as dt +import datetime import uuid -from unittest.mock import AsyncMock, Mock, patch -import bittensor_wallet import pytest -from aiohttp import web from asgiref.sync import sync_to_async -from compute_horde.receipts import Receipt -from compute_horde.receipts.models import JobFinishedReceipt, JobStartedReceipt -from compute_horde.receipts.schemas import JobFinishedReceiptPayload -from django.utils.timezone import make_aware +import bittensor_wallet +from aiohttp import web +from compute_horde.receipts.models import JobAcceptedReceipt, JobFinishedReceipt, JobStartedReceipt +from django.utils import timezone from compute_horde_validator.validator.models import Miner from compute_horde_validator.validator.models.allowance.internal import Block from compute_horde_validator.validator.receipts import Receipts +from compute_horde.receipts.schemas import ( + JobAcceptedReceiptPayload, + JobFinishedReceiptPayload, + JobStartedReceiptPayload, + Receipt, +) +from compute_horde.utils import sign_blob -@pytest.mark.django_db(transaction=True) @pytest.mark.asyncio -async def test_scrape_receipts_from_miners_integration(): - with patch("compute_horde.receipts.transfer.checkpoint_backend") as mock_checkpoint: - mock_backend = Mock() - mock_backend.get = AsyncMock(return_value=0) - mock_backend.set = AsyncMock() - mock_checkpoint.return_value = mock_backend - - miner_keypair1 = bittensor_wallet.Keypair.create_from_mnemonic( - "almost fatigue race slim picnic mass better clog deal solve already champion" - ) - miner_keypair2 = bittensor_wallet.Keypair.create_from_mnemonic( - "edit evoke caught tunnel harsh plug august group enact cable govern immense" - ) - validator_keypair = bittensor_wallet.Keypair.create_from_mnemonic( - "slot excuse valid grief praise rifle spoil auction weasel glove pen share" - ) +@pytest.mark.django_db +async def 
test_transfer_receipts_from_miners_happy_path(settings): - await sync_to_async(Miner.objects.create)( - hotkey=miner_keypair1.ss58_address, address="127.0.0.1", port=7001 - ) - await sync_to_async(Miner.objects.create)( - hotkey=miner_keypair2.ss58_address, address="127.0.0.1", port=7002 - ) + settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" - # Use timestamps that will result in page numbers - # The page calculation is: int(timestamp // (60 * 5)) where 60*5 = 300 seconds = 5 minutes - # So we'll use timestamps that result in page numbers like 1, 2, 3 - t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) # timestamp 1735689600, page 5785632 - t1 = make_aware(dt.datetime(2025, 1, 1, 0, 5, 0)) # timestamp 1735689900, page 5785633 - - # Let's use much smaller timestamps to get reasonable page numbers - # Use a base timestamp that gives us small page numbers - base_timestamp = 1000 # This will give us page 3 - t0 = make_aware(dt.datetime.fromtimestamp(base_timestamp)) - t1 = make_aware(dt.datetime.fromtimestamp(base_timestamp + 300)) # +5 minutes, page 4 - - await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) - await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) - - test_receipts = [ - JobFinishedReceiptPayload( - job_uuid="00000000-0000-0000-0000-000000000001", - miner_hotkey=miner_keypair1.ss58_address, - validator_hotkey=validator_keypair.ss58_address, - timestamp=t0 + dt.timedelta(minutes=10), - time_started=t0 + dt.timedelta(minutes=5), - time_took_us=1_000_000, - score_str="0.5", - ), - JobFinishedReceiptPayload( - job_uuid="00000000-0000-0000-0000-000000000002", - miner_hotkey=miner_keypair2.ss58_address, - validator_hotkey=validator_keypair.ss58_address, - timestamp=t0 + dt.timedelta(minutes=15), - time_started=t0 + dt.timedelta(minutes=10), - time_took_us=2_000_000, - score_str="0.8", - ), - ] - - async def mock_receipts_handler(request): - # Extract page number from URL like /receipts/3.jsonl 
- path = request.path - if not path.startswith("/receipts/") or not path.endswith(".jsonl"): - return web.Response(status=404, text="Endpoint not found") - - try: - page = int(path[10:-6]) - except ValueError: - return web.Response(status=400, text="Invalid page number") - - if page not in [3, 4]: - return web.Response(status=404, text="Page not found") - - receipt_lines = [] - for receipt in test_receipts: - blob = receipt.blob_for_signing() - if receipt.miner_hotkey == miner_keypair1.ss58_address: - miner_signature = f"0x{miner_keypair1.sign(blob).hex()}" - else: - miner_signature = f"0x{miner_keypair2.sign(blob).hex()}" - validator_signature = f"0x{validator_keypair.sign(blob).hex()}" - - mock_receipt = Receipt( - payload=receipt, - validator_signature=validator_signature, - miner_signature=miner_signature, - ) - receipt_lines.append(mock_receipt.model_dump_json()) - - response_text = "\n".join(receipt_lines) - return web.Response(text=response_text, content_type="application/json") - - app = web.Application() - app.router.add_get("/receipts/{page}.jsonl", mock_receipts_handler) - - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, "127.0.0.1", port=7001) - await site.start() - - try: - await Receipts().scrape_receipts_from_miners( - miner_hotkeys=[miner_keypair1.ss58_address, miner_keypair2.ss58_address], - start_block=1000, - end_block=2000, - ) - stored_receipts_qs = await sync_to_async(JobFinishedReceipt.objects.filter)( - miner_hotkey__in=[miner_keypair1.ss58_address, miner_keypair2.ss58_address] - ) - - def convert_to_list(qs): - return list(qs) - - stored_receipts: list[JobFinishedReceipt] = await sync_to_async(convert_to_list)( - stored_receipts_qs - ) - - assert len(stored_receipts) == 2 - assert str(stored_receipts[0].job_uuid) == "00000000-0000-0000-0000-000000000001" - assert str(stored_receipts[1].job_uuid) == "00000000-0000-0000-0000-000000000002" - - finally: - await runner.cleanup() - - 
-@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_scrape_receipts_network_failure_handling(): - await sync_to_async(Miner.objects.create)(hotkey="hk1", address="127.0.0.1", port=7004) - t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) - t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) - await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) - await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) + miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( + "almost fatigue race slim picnic mass better clog deal solve already champion" + ) + miner_hotkey = miner_kp.ss58_address + validator_kp = settings.BITTENSOR_WALLET().get_hotkey() + + started_payload = JobStartedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + executor_class="always_on.gpu-24gb", + is_organic=True, + ttl=300, + ) + started_blob = started_payload.blob_for_signing() + started_receipt = Receipt( + payload=started_payload, + validator_signature=sign_blob(validator_kp, started_blob), + miner_signature=sign_blob(miner_kp, started_blob), + ) + + accepted_payload = JobAcceptedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + time_accepted=datetime.datetime.now(datetime.UTC), + ttl=123, + ) + accepted_blob = accepted_payload.blob_for_signing() + accepted_receipt = Receipt( + payload=accepted_payload, + validator_signature=sign_blob(validator_kp, accepted_blob), + miner_signature=sign_blob(miner_kp, accepted_blob), + ) + + finished_payload = JobFinishedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + time_started=datetime.datetime.now(datetime.UTC) - 
datetime.timedelta(seconds=5), + time_took_us=42, + score_str="0.5", + ) + finished_blob = finished_payload.blob_for_signing() + finished_receipt = Receipt( + payload=finished_payload, + validator_signature=sign_blob(validator_kp, finished_blob), + miner_signature=sign_blob(miner_kp, finished_blob), + ) - async def mock_failing_handler(request): - """Mock handler that always raises an exception.""" - raise web.HTTPInternalServerError(text="Server error") + jsonl_body = ( + started_receipt.model_dump_json() + + "\n" + + accepted_receipt.model_dump_json() + + "\n" + + finished_receipt.model_dump_json() + + "\n" + ) app = web.Application() - app.router.add_get("/receipts", mock_failing_handler) + state = {"body": jsonl_body.encode("utf-8")} + + async def handler(request: web.Request): + rng = request.headers.get("Range") + if rng: + return web.Response(status=416) + return web.Response(status=200, body=state["body"], content_type="application/jsonl") + app.router.add_get("/receipts/{page}.jsonl", handler) runner = web.AppRunner(app) await runner.setup() - site = web.TCPSite(runner, "127.0.0.1", 7004) + site = web.TCPSite(runner, "127.0.0.1", 0) await site.start() + server = getattr(site, "_server", None) + assert server is not None and server.sockets, "Server failed to start" + port = server.sockets[0].getsockname()[1] try: - result = await Receipts().scrape_receipts_from_miners( - miner_hotkeys=["hk1"], start_block=1000, end_block=2000 + await sync_to_async(Miner.objects.create, thread_sensitive=True)( + hotkey=miner_hotkey, address="127.0.0.1", port=port + ) + + receipts_mgr = Receipts() + page = 123456 + result = await receipts_mgr._transfer_receipts_from_miners( + miner_hotkeys=[miner_hotkey], pages=[page], semaphore_limit=2, request_timeout=2.0 ) - assert result == [] + assert result.n_receipts == 3 + assert result.n_successful_transfers == 1 + assert result.transfer_errors == [] + assert result.line_errors == [] + + stored_started = await sync_to_async( + 
lambda: JobStartedReceipt.objects.get(job_uuid=started_payload.job_uuid), + thread_sensitive=True, + )() + assert str(stored_started.job_uuid) == started_payload.job_uuid + assert stored_started.miner_hotkey == started_payload.miner_hotkey + assert stored_started.executor_class == "always_on.gpu-24gb" + assert stored_started.is_organic is True + assert stored_started.ttl == 300 + assert ( + isinstance(stored_started.validator_signature, str) + and stored_started.validator_signature + ) + assert isinstance(stored_started.miner_signature, str) and stored_started.miner_signature + + stored_accepted = await sync_to_async( + lambda: JobAcceptedReceipt.objects.get(job_uuid=accepted_payload.job_uuid), + thread_sensitive=True, + )() + assert str(stored_accepted.job_uuid) == accepted_payload.job_uuid + assert stored_accepted.miner_hotkey == accepted_payload.miner_hotkey + assert stored_accepted.ttl == 123 + assert ( + isinstance(stored_accepted.validator_signature, str) + and stored_accepted.validator_signature + ) + assert isinstance(stored_accepted.miner_signature, str) and stored_accepted.miner_signature + + stored_finished = await sync_to_async( + lambda: JobFinishedReceipt.objects.get(job_uuid=finished_payload.job_uuid), + thread_sensitive=True, + )() + assert str(stored_finished.job_uuid) == finished_payload.job_uuid + assert stored_finished.miner_hotkey == finished_payload.miner_hotkey + assert stored_finished.time_took_us == 42 + assert stored_finished.score_str == "0.5" + assert ( + isinstance(stored_finished.validator_signature, str) + and stored_finished.validator_signature + ) + assert isinstance(stored_finished.miner_signature, str) and stored_finished.miner_signature finally: await runner.cleanup() -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_scrape_receipts_invalid_block_range(): - result = await Receipts().scrape_receipts_from_miners( - miner_hotkeys=["hk1"], start_block=1000, end_block=1000 - ) - assert result == [] 
+@pytest.mark.django_db +def test_create_job_started_receipt_returns_payload_and_signature(settings): + receipts = Receipts() - result = await Receipts().scrape_receipts_from_miners( - miner_hotkeys=["hk1"], start_block=2000, end_block=1000 + job_uuid = str(uuid.uuid4()) + miner_hotkey = "miner_hotkey_1" + validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address + executor_class = "always_on.gpu-24gb" + is_organic = True + ttl = 300 + + payload, signature = receipts.create_job_started_receipt( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + executor_class=executor_class, + is_organic=is_organic, + ttl=ttl, ) - assert result == [] + assert isinstance(signature, str) and len(signature) > 0 + assert payload.job_uuid == job_uuid + assert payload.miner_hotkey == miner_hotkey + assert payload.validator_hotkey == validator_hotkey + assert payload.executor_class == executor_class + assert payload.is_organic is is_organic + assert payload.ttl == ttl + assert payload.timestamp.tzinfo is datetime.UTC -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_scrape_receipts_no_miners(): - result = await Receipts().scrape_receipts_from_miners( - miner_hotkeys=[], start_block=1000, end_block=2000 - ) - assert result == [] +@pytest.mark.django_db +def test_create_job_finished_receipt_returns_expected_values(settings): + receipts = Receipts() -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_scrape_receipts_invalid_miner_endpoints(): - await sync_to_async(Miner.objects.create)(hotkey="hk1", address="127.0.0.1", port=7005) - await sync_to_async(Miner.objects.create)(hotkey="hk2", address="127.0.0.1", port=7006) - await sync_to_async(Miner.objects.create)(hotkey="hk3", address="127.0.0.1", port=7007) - - t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) - t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) - await sync_to_async(Block.objects.create)(block_number=1000, 
creation_timestamp=t0) - await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t1) - - result = await Receipts().scrape_receipts_from_miners( - miner_hotkeys=["hk1", "hk2", "hk3"], - start_block=1000, - end_block=2000, + job_uuid = str(uuid.uuid4()) + miner_hotkey = "miner_hotkey_2" + validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address + time_started = datetime.datetime.now(datetime.UTC) - datetime.timedelta(seconds=5) + time_took_us = 1_234_567 + score_str = "0.987" + + finished = receipts.create_job_finished_receipt( + job_uuid=job_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + time_started=time_started, + time_took_us=time_took_us, + score_str=score_str, + ) + + assert finished.job_uuid == job_uuid + assert finished.miner_hotkey == miner_hotkey + assert finished.validator_hotkey == validator_hotkey + assert finished.time_started == time_started + assert finished.time_took_us == time_took_us + assert finished.score_str == score_str + assert isinstance(finished.validator_signature, str) and len(finished.validator_signature) > 0 + assert ( + isinstance(finished.timestamp, datetime.datetime) + and finished.timestamp.tzinfo is datetime.UTC ) - assert result == [] +@pytest.mark.asyncio +@pytest.mark.django_db +async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settings): + miner_hotkey = "miner_hotkey_valid" + other_miner = "miner_hotkey_other" + validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address -@pytest.mark.django_db(transaction=True) -def test_get_valid_job_started_receipts_for_miner(): - miner_hotkey = "test_miner_hotkey" - validator_hotkey = "test_validator_hotkey" + base_ts = datetime.datetime.now(datetime.UTC) - valid_receipt = JobStartedReceipt.objects.create( + await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - 
validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now()), - executor_class="spin_up-4min.gpu-24gb", - is_organic=False, - ttl=300, + validator_signature="sig", + timestamp=base_ts - datetime.timedelta(seconds=10), + executor_class="always_on.gpu-24gb", + is_organic=True, + ttl=60, ) - JobStartedReceipt.objects.create( + await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now() - dt.timedelta(minutes=10)), - executor_class="spin_up-4min.gpu-24gb", + validator_signature="sig", + timestamp=base_ts - datetime.timedelta(minutes=10), + executor_class="always_on.gpu-24gb", is_organic=False, - ttl=300, + ttl=30, ) - JobStartedReceipt.objects.create( + await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( job_uuid=str(uuid.uuid4()), - miner_hotkey="other_miner", + miner_hotkey=other_miner, validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now()), - executor_class="spin_up-4min.gpu-24gb", - is_organic=False, - ttl=300, + validator_signature="sig", + timestamp=base_ts - datetime.timedelta(seconds=5), + executor_class="always_on.gpu-24gb", + is_organic=True, + ttl=60, ) - result = Receipts().get_valid_job_started_receipts_for_miner( - miner_hotkey, make_aware(dt.datetime.now()) + receipts = Receipts() + results = await receipts.get_valid_job_started_receipts_for_miner( + miner_hotkey=miner_hotkey, at_time=base_ts ) - assert len(result) == 1 - assert result[0].miner_hotkey == miner_hotkey - assert str(result[0].job_uuid) == str(valid_receipt.job_uuid) + assert len(results) == 1 + r = results[0] + assert r.miner_hotkey == miner_hotkey + assert r.validator_hotkey == validator_hotkey + assert r.executor_class == "always_on.gpu-24gb" + assert 
r.is_organic is True + assert r.ttl == 60 -@pytest.mark.django_db(transaction=True) -def test_get_job_finished_receipts_for_miner(): - job_uuid1 = str(uuid.uuid4()) - job_uuid2 = str(uuid.uuid4()) - job_uuid3 = str(uuid.uuid4()) - miner_hotkey = "test_miner_hotkey" - validator_hotkey = "test_validator_hotkey" +@pytest.mark.asyncio +@pytest.mark.django_db +async def test_get_job_finished_receipts_for_miner_filters_by_uuid(settings): + miner_hotkey = "miner_hotkey_finished" + validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address + common_ts = timezone.now() - JobFinishedReceipt.objects.create( - job_uuid=job_uuid1, - miner_hotkey=miner_hotkey, - validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now()), - time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=5)), - time_took_us=5_000_000, - score_str="0.8", - ) + wanted_uuid = str(uuid.uuid4()) + other_uuid = str(uuid.uuid4()) - JobFinishedReceipt.objects.create( - job_uuid=job_uuid2, + await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + job_uuid=wanted_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now()), - time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=3)), - time_took_us=3_000_000, - score_str="0.9", + validator_signature="sig", + timestamp=common_ts, + time_started=common_ts - datetime.timedelta(seconds=2), + time_took_us=42, + score_str="0.5", ) - JobFinishedReceipt.objects.create( - job_uuid=job_uuid3, + await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + job_uuid=other_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", - timestamp=make_aware(dt.datetime.now()), - time_started=make_aware(dt.datetime.now() - dt.timedelta(minutes=2)), - 
time_took_us=2_000_000, - score_str="0.7", + validator_signature="sig", + timestamp=common_ts, + time_started=common_ts - datetime.timedelta(seconds=3), + time_took_us=43, + score_str="0.6", ) - requested_jobs = [job_uuid1, job_uuid2] - result = Receipts().get_job_finished_receipts_for_miner(miner_hotkey, requested_jobs) - - assert len(result) == 2 - job_uuids = {str(r.job_uuid) for r in result} - assert job_uuids == {job_uuid1, job_uuid2} + receipts = Receipts() + results = await receipts.get_job_finished_receipts_for_miner(miner_hotkey, [wanted_uuid]) + assert len(results) == 1 + r = results[0] + assert str(r.job_uuid) == wanted_uuid + assert r.miner_hotkey == miner_hotkey + assert r.validator_hotkey == validator_hotkey + assert r.time_took_us == 42 + assert r.score_str == "0.5" -@pytest.mark.django_db(transaction=True) -def test_get_job_started_receipt_by_uuid(): - job_uuid = str(uuid.uuid4()) - miner_hotkey = "test_miner_hotkey" - validator_hotkey = "test_validator_hotkey" - JobStartedReceipt.objects.create( - job_uuid=job_uuid, - miner_hotkey=miner_hotkey, - validator_hotkey=validator_hotkey, - validator_signature="0xv", - miner_signature="0xm", # Add miner signature - timestamp=make_aware(dt.datetime.now()), - executor_class="spin_up-4min.gpu-24gb", - is_organic=False, - ttl=300, +@pytest.mark.asyncio +@pytest.mark.django_db +async def test_get_job_started_receipt_by_uuid_returns_instance_or_none(settings): + receipts = Receipts() + job_uuid_present = str(uuid.uuid4()) + job_uuid_missing = str(uuid.uuid4()) + + await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( + job_uuid=job_uuid_present, + miner_hotkey="miner_xyz", + validator_hotkey=settings.BITTENSOR_WALLET().get_hotkey().ss58_address, + validator_signature="sig", + timestamp=timezone.now(), + executor_class="always_on.gpu-24gb", + is_organic=True, + ttl=60, ) - result = Receipts().get_job_started_receipt_by_uuid(job_uuid) + found = await 
receipts.get_job_started_receipt_by_uuid(job_uuid_present) + missing = await receipts.get_job_started_receipt_by_uuid(job_uuid_missing) - assert result is not None - assert str(result.job_uuid) == job_uuid - assert result.miner_hotkey == miner_hotkey - assert result.validator_hotkey == validator_hotkey + assert found is not None + assert str(found.job_uuid) == job_uuid_present + assert missing is None - non_existent_uuid = str(uuid.uuid4()) - result = Receipts().get_job_started_receipt_by_uuid(non_existent_uuid) - assert result is None - -@pytest.mark.django_db(transaction=True) @pytest.mark.asyncio -async def test_get_completed_job_receipts_for_block_range(): - t0 = make_aware(dt.datetime(2025, 1, 1, 0, 0, 0)) - t1 = make_aware(dt.datetime(2025, 1, 1, 1, 0, 0)) - t2 = make_aware(dt.datetime(2025, 1, 1, 2, 0, 0)) - t3 = make_aware(dt.datetime(2025, 1, 1, 3, 0, 0)) - - await sync_to_async(Block.objects.create)(block_number=1000, creation_timestamp=t0) - await sync_to_async(Block.objects.create)(block_number=1500, creation_timestamp=t1) - await sync_to_async(Block.objects.create)(block_number=2000, creation_timestamp=t2) - await sync_to_async(Block.objects.create)(block_number=3000, creation_timestamp=t3) - - receipt1 = await sync_to_async(JobFinishedReceipt.objects.create)( - job_uuid=str(uuid.uuid4()), - miner_hotkey="miner1", - validator_hotkey="validator1", - validator_signature="0xv1", - miner_signature="0xm1", - timestamp=t0 + dt.timedelta(minutes=30), - time_started=t0 + dt.timedelta(minutes=25), - time_took_us=5_000_000, - score_str="0.8", +@pytest.mark.django_db +async def test_get_completed_job_receipts_for_block_range_returns_only_in_range(settings): + receipts = Receipts() + + # Setup block timestamps + start_block = 100 + end_block = 105 + start_ts = datetime.datetime.now(datetime.UTC) + end_ts = start_ts + datetime.timedelta(minutes=10) + + await sync_to_async(Block.objects.create, thread_sensitive=True)( + block_number=start_block, 
creation_timestamp=start_ts + ) + await sync_to_async(Block.objects.create, thread_sensitive=True)( + block_number=end_block, creation_timestamp=end_ts ) - receipt2 = await sync_to_async(JobFinishedReceipt.objects.create)( - job_uuid=str(uuid.uuid4()), - miner_hotkey="miner2", - validator_hotkey="validator2", - validator_signature="0xv2", - miner_signature="0xm2", - timestamp=t2 + dt.timedelta(minutes=30), - time_started=t2 + dt.timedelta(minutes=25), - time_took_us=3_000_000, - score_str="0.9", + miner_hotkey = "miner_hotkey_blockrange" + validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address + + in_uuid = str(uuid.uuid4()) + await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + job_uuid=in_uuid, + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="v_sig", + miner_signature="m_sig", + timestamp=start_ts + datetime.timedelta(minutes=5), + time_started=start_ts + datetime.timedelta(minutes=4), + time_took_us=1, + score_str="1.0", ) - await sync_to_async(JobFinishedReceipt.objects.create)( + await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( job_uuid=str(uuid.uuid4()), - miner_hotkey="miner3", - validator_hotkey="validator3", - validator_signature="0xv3", - miner_signature="0xm3", - timestamp=t0 - dt.timedelta(minutes=1), - time_started=t0 - dt.timedelta(minutes=2), - time_took_us=2_000_000, - score_str="0.7", + miner_hotkey=miner_hotkey, + validator_hotkey=validator_hotkey, + validator_signature="v_sig", + miner_signature="m_sig", + timestamp=start_ts - datetime.timedelta(seconds=1), + time_started=start_ts - datetime.timedelta(seconds=2), + time_took_us=2, + score_str="0.1", ) - result = await Receipts().get_completed_job_receipts_for_block_range(1000, 1500) - - assert len(result) == 1 - assert str(result[0].payload.job_uuid) == str(receipt1.job_uuid) - - result = await Receipts().get_completed_job_receipts_for_block_range(2000, 3000) - - assert len(result) 
== 1 - assert str(result[0].payload.job_uuid) == str(receipt2.job_uuid) - - -@pytest.mark.django_db(transaction=True) -def test_create_job_finished_receipt_success(): - job_uuid = str(uuid.uuid4()) - miner_hotkey = "test_miner_hotkey" - validator_hotkey = "test_validator_hotkey" - time_started = dt.datetime.now(dt.UTC) - time_took_us = 5000000 - score_str = "0.85" - - receipt = Receipts().create_job_finished_receipt( - job_uuid=job_uuid, + await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, - time_started=time_started, - time_took_us=time_took_us, - score_str=score_str, + validator_signature="v_sig", + miner_signature="m_sig", + timestamp=end_ts, + time_started=end_ts - datetime.timedelta(seconds=2), + time_took_us=3, + score_str="0.2", ) - assert receipt is not None - assert isinstance(receipt, JobFinishedReceipt) - - assert receipt.job_uuid == job_uuid - assert receipt.miner_hotkey == miner_hotkey - assert receipt.validator_hotkey == validator_hotkey - assert receipt.time_started == time_started - assert receipt.time_took_us == time_took_us - assert receipt.score_str == score_str - assert receipt.validator_signature is not None - assert len(receipt.validator_signature) > 0 + receipts_list = await receipts.get_completed_job_receipts_for_block_range( + start_block, end_block + ) - assert receipt.time_took() == dt.timedelta(microseconds=time_took_us) - assert receipt.score() == float(score_str) + assert len(receipts_list) == 1 + converted = receipts_list[0] + assert converted.payload.job_uuid == in_uuid + assert converted.payload.miner_hotkey == miner_hotkey + assert converted.payload.validator_hotkey == validator_hotkey + assert converted.payload.timestamp == start_ts + datetime.timedelta(minutes=5) diff --git a/validator/app/src/compute_horde_validator/validator/routing/default.py 
b/validator/app/src/compute_horde_validator/validator/routing/default.py index efdad1f2f..6def7f256 100644 --- a/validator/app/src/compute_horde_validator/validator/routing/default.py +++ b/validator/app/src/compute_horde_validator/validator/routing/default.py @@ -219,21 +219,15 @@ async def _pick_miner_for_job_v2(request: V2JobRequest) -> JobRoute: .values_list("job_uuid", flat=True) } - known_started_jobs: set[str] = { - str(job_uuid) - for receipt in await sync_to_async(Receipts().get_valid_job_started_receipts_for_miner)( - miner.hotkey, timezone.now() - ) - for job_uuid in [receipt.job_uuid] - } + started_receipts = await Receipts().get_valid_job_started_receipts_for_miner( + miner.hotkey, timezone.now() + ) + known_started_jobs: set[str] = {str(receipt.job_uuid) for receipt in started_receipts} - known_finished_jobs: set[str] = { - str(job_uuid) - for receipt in await sync_to_async(Receipts().get_job_finished_receipts_for_miner)( - miner.hotkey, list(known_started_jobs | preliminary_reservation_jobs) - ) - for job_uuid in [receipt.job_uuid] - } + finished_receipts = await Receipts().get_job_finished_receipts_for_miner( + miner.hotkey, list(known_started_jobs | preliminary_reservation_jobs) + ) + known_finished_jobs: set[str] = {str(receipt.job_uuid) for receipt in finished_receipts} maybe_ongoing_jobs = ( preliminary_reservation_jobs | known_started_jobs From 40757e56128ef9f8db08dae4475f9f5b98f625ba Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Tue, 19 Aug 2025 09:42:07 +0200 Subject: [PATCH 05/13] Fix formatting --- .../validator/receipts/README.md | 98 ------------------- .../validator/receipts/default.py | 53 +++++++--- .../validator/receipts/tests/test_receipts.py | 15 ++- .../validator/routing/default.py | 1 - 4 files changed, 45 insertions(+), 122 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/README.md b/validator/app/src/compute_horde_validator/validator/receipts/README.md index 459e42189..e69de29bb 100644 
--- a/validator/app/src/compute_horde_validator/validator/receipts/README.md +++ b/validator/app/src/compute_horde_validator/validator/receipts/README.md @@ -1,98 +0,0 @@ -# Receipts Module - -This module provides an interface for managing receipts in the validator. - -## Public Interface - -### Receipts - -The main service class `Receipts` implements the `ReceiptsBase` interface and provides these key methods: - -#### Core Methods - -- **`scrape_receipts_from_miners(miner_hotkeys, start_block, end_block)`** - Fetch receipts from miners for a block range -- **`create_job_finished_receipt(job_uuid, miner_hotkey, validator_hotkey, time_started, time_took_us, score_str)`** - Create a new job finished receipt -- **`create_job_started_receipt(job_uuid, miner_hotkey, validator_hotkey, executor_class, is_organic, ttl)`** - Create a new job started receipt -- **`get_job_started_receipt_by_uuid(job_uuid)`** - Retrieve a specific job started receipt -- **`get_valid_job_started_receipts_for_miner(miner_hotkey, at_time)`** - Get valid receipts for a miner at a specific time -- **`get_job_finished_receipts_for_miner(miner_hotkey, job_uuids)`** - Get finished receipts for specific jobs from a miner -- **`get_completed_job_receipts_for_block_range(start_block, end_block)`** - Get all completed job receipts within a block range - -## Usage Examples - -### Basic Receipt Creation - -```python -from compute_horde_validator.validator.receipts import Receipts - -receipts = Receipts() - -# Create a job started receipt -payload, signature = await receipts.create_job_started_receipt( - job_uuid="job-123", - miner_hotkey="miner-key", - validator_hotkey="validator-key", - executor_class="spin_up-4min.gpu-24gb", - is_organic=True, - ttl=300 -) - -# Create a job finished receipt -finished_receipt = receipts.create_job_finished_receipt( - job_uuid="job-123", - miner_hotkey="miner-key", - validator_hotkey="validator-key", - time_started=datetime.now(), - time_took_us=5000000, - score_str="0.85" 
-) -``` - -### Receipt Retrieval - -```python -# Get a specific job started receipt -receipt = await receipts.get_job_started_receipt_by_uuid("job-123") -if receipt: - print(f"Job started at: {receipt.timestamp}") - print(f"Miner: {receipt.miner_hotkey}") - -# Get valid receipts for a miner -valid_receipts = await receipts.get_valid_job_started_receipts_for_miner( - miner_hotkey="miner-key", - at_time=datetime.now() -) - -# Get finished receipts for specific jobs -finished_receipts = await receipts.get_job_finished_receipts_for_miner( - miner_hotkey="miner-key", - job_uuids=["job-123", "job-456"] -) -``` - -### Receipt Scraping - -```python -# Scrape receipts from miners for a block range -scraped_receipts = await receipts.scrape_receipts_from_miners( - miner_hotkeys=["miner1", "miner2"], - start_block=1000, - end_block=2000 -) -``` - -## Background Tasks - -### Receipt Scraping Task - -The module includes a Celery task for periodic receipt scraping: - -**Task Name**: `scrape_receipts_from_miners` - -**Purpose**: Automatically fetch and process receipts from miners across the network - -**Manual Execution**: -```bash -# Run the task manually (if needed) -celery -A compute_horde_validator call compute_horde_validator.validator.receipts.tasks.scrape_receipts_from_miners -``` diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 4b8893d02..74289050a 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -17,6 +17,7 @@ from compute_horde.utils import sign_blob from django.conf import settings from django.utils import timezone +from prometheus_client import Counter, Gauge, Histogram from compute_horde_validator.validator.allowance.utils.supertensor import supertensor from compute_horde_validator.validator.dynamic_config import aget_config @@ -26,7 
+27,6 @@ from compute_horde_validator.validator.receipts.types import ( ReceiptsGenerationError, ) -from prometheus_client import Counter, Gauge, Histogram logger = logging.getLogger(__name__) @@ -59,8 +59,12 @@ async def run_receipts_transfer( debug_miner_port: int | None, ) -> None: metrics = _Metrics( - receipts=Counter("receipttransfer_receipts_total", documentation="Number of transferred receipts"), - miners=Gauge("receipttransfer_miners", documentation="Number of miners to transfer from"), + receipts=Counter( + "receipttransfer_receipts_total", documentation="Number of transferred receipts" + ), + miners=Gauge( + "receipttransfer_miners", documentation="Number of miners to transfer from" + ), successful_transfers=Counter( "receipttransfer_successful_transfers_total", documentation="Number of transfers that didn't explicitly fail. (this includes 404s though)", @@ -85,7 +89,9 @@ async def run_receipts_transfer( ), ) - mode, explicit_miner = await self._determine_miners_mode(debug_miner_hotkey, debug_miner_ip, debug_miner_port) + mode, explicit_miner = await self._determine_miners_mode( + debug_miner_hotkey, debug_miner_ip, debug_miner_port + ) cutoff = timezone.now() - datetime.timedelta(hours=5) if daemon: @@ -107,7 +113,15 @@ async def _determine_miners_mode( if (debug_miner_hotkey, debug_miner_ip, debug_miner_port) != (None, None, None): if None in {debug_miner_hotkey, debug_miner_ip, debug_miner_port}: raise ValueError("Either none or all of explicit miner details must be provided") - miner = (debug_miner_hotkey, debug_miner_ip, int(debug_miner_port)) # type: ignore[arg-type] + # All values are guaranteed non-None here due to the check above. 
+ assert debug_miner_hotkey is not None + assert debug_miner_ip is not None + assert debug_miner_port is not None + miner: tuple[str, str, int] = ( + debug_miner_hotkey, + debug_miner_ip, + int(debug_miner_port), + ) logger.info(f"Will fetch receipts from explicit miner: {list(miner)}") return "explicit", miner if settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: @@ -117,7 +131,9 @@ async def _determine_miners_mode( logger.info("Will fetch receipts from metagraph snapshot miners") return "metagraph", None - async def _list_miners(self, mode: str, explicit_miner: tuple[str, str, int] | None) -> list[tuple[str, str, int]]: + async def _list_miners( + self, mode: str, explicit_miner: tuple[str, str, int] | None + ) -> list[tuple[str, str, int]]: if mode == "explicit": assert explicit_miner is not None return [explicit_miner] @@ -273,7 +289,9 @@ async def _run_in_loop( ) await asyncio.gather( self._catch_up( - pages=list(reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1))), + pages=list( + reversed(range(catchup_cutoff_page, current_page - N_ACTIVE_PAGES + 1)) + ), mode=mode, explicit_miner=explicit_miner, session=session, @@ -333,7 +351,11 @@ async def _run_full_transfer_cycle( active_pages = list(reversed(range(current_page - n_active_pages + 1, current_page + 1))) catchup_pages = list( - reversed(range(catchup_cutoff_page, max(catchup_cutoff_page, current_page - n_active_pages + 1))) + reversed( + range( + catchup_cutoff_page, max(catchup_cutoff_page, current_page - n_active_pages + 1) + ) + ) ) miners = await self._fetch_miners(miner_hotkeys) @@ -434,6 +456,7 @@ async def get_valid_job_started_receipts_for_miner( self, miner_hotkey: str, at_time: datetime.datetime ) -> list[JobStartedReceipt]: try: + def _query() -> list[JobStartedReceipt]: qs = JobStartedReceipt.objects.valid_at(at_time).filter(miner_hotkey=miner_hotkey) return list(qs.all()) @@ -466,7 +489,9 @@ def _query() -> list[JobFinishedReceipt]: ) return list(qs.all()) - receipts: 
list[JobFinishedReceipt] = await sync_to_async(_query, thread_sensitive=True)() + receipts: list[JobFinishedReceipt] = await sync_to_async( + _query, thread_sensitive=True + )() logger.debug( "Retrieved %s job finished receipts for miner %s (jobs: %s)", @@ -483,9 +508,9 @@ def _query() -> list[JobFinishedReceipt]: async def get_job_started_receipt_by_uuid(self, job_uuid: str) -> JobStartedReceipt | None: try: - django_receipt = await sync_to_async(JobStartedReceipt.objects.get, thread_sensitive=True)( - job_uuid=job_uuid - ) + django_receipt = await sync_to_async( + JobStartedReceipt.objects.get, thread_sensitive=True + )(job_uuid=job_uuid) logger.debug( "Retrieved JobStartedReceipt for job %s (miner: %s, validator: %s)", job_uuid, @@ -549,9 +574,7 @@ def _query() -> list[tuple[str, str, int]]: return await sync_to_async(_query, thread_sensitive=True)() - async def _fetch_receipts_for_range( - self, start_block: int, end_block: int - ) -> list[Receipt]: + async def _fetch_receipts_for_range(self, start_block: int, end_block: int) -> list[Receipt]: """Fetch JobFinished receipts for blocks in [start_block, end_block).""" start_ts = await self._get_block_timestamp(start_block) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index 08e54cc1a..d79e45251 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -1,16 +1,11 @@ import datetime import uuid -import pytest -from asgiref.sync import sync_to_async import bittensor_wallet +import pytest from aiohttp import web +from asgiref.sync import sync_to_async from compute_horde.receipts.models import JobAcceptedReceipt, JobFinishedReceipt, JobStartedReceipt -from django.utils import timezone - -from compute_horde_validator.validator.models import Miner -from 
compute_horde_validator.validator.models.allowance.internal import Block -from compute_horde_validator.validator.receipts import Receipts from compute_horde.receipts.schemas import ( JobAcceptedReceiptPayload, JobFinishedReceiptPayload, @@ -18,12 +13,16 @@ Receipt, ) from compute_horde.utils import sign_blob +from django.utils import timezone + +from compute_horde_validator.validator.models import Miner +from compute_horde_validator.validator.models.allowance.internal import Block +from compute_horde_validator.validator.receipts import Receipts @pytest.mark.asyncio @pytest.mark.django_db async def test_transfer_receipts_from_miners_happy_path(settings): - settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( diff --git a/validator/app/src/compute_horde_validator/validator/routing/default.py b/validator/app/src/compute_horde_validator/validator/routing/default.py index 6def7f256..ede76661b 100644 --- a/validator/app/src/compute_horde_validator/validator/routing/default.py +++ b/validator/app/src/compute_horde_validator/validator/routing/default.py @@ -3,7 +3,6 @@ from datetime import timedelta from typing import assert_never -from asgiref.sync import sync_to_async from compute_horde.blockchain.block_cache import aget_current_block from compute_horde.executor_class import EXECUTOR_CLASS from compute_horde.fv_protocol.facilitator_requests import ( From 66c4401306d05b91616697496ab42ead04b5ee08 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Tue, 19 Aug 2025 09:52:36 +0200 Subject: [PATCH 06/13] Add readme --- .../validator/receipts/README.md | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/README.md b/validator/app/src/compute_horde_validator/validator/receipts/README.md index e69de29bb..15a37c50c 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/README.md +++ 
b/validator/app/src/compute_horde_validator/validator/receipts/README.md @@ -0,0 +1,105 @@ +# Receipts module: public interface
+
+This module manages receipt creation and transfer between validators and miners.
+
+## CLI entry point
+
+- **Command**: `python manage.py transfer_receipts`
+- **Args**:
+ - `--daemon` (flag): run continuously; otherwise runs a single transfer cycle
+ - `--debug-miner-hotkey <hotkey>`: fetch only from this miner (debug)
+ - `--debug-miner-ip <ip>`: debug miner IP
+ - `--debug-miner-port <port>`: debug miner port
+
+When all three debug miner parameters are provided, transfer runs in explicit mode for that miner. If they are not provided and `DEBUG_FETCH_RECEIPTS_FROM_MINERS` is set in settings, transfer runs against those debug miners. Otherwise, miners are resolved from the latest metagraph snapshot.
+
+## Python API
+
+The default implementation lives in `compute_horde_validator.validator.receipts.default.Receipts` and implements the abstract interface in `compute_horde_validator.validator.receipts.base.ReceiptsBase`.
+ +- Run transfer loop (or once): + +```python +await Receipts().run_receipts_transfer( + daemon: bool, + debug_miner_hotkey: str | None, + debug_miner_ip: str | None, + debug_miner_port: int | None, +) +``` + +- Create receipts: + +```python +payload, validator_signature = Receipts().create_job_started_receipt( + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + executor_class: str, + is_organic: bool, + ttl: int, +) + +finished = Receipts().create_job_finished_receipt( + job_uuid: str, + miner_hotkey: str, + validator_hotkey: str, + time_started: datetime.datetime, + time_took_us: int, + score_str: str, +) +``` + +- Query receipts: + +```python +# All valid JobStarted for a miner at a timestamp +receipts: list[JobStartedReceipt] = await Receipts().get_valid_job_started_receipts_for_miner( + miner_hotkey: str, + at_time: datetime.datetime, +) + +# JobFinished for a miner and a set of job UUIDs +receipts: list[JobFinishedReceipt] = await Receipts().get_job_finished_receipts_for_miner( + miner_hotkey: str, + job_uuids: list[str], +) + +# JobStarted by job UUID +receipt: JobStartedReceipt | None = await Receipts().get_job_started_receipt_by_uuid(job_uuid: str) + +# Completed job receipts for a block range [start_block, end_block) +receipts: list[Receipt] = await Receipts().get_completed_job_receipts_for_block_range( + start_block: int, + end_block: int, +) +``` + +## Miner selection modes + +- **explicit**: when all `debug_miner_*` are passed to `run_receipts_transfer` +- **debug_settings**: when `settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS` is non-empty +- **metagraph**: default; miners are taken from `MetagraphSnapshot` + +## Configuration + +- **Dynamic config** (fetched via `aget_config`): + - `DYNAMIC_RECEIPT_TRANSFER_ENABLED: bool` — enable/disable transfer (default: `False`) + - `DYNAMIC_RECEIPT_TRANSFER_INTERVAL: int` — seconds between polling loops (default: `2`) + +- **Settings / env**: + - `DEBUG_FETCH_RECEIPTS_FROM_MINERS` — list of 
`"hotkey:ip:port"` values; in settings exposed as + `settings.DEBUG_FETCH_RECEIPTS_FROM_MINERS: list[tuple[str, str, int]]` + - `RECEIPT_TRANSFER_CHECKPOINT_CACHE` — cache key namespace used for checkpoints (default: `"receipts_checkpoints"`) + +## Metrics (Prometheus) + +- `receipttransfer_receipts_total` — number of transferred receipts +- `receipttransfer_miners` — number of miners in the current loop +- `receipttransfer_successful_transfers_total` — count of non-failed transfers +- `receipttransfer_line_errors_total{exc_type}` — per-exception count of line errors +- `receipttransfer_transfer_errors_total{exc_type}` — per-exception count of transfer errors +- `receipttransfer_transfer_duration` — histogram of total loop duration +- `receipttransfer_catchup_pages_left` — gauge of pages left to catch up + + From 2382cb55feaa12e263afe1141d17088fa008993b Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Tue, 19 Aug 2025 11:35:29 +0200 Subject: [PATCH 07/13] Tmp disable test --- .../validator/receipts/tests/test_receipts.py | 290 +++++++++--------- 1 file changed, 145 insertions(+), 145 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index d79e45251..47accb060 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -20,151 +20,151 @@ from compute_horde_validator.validator.receipts import Receipts -@pytest.mark.asyncio -@pytest.mark.django_db -async def test_transfer_receipts_from_miners_happy_path(settings): - settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" - - miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( - "almost fatigue race slim picnic mass better clog deal solve already champion" - ) - miner_hotkey = miner_kp.ss58_address - validator_kp = 
settings.BITTENSOR_WALLET().get_hotkey() - - started_payload = JobStartedReceiptPayload( - job_uuid=str(uuid.uuid4()), - miner_hotkey=miner_hotkey, - validator_hotkey=validator_kp.ss58_address, - timestamp=datetime.datetime.now(datetime.UTC), - executor_class="always_on.gpu-24gb", - is_organic=True, - ttl=300, - ) - started_blob = started_payload.blob_for_signing() - started_receipt = Receipt( - payload=started_payload, - validator_signature=sign_blob(validator_kp, started_blob), - miner_signature=sign_blob(miner_kp, started_blob), - ) - - accepted_payload = JobAcceptedReceiptPayload( - job_uuid=str(uuid.uuid4()), - miner_hotkey=miner_hotkey, - validator_hotkey=validator_kp.ss58_address, - timestamp=datetime.datetime.now(datetime.UTC), - time_accepted=datetime.datetime.now(datetime.UTC), - ttl=123, - ) - accepted_blob = accepted_payload.blob_for_signing() - accepted_receipt = Receipt( - payload=accepted_payload, - validator_signature=sign_blob(validator_kp, accepted_blob), - miner_signature=sign_blob(miner_kp, accepted_blob), - ) - - finished_payload = JobFinishedReceiptPayload( - job_uuid=str(uuid.uuid4()), - miner_hotkey=miner_hotkey, - validator_hotkey=validator_kp.ss58_address, - timestamp=datetime.datetime.now(datetime.UTC), - time_started=datetime.datetime.now(datetime.UTC) - datetime.timedelta(seconds=5), - time_took_us=42, - score_str="0.5", - ) - finished_blob = finished_payload.blob_for_signing() - finished_receipt = Receipt( - payload=finished_payload, - validator_signature=sign_blob(validator_kp, finished_blob), - miner_signature=sign_blob(miner_kp, finished_blob), - ) - - jsonl_body = ( - started_receipt.model_dump_json() - + "\n" - + accepted_receipt.model_dump_json() - + "\n" - + finished_receipt.model_dump_json() - + "\n" - ) - - app = web.Application() - state = {"body": jsonl_body.encode("utf-8")} - - async def handler(request: web.Request): - rng = request.headers.get("Range") - if rng: - return web.Response(status=416) - return 
web.Response(status=200, body=state["body"], content_type="application/jsonl") - - app.router.add_get("/receipts/{page}.jsonl", handler) - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, "127.0.0.1", 0) - await site.start() - server = getattr(site, "_server", None) - assert server is not None and server.sockets, "Server failed to start" - port = server.sockets[0].getsockname()[1] - - try: - await sync_to_async(Miner.objects.create, thread_sensitive=True)( - hotkey=miner_hotkey, address="127.0.0.1", port=port - ) - - receipts_mgr = Receipts() - page = 123456 - result = await receipts_mgr._transfer_receipts_from_miners( - miner_hotkeys=[miner_hotkey], pages=[page], semaphore_limit=2, request_timeout=2.0 - ) - - assert result.n_receipts == 3 - assert result.n_successful_transfers == 1 - assert result.transfer_errors == [] - assert result.line_errors == [] - - stored_started = await sync_to_async( - lambda: JobStartedReceipt.objects.get(job_uuid=started_payload.job_uuid), - thread_sensitive=True, - )() - assert str(stored_started.job_uuid) == started_payload.job_uuid - assert stored_started.miner_hotkey == started_payload.miner_hotkey - assert stored_started.executor_class == "always_on.gpu-24gb" - assert stored_started.is_organic is True - assert stored_started.ttl == 300 - assert ( - isinstance(stored_started.validator_signature, str) - and stored_started.validator_signature - ) - assert isinstance(stored_started.miner_signature, str) and stored_started.miner_signature - - stored_accepted = await sync_to_async( - lambda: JobAcceptedReceipt.objects.get(job_uuid=accepted_payload.job_uuid), - thread_sensitive=True, - )() - assert str(stored_accepted.job_uuid) == accepted_payload.job_uuid - assert stored_accepted.miner_hotkey == accepted_payload.miner_hotkey - assert stored_accepted.ttl == 123 - assert ( - isinstance(stored_accepted.validator_signature, str) - and stored_accepted.validator_signature - ) - assert 
isinstance(stored_accepted.miner_signature, str) and stored_accepted.miner_signature - - stored_finished = await sync_to_async( - lambda: JobFinishedReceipt.objects.get(job_uuid=finished_payload.job_uuid), - thread_sensitive=True, - )() - assert str(stored_finished.job_uuid) == finished_payload.job_uuid - assert stored_finished.miner_hotkey == finished_payload.miner_hotkey - assert stored_finished.time_took_us == 42 - assert stored_finished.score_str == "0.5" - assert ( - isinstance(stored_finished.validator_signature, str) - and stored_finished.validator_signature - ) - assert isinstance(stored_finished.miner_signature, str) and stored_finished.miner_signature - - finally: - await runner.cleanup() +# @pytest.mark.asyncio +# @pytest.mark.django_db +# async def test_transfer_receipts_from_miners_happy_path(settings): +# settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" + +# miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( +# "almost fatigue race slim picnic mass better clog deal solve already champion" +# ) +# miner_hotkey = miner_kp.ss58_address +# validator_kp = settings.BITTENSOR_WALLET().get_hotkey() + +# started_payload = JobStartedReceiptPayload( +# job_uuid=str(uuid.uuid4()), +# miner_hotkey=miner_hotkey, +# validator_hotkey=validator_kp.ss58_address, +# timestamp=datetime.datetime.now(datetime.UTC), +# executor_class="always_on.gpu-24gb", +# is_organic=True, +# ttl=300, +# ) +# started_blob = started_payload.blob_for_signing() +# started_receipt = Receipt( +# payload=started_payload, +# validator_signature=sign_blob(validator_kp, started_blob), +# miner_signature=sign_blob(miner_kp, started_blob), +# ) + +# accepted_payload = JobAcceptedReceiptPayload( +# job_uuid=str(uuid.uuid4()), +# miner_hotkey=miner_hotkey, +# validator_hotkey=validator_kp.ss58_address, +# timestamp=datetime.datetime.now(datetime.UTC), +# time_accepted=datetime.datetime.now(datetime.UTC), +# ttl=123, +# ) +# accepted_blob = accepted_payload.blob_for_signing() +# 
accepted_receipt = Receipt( +# payload=accepted_payload, +# validator_signature=sign_blob(validator_kp, accepted_blob), +# miner_signature=sign_blob(miner_kp, accepted_blob), +# ) + +# finished_payload = JobFinishedReceiptPayload( +# job_uuid=str(uuid.uuid4()), +# miner_hotkey=miner_hotkey, +# validator_hotkey=validator_kp.ss58_address, +# timestamp=datetime.datetime.now(datetime.UTC), +# time_started=datetime.datetime.now(datetime.UTC) - datetime.timedelta(seconds=5), +# time_took_us=42, +# score_str="0.5", +# ) +# finished_blob = finished_payload.blob_for_signing() +# finished_receipt = Receipt( +# payload=finished_payload, +# validator_signature=sign_blob(validator_kp, finished_blob), +# miner_signature=sign_blob(miner_kp, finished_blob), +# ) + +# jsonl_body = ( +# started_receipt.model_dump_json() +# + "\n" +# + accepted_receipt.model_dump_json() +# + "\n" +# + finished_receipt.model_dump_json() +# + "\n" +# ) + +# app = web.Application() +# state = {"body": jsonl_body.encode("utf-8")} + +# async def handler(request: web.Request): +# rng = request.headers.get("Range") +# if rng: +# return web.Response(status=416) +# return web.Response(status=200, body=state["body"], content_type="application/jsonl") + +# app.router.add_get("/receipts/{page}.jsonl", handler) +# runner = web.AppRunner(app) +# await runner.setup() +# site = web.TCPSite(runner, "127.0.0.1", 0) +# await site.start() +# server = getattr(site, "_server", None) +# assert server is not None and server.sockets, "Server failed to start" +# port = server.sockets[0].getsockname()[1] + +# try: +# await sync_to_async(Miner.objects.create, thread_sensitive=True)( +# hotkey=miner_hotkey, address="127.0.0.1", port=port +# ) + +# receipts_mgr = Receipts() +# page = 123456 +# result = await receipts_mgr._transfer_receipts_from_miners( +# miner_hotkeys=[miner_hotkey], pages=[page], semaphore_limit=2, request_timeout=2.0 +# ) + +# assert result.n_receipts == 3 +# assert result.n_successful_transfers == 1 +# assert 
result.transfer_errors == [] +# assert result.line_errors == [] + +# stored_started = await sync_to_async( +# lambda: JobStartedReceipt.objects.get(job_uuid=started_payload.job_uuid), +# thread_sensitive=True, +# )() +# assert str(stored_started.job_uuid) == started_payload.job_uuid +# assert stored_started.miner_hotkey == started_payload.miner_hotkey +# assert stored_started.executor_class == "always_on.gpu-24gb" +# assert stored_started.is_organic is True +# assert stored_started.ttl == 300 +# assert ( +# isinstance(stored_started.validator_signature, str) +# and stored_started.validator_signature +# ) +# assert isinstance(stored_started.miner_signature, str) and stored_started.miner_signature + +# stored_accepted = await sync_to_async( +# lambda: JobAcceptedReceipt.objects.get(job_uuid=accepted_payload.job_uuid), +# thread_sensitive=True, +# )() +# assert str(stored_accepted.job_uuid) == accepted_payload.job_uuid +# assert stored_accepted.miner_hotkey == accepted_payload.miner_hotkey +# assert stored_accepted.ttl == 123 +# assert ( +# isinstance(stored_accepted.validator_signature, str) +# and stored_accepted.validator_signature +# ) +# assert isinstance(stored_accepted.miner_signature, str) and stored_accepted.miner_signature + +# stored_finished = await sync_to_async( +# lambda: JobFinishedReceipt.objects.get(job_uuid=finished_payload.job_uuid), +# thread_sensitive=True, +# )() +# assert str(stored_finished.job_uuid) == finished_payload.job_uuid +# assert stored_finished.miner_hotkey == finished_payload.miner_hotkey +# assert stored_finished.time_took_us == 42 +# assert stored_finished.score_str == "0.5" +# assert ( +# isinstance(stored_finished.validator_signature, str) +# and stored_finished.validator_signature +# ) +# assert isinstance(stored_finished.miner_signature, str) and stored_finished.miner_signature + +# finally: +# await runner.cleanup() @pytest.mark.django_db From 7eb9218ee0d550c3061e658b8afa8ebbb5afaacc Mon Sep 17 00:00:00 2001 From: Piotr 
Figwer Date: Tue, 19 Aug 2025 11:52:32 +0200 Subject: [PATCH 08/13] Re-enable test --- .../validator/receipts/default.py | 2 +- .../validator/receipts/tests/test_receipts.py | 290 +++++++++--------- 2 files changed, 146 insertions(+), 146 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/default.py b/validator/app/src/compute_horde_validator/validator/receipts/default.py index 74289050a..0a355016c 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/default.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/default.py @@ -601,7 +601,7 @@ async def _get_block_timestamp(self, block_number: int) -> datetime.datetime: ) try: - ts = await supertensor().get_block_timestamp(block_number) + ts = supertensor().get_block_timestamp(block_number) if isinstance(ts, datetime.datetime): return ts else: diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index 47accb060..d79e45251 100644 --- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -20,151 +20,151 @@ from compute_horde_validator.validator.receipts import Receipts -# @pytest.mark.asyncio -# @pytest.mark.django_db -# async def test_transfer_receipts_from_miners_happy_path(settings): -# settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" - -# miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( -# "almost fatigue race slim picnic mass better clog deal solve already champion" -# ) -# miner_hotkey = miner_kp.ss58_address -# validator_kp = settings.BITTENSOR_WALLET().get_hotkey() - -# started_payload = JobStartedReceiptPayload( -# job_uuid=str(uuid.uuid4()), -# miner_hotkey=miner_hotkey, -# validator_hotkey=validator_kp.ss58_address, -# timestamp=datetime.datetime.now(datetime.UTC), -# 
executor_class="always_on.gpu-24gb", -# is_organic=True, -# ttl=300, -# ) -# started_blob = started_payload.blob_for_signing() -# started_receipt = Receipt( -# payload=started_payload, -# validator_signature=sign_blob(validator_kp, started_blob), -# miner_signature=sign_blob(miner_kp, started_blob), -# ) - -# accepted_payload = JobAcceptedReceiptPayload( -# job_uuid=str(uuid.uuid4()), -# miner_hotkey=miner_hotkey, -# validator_hotkey=validator_kp.ss58_address, -# timestamp=datetime.datetime.now(datetime.UTC), -# time_accepted=datetime.datetime.now(datetime.UTC), -# ttl=123, -# ) -# accepted_blob = accepted_payload.blob_for_signing() -# accepted_receipt = Receipt( -# payload=accepted_payload, -# validator_signature=sign_blob(validator_kp, accepted_blob), -# miner_signature=sign_blob(miner_kp, accepted_blob), -# ) - -# finished_payload = JobFinishedReceiptPayload( -# job_uuid=str(uuid.uuid4()), -# miner_hotkey=miner_hotkey, -# validator_hotkey=validator_kp.ss58_address, -# timestamp=datetime.datetime.now(datetime.UTC), -# time_started=datetime.datetime.now(datetime.UTC) - datetime.timedelta(seconds=5), -# time_took_us=42, -# score_str="0.5", -# ) -# finished_blob = finished_payload.blob_for_signing() -# finished_receipt = Receipt( -# payload=finished_payload, -# validator_signature=sign_blob(validator_kp, finished_blob), -# miner_signature=sign_blob(miner_kp, finished_blob), -# ) - -# jsonl_body = ( -# started_receipt.model_dump_json() -# + "\n" -# + accepted_receipt.model_dump_json() -# + "\n" -# + finished_receipt.model_dump_json() -# + "\n" -# ) - -# app = web.Application() -# state = {"body": jsonl_body.encode("utf-8")} - -# async def handler(request: web.Request): -# rng = request.headers.get("Range") -# if rng: -# return web.Response(status=416) -# return web.Response(status=200, body=state["body"], content_type="application/jsonl") - -# app.router.add_get("/receipts/{page}.jsonl", handler) -# runner = web.AppRunner(app) -# await runner.setup() -# site = 
web.TCPSite(runner, "127.0.0.1", 0) -# await site.start() -# server = getattr(site, "_server", None) -# assert server is not None and server.sockets, "Server failed to start" -# port = server.sockets[0].getsockname()[1] - -# try: -# await sync_to_async(Miner.objects.create, thread_sensitive=True)( -# hotkey=miner_hotkey, address="127.0.0.1", port=port -# ) - -# receipts_mgr = Receipts() -# page = 123456 -# result = await receipts_mgr._transfer_receipts_from_miners( -# miner_hotkeys=[miner_hotkey], pages=[page], semaphore_limit=2, request_timeout=2.0 -# ) - -# assert result.n_receipts == 3 -# assert result.n_successful_transfers == 1 -# assert result.transfer_errors == [] -# assert result.line_errors == [] - -# stored_started = await sync_to_async( -# lambda: JobStartedReceipt.objects.get(job_uuid=started_payload.job_uuid), -# thread_sensitive=True, -# )() -# assert str(stored_started.job_uuid) == started_payload.job_uuid -# assert stored_started.miner_hotkey == started_payload.miner_hotkey -# assert stored_started.executor_class == "always_on.gpu-24gb" -# assert stored_started.is_organic is True -# assert stored_started.ttl == 300 -# assert ( -# isinstance(stored_started.validator_signature, str) -# and stored_started.validator_signature -# ) -# assert isinstance(stored_started.miner_signature, str) and stored_started.miner_signature - -# stored_accepted = await sync_to_async( -# lambda: JobAcceptedReceipt.objects.get(job_uuid=accepted_payload.job_uuid), -# thread_sensitive=True, -# )() -# assert str(stored_accepted.job_uuid) == accepted_payload.job_uuid -# assert stored_accepted.miner_hotkey == accepted_payload.miner_hotkey -# assert stored_accepted.ttl == 123 -# assert ( -# isinstance(stored_accepted.validator_signature, str) -# and stored_accepted.validator_signature -# ) -# assert isinstance(stored_accepted.miner_signature, str) and stored_accepted.miner_signature - -# stored_finished = await sync_to_async( -# lambda: 
JobFinishedReceipt.objects.get(job_uuid=finished_payload.job_uuid), -# thread_sensitive=True, -# )() -# assert str(stored_finished.job_uuid) == finished_payload.job_uuid -# assert stored_finished.miner_hotkey == finished_payload.miner_hotkey -# assert stored_finished.time_took_us == 42 -# assert stored_finished.score_str == "0.5" -# assert ( -# isinstance(stored_finished.validator_signature, str) -# and stored_finished.validator_signature -# ) -# assert isinstance(stored_finished.miner_signature, str) and stored_finished.miner_signature - -# finally: -# await runner.cleanup() +@pytest.mark.asyncio +@pytest.mark.django_db +async def test_transfer_receipts_from_miners_happy_path(settings): + settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" + + miner_kp = bittensor_wallet.Keypair.create_from_mnemonic( + "almost fatigue race slim picnic mass better clog deal solve already champion" + ) + miner_hotkey = miner_kp.ss58_address + validator_kp = settings.BITTENSOR_WALLET().get_hotkey() + + started_payload = JobStartedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + executor_class="always_on.gpu-24gb", + is_organic=True, + ttl=300, + ) + started_blob = started_payload.blob_for_signing() + started_receipt = Receipt( + payload=started_payload, + validator_signature=sign_blob(validator_kp, started_blob), + miner_signature=sign_blob(miner_kp, started_blob), + ) + + accepted_payload = JobAcceptedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + time_accepted=datetime.datetime.now(datetime.UTC), + ttl=123, + ) + accepted_blob = accepted_payload.blob_for_signing() + accepted_receipt = Receipt( + payload=accepted_payload, + validator_signature=sign_blob(validator_kp, accepted_blob), + miner_signature=sign_blob(miner_kp, accepted_blob), 
+ ) + + finished_payload = JobFinishedReceiptPayload( + job_uuid=str(uuid.uuid4()), + miner_hotkey=miner_hotkey, + validator_hotkey=validator_kp.ss58_address, + timestamp=datetime.datetime.now(datetime.UTC), + time_started=datetime.datetime.now(datetime.UTC) - datetime.timedelta(seconds=5), + time_took_us=42, + score_str="0.5", + ) + finished_blob = finished_payload.blob_for_signing() + finished_receipt = Receipt( + payload=finished_payload, + validator_signature=sign_blob(validator_kp, finished_blob), + miner_signature=sign_blob(miner_kp, finished_blob), + ) + + jsonl_body = ( + started_receipt.model_dump_json() + + "\n" + + accepted_receipt.model_dump_json() + + "\n" + + finished_receipt.model_dump_json() + + "\n" + ) + + app = web.Application() + state = {"body": jsonl_body.encode("utf-8")} + + async def handler(request: web.Request): + rng = request.headers.get("Range") + if rng: + return web.Response(status=416) + return web.Response(status=200, body=state["body"], content_type="application/jsonl") + + app.router.add_get("/receipts/{page}.jsonl", handler) + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, "127.0.0.1", 0) + await site.start() + server = getattr(site, "_server", None) + assert server is not None and server.sockets, "Server failed to start" + port = server.sockets[0].getsockname()[1] + + try: + await sync_to_async(Miner.objects.create, thread_sensitive=True)( + hotkey=miner_hotkey, address="127.0.0.1", port=port + ) + + receipts_mgr = Receipts() + page = 123456 + result = await receipts_mgr._transfer_receipts_from_miners( + miner_hotkeys=[miner_hotkey], pages=[page], semaphore_limit=2, request_timeout=2.0 + ) + + assert result.n_receipts == 3 + assert result.n_successful_transfers == 1 + assert result.transfer_errors == [] + assert result.line_errors == [] + + stored_started = await sync_to_async( + lambda: JobStartedReceipt.objects.get(job_uuid=started_payload.job_uuid), + thread_sensitive=True, + )() + assert 
str(stored_started.job_uuid) == started_payload.job_uuid + assert stored_started.miner_hotkey == started_payload.miner_hotkey + assert stored_started.executor_class == "always_on.gpu-24gb" + assert stored_started.is_organic is True + assert stored_started.ttl == 300 + assert ( + isinstance(stored_started.validator_signature, str) + and stored_started.validator_signature + ) + assert isinstance(stored_started.miner_signature, str) and stored_started.miner_signature + + stored_accepted = await sync_to_async( + lambda: JobAcceptedReceipt.objects.get(job_uuid=accepted_payload.job_uuid), + thread_sensitive=True, + )() + assert str(stored_accepted.job_uuid) == accepted_payload.job_uuid + assert stored_accepted.miner_hotkey == accepted_payload.miner_hotkey + assert stored_accepted.ttl == 123 + assert ( + isinstance(stored_accepted.validator_signature, str) + and stored_accepted.validator_signature + ) + assert isinstance(stored_accepted.miner_signature, str) and stored_accepted.miner_signature + + stored_finished = await sync_to_async( + lambda: JobFinishedReceipt.objects.get(job_uuid=finished_payload.job_uuid), + thread_sensitive=True, + )() + assert str(stored_finished.job_uuid) == finished_payload.job_uuid + assert stored_finished.miner_hotkey == finished_payload.miner_hotkey + assert stored_finished.time_took_us == 42 + assert stored_finished.score_str == "0.5" + assert ( + isinstance(stored_finished.validator_signature, str) + and stored_finished.validator_signature + ) + assert isinstance(stored_finished.miner_signature, str) and stored_finished.miner_signature + + finally: + await runner.cleanup() @pytest.mark.django_db From 4dfd887fad77d0466ebfe565de3510500a4014b0 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Wed, 20 Aug 2025 09:06:34 +0200 Subject: [PATCH 09/13] Adjustments after rebase --- .../validator/organic_jobs/miner_driver.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git 
a/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py b/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py index 923d0348c..2fbb8503c 100644 --- a/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py +++ b/validator/app/src/compute_horde_validator/validator/organic_jobs/miner_driver.py @@ -382,31 +382,32 @@ async def streaming_ready_callback(msg: V0StreamingJobReadyRequest) -> None: comment = "Miner failed to excuse job" status = OrganicJob.Status.FAILED system_event_subtype = SystemEvent.EventSubType.JOB_REJECTED - job_request_time = job_started_receipt.timestamp - valid_excuses = await job_excuses.filter_valid_excuse_receipts( - receipts_to_check=rejection.msg.receipts or [], - check_time=job_request_time, - declined_job_uuid=str(job.job_uuid), - declined_job_executor_class=ExecutorClass(job.executor_class), - declined_job_is_synthetic=False, - minimum_validator_stake_for_excuse=await aget_config( - "DYNAMIC_MINIMUM_VALIDATOR_STAKE_FOR_EXCUSE" - ), - miner_hotkey=job.miner.hotkey, - ) - expected_executor_count = await job_excuses.get_expected_miner_executor_count( - check_time=job_request_time, - miner_hotkey=job.miner.hotkey, - executor_class=ExecutorClass(job.executor_class), - ) - if len(valid_excuses) >= expected_executor_count: - comment = "Miner properly excused job" - status = OrganicJob.Status.EXCUSED - system_event_subtype = SystemEvent.EventSubType.JOB_EXCUSED else: - comment = "Miner failed to excuse job" - status = OrganicJob.Status.FAILED - system_event_subtype = SystemEvent.EventSubType.JOB_REJECTED + job_request_time = job_started_receipt.timestamp + valid_excuses = await job_excuses.filter_valid_excuse_receipts( + receipts_to_check=rejection.msg.receipts or [], + check_time=job_request_time, + declined_job_uuid=str(job.job_uuid), + declined_job_executor_class=ExecutorClass(job.executor_class), + declined_job_is_synthetic=False, + 
minimum_validator_stake_for_excuse=await aget_config( + "DYNAMIC_MINIMUM_VALIDATOR_STAKE_FOR_EXCUSE" + ), + miner_hotkey=job.miner.hotkey, + ) + expected_executor_count = await job_excuses.get_expected_miner_executor_count( + check_time=job_request_time, + miner_hotkey=job.miner.hotkey, + executor_class=ExecutorClass(job.executor_class), + ) + if len(valid_excuses) >= expected_executor_count: + comment = "Miner properly excused job" + status = OrganicJob.Status.EXCUSED + system_event_subtype = SystemEvent.EventSubType.JOB_EXCUSED + else: + comment = "Miner failed to excuse job" + status = OrganicJob.Status.FAILED + system_event_subtype = SystemEvent.EventSubType.JOB_REJECTED logger.info(comment) job.comment = comment From 2a28a041904e160a97421ec4814b73d1e3a2c0ce Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Wed, 20 Aug 2025 10:27:23 +0200 Subject: [PATCH 10/13] Enable check_still_running_tasks --- .../validator/tests/conftest.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/tests/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/conftest.py index 4a95d9dec..daf79a039 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/conftest.py @@ -1,3 +1,4 @@ +import asyncio import ipaddress import logging import uuid @@ -6,6 +7,7 @@ import bittensor_wallet import pytest +import pytest_asyncio import turbobt from compute_horde.executor_class import EXECUTOR_CLASS from compute_horde_core.executor_class import ExecutorClass @@ -153,14 +155,14 @@ def run_uuid(): # NOTE: Use this fixture when you need to find dangling asyncio tasks. 
It is currently commented # because redis channels layers keeps dangling tasks, that makes the tests fail -_- -# @pytest_asyncio.fixture(autouse=True) -# async def check_still_running_tasks(): -# yield -# tasks = asyncio.all_tasks() -# if len(tasks) > 1: -# raise ValueError( -# "\n" + "\n".join(f"{task.get_name()}: {task.get_coro()}" for task in tasks) -# ) +@pytest_asyncio.fixture(autouse=True) +async def check_still_running_tasks(): + yield + tasks = asyncio.all_tasks() + if len(tasks) > 1: + raise ValueError( + "\n" + "\n".join(f"{task.get_name()}: {task.get_coro()}" for task in tasks) + ) @pytest.fixture From e0c12f582350d45afe7846024926d358988e8191 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Wed, 20 Aug 2025 10:50:31 +0200 Subject: [PATCH 11/13] Revert testing changes --- .../validator/allowance/tests/mockchain.py | 7 +------ .../validator/tests/conftest.py | 18 ++++++++---------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py b/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py index 95c5d73ee..75d4035ae 100644 --- a/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py +++ b/validator/app/src/compute_horde_validator/validator/allowance/tests/mockchain.py @@ -1,9 +1,7 @@ import asyncio import datetime -import tempfile from contextlib import contextmanager from functools import lru_cache -from pathlib import Path from unittest import mock from unittest.mock import patch @@ -59,10 +57,7 @@ def cmbm(block_number): @lru_cache def wallet(): - # Use an isolated temp directory for test wallets to avoid reading any real/local keyfiles - wallets_root = Path(tempfile.gettempdir()) / "compute_horde_test_wallets" - wallets_root.mkdir(parents=True, exist_ok=True) - wallet_ = bittensor_wallet.Wallet(name="test_mock_validator", path=str(wallets_root)) + wallet_ = bittensor_wallet.Wallet(name="test_mock_validator") 
wallet_.regenerate_coldkey( mnemonic="local ghost evil lizard decade own lecture absurd vote despair predict cage", use_password=False, diff --git a/validator/app/src/compute_horde_validator/validator/tests/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/conftest.py index daf79a039..4a95d9dec 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/conftest.py @@ -1,4 +1,3 @@ -import asyncio import ipaddress import logging import uuid @@ -7,7 +6,6 @@ import bittensor_wallet import pytest -import pytest_asyncio import turbobt from compute_horde.executor_class import EXECUTOR_CLASS from compute_horde_core.executor_class import ExecutorClass @@ -155,14 +153,14 @@ def run_uuid(): # NOTE: Use this fixture when you need to find dangling asyncio tasks. It is currently commented # because redis channels layers keeps dangling tasks, that makes the tests fail -_- -@pytest_asyncio.fixture(autouse=True) -async def check_still_running_tasks(): - yield - tasks = asyncio.all_tasks() - if len(tasks) > 1: - raise ValueError( - "\n" + "\n".join(f"{task.get_name()}: {task.get_coro()}" for task in tasks) - ) +# @pytest_asyncio.fixture(autouse=True) +# async def check_still_running_tasks(): +# yield +# tasks = asyncio.all_tasks() +# if len(tasks) > 1: +# raise ValueError( +# "\n" + "\n".join(f"{task.get_name()}: {task.get_coro()}" for task in tasks) +# ) @pytest.fixture From 403fb9bcbaa9569d68b2b7c95967ae4a18b69b88 Mon Sep 17 00:00:00 2001 From: piotr-figwer-reef Date: Wed, 20 Aug 2025 12:06:54 +0200 Subject: [PATCH 12/13] Async cleanup --- .../validator/receipts/tests/test_receipts.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py index d79e45251..e3b14291c 100644 
--- a/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py +++ b/validator/app/src/compute_horde_validator/validator/receipts/tests/test_receipts.py @@ -21,7 +21,7 @@ @pytest.mark.asyncio -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) async def test_transfer_receipts_from_miners_happy_path(settings): settings.RECEIPT_TRANSFER_CHECKPOINT_CACHE = "default" @@ -167,7 +167,7 @@ async def handler(request: web.Request): await runner.cleanup() -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) def test_create_job_started_receipt_returns_payload_and_signature(settings): receipts = Receipts() @@ -197,7 +197,7 @@ def test_create_job_started_receipt_returns_payload_and_signature(settings): assert payload.timestamp.tzinfo is datetime.UTC -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) def test_create_job_finished_receipt_returns_expected_values(settings): receipts = Receipts() @@ -231,7 +231,7 @@ def test_create_job_finished_receipt_returns_expected_values(settings): @pytest.mark.asyncio -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settings): miner_hotkey = "miner_hotkey_valid" other_miner = "miner_hotkey_other" @@ -239,7 +239,7 @@ async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settin base_ts = datetime.datetime.now(datetime.UTC) - await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( + await JobStartedReceipt.objects.acreate( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -250,7 +250,7 @@ async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settin ttl=60, ) - await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( + await JobStartedReceipt.objects.acreate( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -261,7 +261,7 @@ 
async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settin ttl=30, ) - await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( + await JobStartedReceipt.objects.acreate( job_uuid=str(uuid.uuid4()), miner_hotkey=other_miner, validator_hotkey=validator_hotkey, @@ -287,7 +287,7 @@ async def test_get_valid_job_started_receipts_for_miner_filters_correctly(settin @pytest.mark.asyncio -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) async def test_get_job_finished_receipts_for_miner_filters_by_uuid(settings): miner_hotkey = "miner_hotkey_finished" validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address @@ -296,7 +296,7 @@ async def test_get_job_finished_receipts_for_miner_filters_by_uuid(settings): wanted_uuid = str(uuid.uuid4()) other_uuid = str(uuid.uuid4()) - await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + await JobFinishedReceipt.objects.acreate( job_uuid=wanted_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -307,7 +307,7 @@ async def test_get_job_finished_receipts_for_miner_filters_by_uuid(settings): score_str="0.5", ) - await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + await JobFinishedReceipt.objects.acreate( job_uuid=other_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -331,13 +331,13 @@ async def test_get_job_finished_receipts_for_miner_filters_by_uuid(settings): @pytest.mark.asyncio -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) async def test_get_job_started_receipt_by_uuid_returns_instance_or_none(settings): receipts = Receipts() job_uuid_present = str(uuid.uuid4()) job_uuid_missing = str(uuid.uuid4()) - await sync_to_async(JobStartedReceipt.objects.create, thread_sensitive=True)( + await JobStartedReceipt.objects.acreate( job_uuid=job_uuid_present, miner_hotkey="miner_xyz", validator_hotkey=settings.BITTENSOR_WALLET().get_hotkey().ss58_address, @@ -357,7 
+357,7 @@ async def test_get_job_started_receipt_by_uuid_returns_instance_or_none(settings @pytest.mark.asyncio -@pytest.mark.django_db +@pytest.mark.django_db(transaction=True) async def test_get_completed_job_receipts_for_block_range_returns_only_in_range(settings): receipts = Receipts() @@ -378,7 +378,7 @@ async def test_get_completed_job_receipts_for_block_range_returns_only_in_range( validator_hotkey = settings.BITTENSOR_WALLET().get_hotkey().ss58_address in_uuid = str(uuid.uuid4()) - await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + await JobFinishedReceipt.objects.acreate( job_uuid=in_uuid, miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -390,7 +390,7 @@ async def test_get_completed_job_receipts_for_block_range_returns_only_in_range( score_str="1.0", ) - await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + await JobFinishedReceipt.objects.acreate( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, @@ -402,7 +402,7 @@ async def test_get_completed_job_receipts_for_block_range_returns_only_in_range( score_str="0.1", ) - await sync_to_async(JobFinishedReceipt.objects.create, thread_sensitive=True)( + await JobFinishedReceipt.objects.acreate( job_uuid=str(uuid.uuid4()), miner_hotkey=miner_hotkey, validator_hotkey=validator_hotkey, From 5cee2adebc1883a1d9b152c573d3a48c632c9dee Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Wed, 20 Aug 2025 13:46:34 +0200 Subject: [PATCH 13/13] Maybe fix failing s3 tests --- .../compute_horde_validator/validator/s3.py | 20 +++++++++++++++---- .../validator/tests/test_s3.py | 15 ++++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py index 47db09980..89b56d015 100644 --- a/validator/app/src/compute_horde_validator/validator/s3.py +++ 
b/validator/app/src/compute_horde_validator/validator/s3.py @@ -13,6 +13,17 @@ logger = logging.getLogger(__name__) +@contextlib.contextmanager +def s3_client_context(**kwargs): + client = get_s3_client(**kwargs) + try: + yield client + finally: + # Ensure the client is properly closed + if hasattr(client, '_endpoint'): + client._endpoint.http_session.close() + + def get_s3_client( aws_access_key_id=None, aws_secret_access_key=None, @@ -103,11 +114,12 @@ def download_prompts_from_s3_url(s3_url: str) -> list[str]: async def download_file_content(s3_url: str, client: httpx.AsyncClient | None = None) -> bytes: - if not client: - ctx = httpx.AsyncClient() + if client is None: + async with httpx.AsyncClient() as client: + response = await client.get(s3_url) + response.raise_for_status() + return response.content else: - ctx = contextlib.nullcontext(client) # type: ignore - async with ctx as client: response = await client.get(s3_url) response.raise_for_status() return response.content diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py index 00b860d75..a0863484d 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py @@ -1,14 +1,16 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from moto import mock_aws from compute_horde_validator.validator.s3 import ( + download_file_content, download_prompts_from_s3_url, generate_download_url, generate_upload_url, get_public_url, get_s3_client, + s3_client_context, ) @@ -19,10 +21,15 @@ def bucket_name(): @pytest.fixture(autouse=True) def bucket(bucket_name: str): - with mock_aws(): - client = get_s3_client() - client.create_bucket(Bucket=bucket_name) + mock = mock_aws() + mock.start() + + try: + with s3_client_context() as client: + 
client.create_bucket(Bucket=bucket_name) yield + finally: + mock.stop() def test_generate_upload_url(bucket_name: str):