4 changes: 4 additions & 0 deletions compute_horde/compute_horde/executor_class.py
@@ -44,6 +44,10 @@ class ExecutorClassSpec:
# TODO: TIMEOUTS - this should depend on the requested job timing instead, but capped at seconds left in current cycle
MAX_EXECUTOR_TIMEOUT = timedelta(minutes=20).total_seconds()

# Executor classes considered to be used for LLM-type jobs
LLM_EXECUTOR_CLASSES = {ExecutorClass.always_on__llm__a6000}

DEFAULT_EXECUTOR_CLASS = ExecutorClass.spin_up_4min__gpu_24gb
DEFAULT_LLM_EXECUTOR_CLASS = ExecutorClass.always_on__llm__a6000
DEFAULT_EXECUTOR_TIMEOUT = EXECUTOR_CLASS[DEFAULT_EXECUTOR_CLASS].spin_up_time

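For context, a minimal sketch of how these constants might be consumed by routing code; is_llm_executor_class and pick_executor_class are hypothetical helpers, not part of this PR:

    from compute_horde.executor_class import (
        DEFAULT_EXECUTOR_CLASS,
        DEFAULT_LLM_EXECUTOR_CLASS,
        LLM_EXECUTOR_CLASSES,
    )
    from compute_horde_core.executor_class import ExecutorClass

    def is_llm_executor_class(executor_class: ExecutorClass) -> bool:
        # membership in the new set marks an executor class as LLM-capable
        return executor_class in LLM_EXECUTOR_CLASSES

    def pick_executor_class(llm_job: bool) -> ExecutorClass:
        # LLM jobs go to the LLM default, everything else to the general default
        return DEFAULT_LLM_EXECUTOR_CLASS if llm_job else DEFAULT_EXECUTOR_CLASS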
13 changes: 1 addition & 12 deletions validator/app/src/compute_horde_validator/settings.py
@@ -244,7 +244,7 @@ def wrapped(*args, **kwargs):
int,
),
# llama params
"DYNAMIC_MAX_PROMPT_SERIES": (
"DYNAMIC_MAX_PROMPT_SERIES_PER_EXECUTOR_CLASS": (
3500,
"Maximum number of prompt series upon which the prompt generator will not be triggered",
int,
@@ -254,22 +254,11 @@
"how many prompt samples to generate (should be larger than how many prompts series we use per synthetic run)",
int,
),
"DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD": (
240,
"how many prompts to answer in a single workload",
int,
),
# prompt generation params
"DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION": (
25,
"Number of batches that prompt generator will process in a single go",
int,
),
"DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES": (
240,
"Number of prompts to generate in a single series",
int,
),
# prompts answering params
"DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES": (
1,
@@ -6,19 +6,20 @@
from compute_horde_core.executor_class import ExecutorClass
from compute_horde_core.output_upload import OutputUpload
from compute_horde_core.volume import Volume
from compute_horde_validator.validator.generation_profile import PromptGenerationProfile, PROMPT_GENERATION_PROFILES


class BasePromptJobGenerator(abc.ABC):
def __init__(
self,
_uuid: uuid.UUID,
*,
num_prompts_per_batch: int,
profile: PromptGenerationProfile,
batch_uuids: list[uuid.UUID],
upload_urls: list[str],
) -> None:
self._uuid = _uuid
self.num_prompts_per_batch = num_prompts_per_batch
self.profile = profile
self.batch_uuids = batch_uuids
self.upload_urls = upload_urls

@@ -33,6 +34,10 @@ def docker_image_name(self) -> str: ...

@abc.abstractmethod
    def executor_class(self) -> ExecutorClass:
        """
        The executor class used to run the generation job.
        It need not be related to the actual generation profile.
        """

def docker_run_options_preset(self) -> DockerRunOptionsPreset:
return "nvidia_all"
@@ -46,6 +51,9 @@ def volume(self) -> Volume | None:
def output(self) -> OutputUpload | None:
return None

def num_prompts_per_batch(self) -> int:
return PROMPT_GENERATION_PROFILES[self.profile].num_prompts

def get_job_details(self) -> OrganicJobDetails:
return OrganicJobDetails(
job_uuid=str(self._uuid),
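For illustration, a minimal sketch of a concrete subclass under the new constructor signature, assuming generator_version, docker_image_name, executor_class, and docker_run_cmd are the hooks a subclass fills in (the class name and image are placeholders):

    class DummyPromptJobGenerator(BasePromptJobGenerator):
        def generator_version(self) -> int:
            return 0  # placeholder version

        def docker_image_name(self) -> str:
            return "example/prompt-gen:latest"  # placeholder image

        def executor_class(self) -> ExecutorClass:
            return ExecutorClass.always_on__llm__a6000

        def docker_run_cmd(self) -> list[str]:
            # the batch size now comes from the generation profile,
            # not from a constructor argument
            return ["--batch_size", str(self.num_prompts_per_batch())]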
@@ -11,6 +11,8 @@


class PromptJobGenerator(BasePromptJobGenerator):
EXTRA_PROMPTS_TO_ACCOUNT_FOR_SOME_INVALID_ONES = 10

def generator_version(self) -> int:
return 0

@@ -21,18 +23,22 @@ def docker_image_name(self) -> str:
return f"backenddevelopersltd/compute-horde-prompt-gen-{settings.PROMPT_GENERATION_MODEL}:v0-latest"

def executor_class(self) -> ExecutorClass:
        # Currently we don't care which executor class generates the prompts.
        # However, if we wanted, we could pick one of the executor classes that should be
        # used for solving the prompts, according to the generation profile.
return ExecutorClass.always_on__llm__a6000

def docker_run_cmd(self) -> list[str]:
return [
"--quantize",
"--model_name",
settings.PROMPT_GENERATION_MODEL,
"--batch_size=250", # on A6000 we want 240 prompts generated in single file, but not all results are valid
"--batch_size",
str(self.num_prompts_per_batch() + PromptJobGenerator.EXTRA_PROMPTS_TO_ACCOUNT_FOR_SOME_INVALID_ONES),
"--num_return_sequences=1",
"--max_new_tokens=40", # 40 new tokens is enough for reasonable length prompt - 30 caused too much cut off prompts
"--number_of_prompts_per_batch",
str(self.num_prompts_per_batch),
str(self.num_prompts_per_batch()),
"--uuids",
str(",".join(map(str, self.batch_uuids))),
]
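With the default_a6000 profile (240 prompts per batch), the computed --batch_size reproduces the previously hard-coded 250:

    # 240 prompts per batch + 10 extra to absorb invalid generations = 250
    assert 240 + PromptJobGenerator.EXTRA_PROMPTS_TO_ACCOUNT_FOR_SOME_INVALID_ONES == 250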
@@ -57,20 +57,17 @@ async def answer_prompts(
job_generator = LlmPromptsJobGenerator(workload.s3_url, seed)
await job_generator.ainit(miner_hotkey=TRUSTED_MINER_FAKE_KEY)

# TODO: Should be generated for all the llm executor classes.
# SolveWorkload/PromptSample should have a executor_class field saying which
# executor_class this sample is for.
job_uuid = job_uuid or uuid.uuid4()
job_details = OrganicJobDetails(
job_uuid=str(job_uuid),
executor_class=ExecutorClass.always_on__llm__a6000,
executor_class=workload.executor_class,
docker_image=job_generator.docker_image_name(),
docker_run_options_preset=job_generator.docker_run_options_preset(),
docker_run_cmd=job_generator.docker_run_cmd(),
total_job_timeout=(
job_generator.timeout_seconds()
+ max(
EXECUTOR_CLASS[ExecutorClass.always_on__llm__a6000].spin_up_time,
EXECUTOR_CLASS[workload.executor_class].spin_up_time,
MIN_SPIN_UP_TIME,
)
),
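The timeout arithmetic now follows the workload's executor class; a sketch, assuming spin_up_time is expressed in seconds:

    from compute_horde.executor_class import EXECUTOR_CLASS

    def total_job_timeout(generator_timeout: float, workload, min_spin_up_time: float) -> float:
        # the spin-up allowance is taken from the workload's executor class,
        # floored at the global minimum
        spin_up = max(EXECUTOR_CLASS[workload.executor_class].spin_up_time, min_spin_up_time)
        return generator_timeout + spin_up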
@@ -15,6 +15,7 @@
TrustedMinerClient,
)
from compute_horde_validator.validator.dynamic_config import aget_config
from compute_horde_validator.validator.generation_profile import PromptGenerationProfile
from compute_horde_validator.validator.models import PromptSeries, SystemEvent
from compute_horde_validator.validator.s3 import generate_upload_url, get_public_url

@@ -26,6 +27,7 @@

async def generate_prompts(
*,
profile: PromptGenerationProfile,
create_miner_client: Callable[..., TrustedMinerClient] | None = None,
job_uuid: uuid.UUID | None = None,
wait_timeout: int | None = None,
@@ -45,13 +47,12 @@ async def generate_prompts(
job_uuid = job_uuid or uuid.uuid4()

num_batches = await aget_config("DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION")
num_prompts_per_batch = await aget_config("DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES")

series_uuids, upload_urls, public_urls = _generate_uuids_and_urls(num_batches)

job_generator = prompt_job_generator(
job_uuid,
num_prompts_per_batch=num_prompts_per_batch,
profile=profile,
batch_uuids=series_uuids,
upload_urls=upload_urls,
)
@@ -86,7 +87,9 @@ async def generate_prompts(
type=SystemEvent.EventType.LLM_PROMPT_GENERATION,
subtype=SystemEvent.EventSubType.FAILURE,
long_description=f"Trusted miner failed to run prompt generation job: {e!r}",
data={},
data={
"profile": profile,
},
)
logger.warning("Failed to run organic job", exc_info=True)
return
@@ -103,6 +106,7 @@ async def generate_prompts(
"completed_at": completed_at.isoformat(),
"duration": (completed_at - started_at).total_seconds(),
"count": len(series_uuids),
"profile": profile,
},
)

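A sketch of how a caller in the same module might pick a profile per LLM executor class and invoke the task; generate_prompts_for_all_llm_classes is a hypothetical driver:

    from compute_horde.executor_class import LLM_EXECUTOR_CLASSES
    from compute_horde_validator.validator.generation_profile import (
        EXECUTOR_TO_PROMPT_GENERATION_PROFILE,
    )

    async def generate_prompts_for_all_llm_classes() -> None:
        # one generation run per LLM executor class that has a mapped profile
        for executor_class in LLM_EXECUTOR_CLASSES:
            profile = EXECUTOR_TO_PROMPT_GENERATION_PROFILE.get(executor_class)
            if profile is not None:
                await generate_prompts(profile=profile)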
@@ -0,0 +1,35 @@
from dataclasses import dataclass
from enum import StrEnum

from compute_horde_core.executor_class import ExecutorClass


class PromptGenerationProfile(StrEnum):
default_a6000 = "default_a6000"
default_a100 = "default_a100"


@dataclass
class GenerationProfileSpec:
description: str
num_prompts: int


PROMPT_GENERATION_PROFILES = {
    PromptGenerationProfile.default_a6000: GenerationProfileSpec(
        description="The default generation profile for a6000 cards",
        num_prompts=240,
    ),
    # a fake generation profile, currently unused in production,
    # kept for testing multi-hardware support
    PromptGenerationProfile.default_a100: GenerationProfileSpec(
        description="The default generation profile for a100 cards",
        num_prompts=777,
    ),
}


EXECUTOR_TO_PROMPT_GENERATION_PROFILE = {
ExecutorClass.always_on__llm__a6000: PromptGenerationProfile.default_a6000,
}
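The lookup chain from executor class to batch size then reads (a sketch):

    from compute_horde_core.executor_class import ExecutorClass

    profile = EXECUTOR_TO_PROMPT_GENERATION_PROFILE[ExecutorClass.always_on__llm__a6000]
    spec = PROMPT_GENERATION_PROFILES[profile]
    assert spec.num_prompts == 240  # default_a6000 keeps the historical batch size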
@@ -0,0 +1,25 @@
# Generated by Django 4.2.19 on 2025-03-31 20:35

import compute_horde_core.executor_class
import compute_horde_validator.validator.generation_profile
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('validator', '0060_remove_adminjobrequest_raw_script'),
]

operations = [
migrations.AddField(
model_name='promptseries',
name='generation_profile',
field=models.CharField(choices=[(compute_horde_validator.validator.generation_profile.PromptGenerationProfile['default_a6000'], compute_horde_validator.validator.generation_profile.PromptGenerationProfile['default_a6000']), (compute_horde_validator.validator.generation_profile.PromptGenerationProfile['default_a100'], compute_horde_validator.validator.generation_profile.PromptGenerationProfile['default_a100'])], default=compute_horde_validator.validator.generation_profile.PromptGenerationProfile['default_a6000'], max_length=255),
),
migrations.AddField(
model_name='solveworkload',
name='executor_class',
field=models.CharField(choices=[(compute_horde_core.executor_class.ExecutorClass['spin_up_4min__gpu_24gb'], compute_horde_core.executor_class.ExecutorClass['spin_up_4min__gpu_24gb']), (compute_horde_core.executor_class.ExecutorClass['always_on__gpu_24gb'], compute_horde_core.executor_class.ExecutorClass['always_on__gpu_24gb']), (compute_horde_core.executor_class.ExecutorClass['always_on__llm__a6000'], compute_horde_core.executor_class.ExecutorClass['always_on__llm__a6000'])], default=compute_horde_core.executor_class.ExecutorClass['always_on__llm__a6000'], max_length=255),
),
]
12 changes: 12 additions & 0 deletions validator/app/src/compute_horde_validator/validator/models.py
@@ -7,8 +7,10 @@
from os import urandom
from typing import Self

from .generation_profile import PromptGenerationProfile
from asgiref.sync import sync_to_async
from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS
from compute_horde_core.executor_class import ExecutorClass
from compute_horde.subtensor import get_cycle_containing_block
from compute_horde.utils import MIN_VALIDATOR_STAKE
from compute_horde_core.output_upload import OutputUpload, ZipAndHttpPutUpload
@@ -511,6 +513,11 @@ class PromptSeries(models.Model):
s3_url = models.URLField(max_length=1000)
created_at = models.DateTimeField(default=now)
generator_version = models.PositiveSmallIntegerField()
generation_profile = models.CharField(
max_length=255,
choices=[(profile, profile) for profile in PromptGenerationProfile],
default=PromptGenerationProfile.default_a6000
)


class SolveWorkload(models.Model):
@@ -523,6 +530,11 @@ class SolveWorkload(models.Model):
s3_url = models.URLField(max_length=1000)
created_at = models.DateTimeField(default=now)
finished_at = models.DateTimeField(null=True, default=None, db_index=True)
executor_class = models.CharField(
max_length=255,
choices=[(cls, cls) for cls in ExecutorClass],
default=ExecutorClass.always_on__llm__a6000
)

def __str__(self):
return f"uuid: {self.workload_uuid} - seed: {self.seed}"