Commits
21 commits
49f3f25
abstract Summarizer and concrete OpenAISummarizer
bobular Sep 16, 2025
7cc64d3
first attempt at ClaudeSummarizer
bobular Sep 16, 2025
6b2e379
switch to Claude and tweaks
bobular Sep 17, 2025
fa12327
deprecate OPENAI_-prefixed daily cost env vars
bobular Sep 17, 2025
7d7ba6f
add more retries for Claude
bobular Sep 18, 2025
98330e5
Anthropic SDK upgrade to 2.9.0; use Sonnet 4.5; tweak prompt for sent…
bobular Oct 24, 2025
9e929e0
added embedding stuff; not tested; no cost monitoring
bobular Oct 24, 2025
6c5818f
tidy up token usage
bobular Oct 24, 2025
57617da
merge from master; fix import paths for latest OpenAI SDK
bobular Oct 24, 2025
bf83d47
better handling of AI API 500 responses and configurable MAX_CONCURRE…
bobular Oct 27, 2025
410732a
rewrite retry logic for Java 11 (was 12+)
bobular Oct 27, 2025
95757e6
merged in API robustness updates from ai-expression-claude branch
bobular Oct 27, 2025
a84d34d
Merge remote-tracking branch 'origin/master' into ai-expression-topic…
bobular Nov 8, 2025
69e8dc1
make topic embedding configurable by hard-coding, default is off
bobular Nov 8, 2025
77b6adc
topic embeddings now configured by reporter request payload
bobular Nov 8, 2025
f6ab3ba
add Claude extended thinking - off by default
bobular Nov 9, 2025
48a0bad
Merge remote-tracking branch 'origin/master' into ai-expression-topic…
bobular Feb 14, 2026
3b27f64
workaround for paralog_count
bobular Feb 14, 2026
5f1bc6d
Merge remote-tracking branch 'origin/master' into ai-expression-topic…
Feb 14, 2026
a7499a9
add correct costings for Claude and use new provider-agnostic prop names
bobular Feb 14, 2026
d2b88f9
add claude api key lookup from secrets in prod
bobular Feb 14, 2026
7 changes: 4 additions & 3 deletions Model/lib/conifer/roles/conifer/vars/ApiCommon/default.yml
@@ -50,9 +50,10 @@ modelprop:
JBROWSE_SERVICE_URL: "/{{ webapp_ctx }}/service/jbrowse"
AI_EXPRESSION_CACHE_DIR: "/var/www/Common/ai-expr-cache"
AI_EXPRESSION_QUALTRICS_ID: SV_38C4ZX1JxLi2SEe
OPENAI_MAX_DAILY_AI_EXPRESSION_DOLLAR_COST: 33
OPENAI_DOLLAR_COST_PER_1M_AI_INPUT_TOKENS: 2.5
OPENAI_DOLLAR_COST_PER_1M_AI_OUTPUT_TOKENS: 10
MAX_DAILY_AI_EXPRESSION_DOLLAR_COST: 33
# Claude Sonnet 4.5 costs from here: https://platform.claude.com/docs/en/about-claude/pricing
DOLLAR_COST_PER_1M_AI_INPUT_TOKENS: 3
DOLLAR_COST_PER_1M_AI_OUTPUT_TOKENS: 15

user_datasets_uploadTypes_env_map:
w: "genelist"
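
The three OPENAI_-prefixed cost properties above are replaced by provider-agnostic names, with rates updated to Claude Sonnet 4.5 pricing ($3 per 1M input tokens, $15 per 1M output tokens). Below is a minimal sketch of how such per-1M-token rates translate into a per-request cost and a daily budget check; the class and constant names are illustrative assumptions, not the project's actual DailyCostMonitor implementation (which is not part of this diff).

// Illustrative sketch only -- not the real DailyCostMonitor.
// Rates and budget mirror the YAML properties above.
public class CostCheckSketch {

  private static final double INPUT_COST_PER_TOKEN  = 3.0 / 1_000_000;   // DOLLAR_COST_PER_1M_AI_INPUT_TOKENS
  private static final double OUTPUT_COST_PER_TOKEN = 15.0 / 1_000_000;  // DOLLAR_COST_PER_1M_AI_OUTPUT_TOKENS
  private static final double MAX_DAILY_COST_DOLLARS = 33.0;             // MAX_DAILY_AI_EXPRESSION_DOLLAR_COST

  public static double requestCost(long inputTokens, long outputTokens) {
    return inputTokens * INPUT_COST_PER_TOKEN + outputTokens * OUTPUT_COST_PER_TOKEN;
  }

  public static void checkBudget(double spentTodayDollars, long inputTokens, long outputTokens) {
    if (spentTodayDollars + requestCost(inputTokens, outputTokens) > MAX_DAILY_COST_DOLLARS) {
      throw new IllegalStateException("Daily AI expression budget exceeded");
    }
  }
}
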
@@ -46,6 +46,7 @@ modelprop:
GOOGLE_MAPS_API_KEY: "{{ lookup('euparc', 'attr=api_key xpath=sites/google_maps default=NOKEY') }}"
COMMUNITY_SITE: "//{{ community_env_map[prefix]|default(community_env_map['default']) }}"
OPENAI_API_KEY: "{{ lookup('euparc', 'attr=api_key xpath=sites/openai default=NOKEY') }}"
CLAUDE_API_KEY: "{{ lookup('euparc', 'attr=api_key xpath=sites/claude default=NOKEY') }}"


# the below extends the w_ q_ prefix pattern used for workspace_env_map, which
5 changes: 5 additions & 0 deletions Model/pom.xml
@@ -135,6 +135,11 @@
<artifactId>openai-java</artifactId>
</dependency>

<dependency>
<groupId>com.anthropic</groupId>
<artifactId>anthropic-java</artifactId>
</dependency>

</dependencies>

</project>
@@ -13,6 +13,8 @@
import org.apidb.apicommon.model.report.ai.expression.DailyCostMonitor;
import org.apidb.apicommon.model.report.ai.expression.GeneRecordProcessor;
import org.apidb.apicommon.model.report.ai.expression.GeneRecordProcessor.GeneSummaryInputs;
import org.apidb.apicommon.model.report.ai.expression.ClaudeSummarizer;
//import org.apidb.apicommon.model.report.ai.expression.OpenAISummarizer;
import org.apidb.apicommon.model.report.ai.expression.Summarizer;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.WdkServiceTemporarilyUnavailableException;
@@ -27,13 +29,44 @@
import org.json.JSONException;
import org.json.JSONObject;

/**
* Reporter that generates AI-powered gene expression summaries using large language models (LLMs).
*
* <p>This reporter analyzes expression data across multiple experiments for a single gene
* and generates natural language summaries of expression patterns and biological significance.
* Results are cached to minimize API costs and response times.</p>
*
* <h3>Configuration (JSON request payload)</h3>
* <pre>
* {
* "populateIfNotPresent": true|false, // If true, generate summary if not cached (default: false)
* "makeTopicEmbeddings": true|false // If true, generate embedding vectors for topics (default: false)
* }
* </pre>
*
* <h3>Cache Invalidation Warning</h3>
* <p><strong>IMPORTANT:</strong> Changing the {@code makeTopicEmbeddings} setting will invalidate
* the entire cache for all genes, as this value is included in the cache digest. To avoid costly
* cache regeneration, choose a setting and stick with it across requests. Only change this value
* when you intentionally want to regenerate all summaries with or without embeddings.</p>
*
* <h3>Model Configuration</h3>
* <p>The AI model and embedding model are hardcoded in the summarizer implementations
* ({@link ClaudeSummarizer}, {@link org.apidb.apicommon.model.report.ai.expression.OpenAISummarizer}).
* Changing models will also invalidate the cache.</p>
*/
public class SingleGeneAiExpressionReporter extends AbstractReporter {

private static final int MAX_RESULT_SIZE = 1; // one gene at a time for now

private static final String POPULATION_MODE_PROP_KEY = "populateIfNotPresent";
private static final String AI_MAX_CONCURRENT_REQUESTS_PROP_KEY = "AI_MAX_CONCURRENT_REQUESTS";
private static final int DEFAULT_MAX_CONCURRENT_REQUESTS = 10;
private static final String MAKE_TOPIC_EMBEDDINGS_PROP_KEY = "makeTopicEmbeddings";

private boolean _populateIfNotPresent;
private int _maxConcurrentRequests;
private boolean _makeTopicEmbeddings;
private DailyCostMonitor _costMonitor;

@Override
@@ -42,6 +75,15 @@ public Reporter configure(JSONObject config) throws ReporterConfigException, Wdk
// assign cache mode
_populateIfNotPresent = config.optBoolean(POPULATION_MODE_PROP_KEY, false);

// assign topic embeddings flag
_makeTopicEmbeddings = config.optBoolean(MAKE_TOPIC_EMBEDDINGS_PROP_KEY, false);

// read max concurrent requests from model properties or use default
String maxConcurrentRequestsStr = _wdkModel.getProperties().get(AI_MAX_CONCURRENT_REQUESTS_PROP_KEY);
_maxConcurrentRequests = maxConcurrentRequestsStr != null
? Integer.parseInt(maxConcurrentRequestsStr)
: DEFAULT_MAX_CONCURRENT_REQUESTS;

// instantiate cost monitor
_costMonitor = new DailyCostMonitor(_wdkModel);

@@ -52,7 +94,7 @@ public Reporter configure(JSONObject config) throws ReporterConfigException, Wdk
" should only be assigned to " + geneRecordClass.getFullName());
}

// check result size; limit to small results due to OpenAI cost
// check result size; limit to small results due to AI API cost
if (_baseAnswer.getResultSizeFactory().getResultSize() > MAX_RESULT_SIZE) {
throw new ReporterConfigException("This reporter cannot be called with results of size greater than " + MAX_RESULT_SIZE);
}
@@ -79,9 +121,11 @@ protected void write(OutputStream out) throws IOException, WdkModelException {
// open summary cache (manages persistence of expression data)
AiExpressionCache cache = AiExpressionCache.getInstance(_wdkModel);

// create summarizer (interacts with OpenAI)
Summarizer summarizer = new Summarizer(_wdkModel, _costMonitor);

// create summarizer (interacts with Claude)
ClaudeSummarizer summarizer = new ClaudeSummarizer(_wdkModel, _costMonitor, _makeTopicEmbeddings);
// or alternatively use OpenAI (with the appropriate import)
// OpenAISummarizer summarizer = new OpenAISummarizer(_wdkModel, _costMonitor, _makeTopicEmbeddings);

// open record and output streams
try (RecordStream recordStream = RecordStreamFactory.getRecordStream(_baseAnswer, List.of(), tables);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out))) {
@@ -93,12 +137,13 @@ protected void write(OutputStream out) throws IOException, WdkModelException {

// create summary inputs
GeneSummaryInputs summaryInputs =
GeneRecordProcessor.getSummaryInputsFromRecord(record, Summarizer.OPENAI_CHAT_MODEL.toString(),
GeneRecordProcessor.getSummaryInputsFromRecord(record, ClaudeSummarizer.CLAUDE_MODEL.toString(),
Summarizer.EMBEDDING_MODEL.asString(), _makeTopicEmbeddings, ClaudeSummarizer.USE_EXTENDED_THINKING,
Summarizer::getExperimentMessage, Summarizer::getFinalSummaryMessage);

// fetch summary, producing if necessary and requested
JSONObject expressionSummary = _populateIfNotPresent
? cache.populateSummary(summaryInputs, summarizer::describeExperiment, summarizer::summarizeExperiments)
? cache.populateSummary(summaryInputs, summarizer::describeExperiment, summarizer::summarizeExperiments, _maxConcurrentRequests)
: cache.readSummary(summaryInputs);

// join entries with commas
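
The javadoc above documents the two new knobs added in this file: the reporter request payload keys (populateIfNotPresent, makeTopicEmbeddings) and the AI_MAX_CONCURRENT_REQUESTS model property, which falls back to a default of 10 when unset. An illustrative configuration might look like the following (example values only, not recommendations from this PR):

Model property (set wherever the site defines its modelprop values, e.g. the conifer vars YAML above; optional, default 10):
  AI_MAX_CONCURRENT_REQUESTS: 20

Reporter request payload (JSON), as documented in the javadoc:
  { "populateIfNotPresent": true, "makeTopicEmbeddings": false }
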
@@ -73,7 +73,6 @@ public class AiExpressionCache {
private static Logger LOG = Logger.getLogger(AiExpressionCache.class);

// parallel processing
private static final int MAX_CONCURRENT_EXPERIMENT_LOOKUPS_PER_REQUEST = 10;
private static final long VISIT_ENTRY_LOCK_MAX_WAIT_MILLIS = 50;

// cache location
@@ -317,18 +316,20 @@ private static Optional<JSONObject> readCachedData(Path entryDir) {
* @param summaryInputs gene summary inputs
* @param experimentDescriber function to describe an experiment
* @param experimentSummarizer function to summarize experiments into an expression summary
* @param maxConcurrentRequests maximum number of concurrent experiment lookups
* @return expression summary (will always be a cache hit)
*/
public JSONObject populateSummary(GeneSummaryInputs summaryInputs,
FunctionWithException<ExperimentInputs, CompletableFuture<JSONObject>> experimentDescriber,
BiFunctionWithException<String, List<JSONObject>, JSONObject> experimentSummarizer) {
BiFunctionWithException<String, List<JSONObject>, JSONObject> experimentSummarizer,
int maxConcurrentRequests) {
try {
return _cache.populateAndProcessContent(summaryInputs.getGeneId(),

// populator
entryDir -> {
// first populate each dataset entry as needed and collect experiment descriptors
List<JSONObject> experiments = populateExperiments(summaryInputs.getExperimentsWithData(), experimentDescriber);
List<JSONObject> experiments = populateExperiments(summaryInputs.getExperimentsWithData(), experimentDescriber, maxConcurrentRequests);

// sort them most-interesting first so that the "Other" section will be filled
// in that order (and also to give the AI the data in a sensible order)
@@ -362,14 +363,16 @@ public JSONObject populateSummary(GeneSummaryInputs summaryInputs,
*
* @param experimentData experiment inputs
* @param experimentDescriber function to describe an experiment
* @param maxConcurrentRequests maximum number of concurrent experiment lookups
* @return list of cached experiment descriptions
* @throws Exception if unable to generate descriptions or store
*/
private List<JSONObject> populateExperiments(List<ExperimentInputs> experimentData,
FunctionWithException<ExperimentInputs, CompletableFuture<JSONObject>> experimentDescriber) throws Exception {
FunctionWithException<ExperimentInputs, CompletableFuture<JSONObject>> experimentDescriber,
int maxConcurrentRequests) throws Exception {

// use a thread for each experiment, up to a reasonable max
int threadPoolSize = Math.min(MAX_CONCURRENT_EXPERIMENT_LOOKUPS_PER_REQUEST, experimentData.size());
int threadPoolSize = Math.min(maxConcurrentRequests, experimentData.size());

ExecutorService exec = Executors.newFixedThreadPool(threadPoolSize);
try {
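
The hard-coded MAX_CONCURRENT_EXPERIMENT_LOOKUPS_PER_REQUEST constant is removed; the cap is now passed in as maxConcurrentRequests, and the thread pool is sized to min(maxConcurrentRequests, experimentData.size()). Below is a standalone sketch of that bounded fan-out pattern (one task per experiment, never more threads than the cap); it is an illustration of the idea, not the AiExpressionCache code itself.

import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;

public class BoundedFanOutSketch {

  // Describe each experiment concurrently, bounded by maxConcurrentRequests.
  public static List<String> describeAll(List<String> experimentIds, int maxConcurrentRequests) {
    int threadPoolSize = Math.max(1, Math.min(maxConcurrentRequests, experimentIds.size()));
    ExecutorService exec = Executors.newFixedThreadPool(threadPoolSize);
    try {
      List<CompletableFuture<String>> futures = experimentIds.stream()
          .map(id -> CompletableFuture.supplyAsync(() -> "description of " + id, exec))
          .collect(Collectors.toList());
      // join() waits for each description; failures surface as CompletionException
      return futures.stream().map(CompletableFuture::join).collect(Collectors.toList());
    }
    finally {
      exec.shutdown();
    }
  }
}
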
@@ -0,0 +1,134 @@
package org.apidb.apicommon.model.report.ai.expression;

import java.time.Duration;
import java.util.concurrent.CompletableFuture;

import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.WdkModelException;

import com.anthropic.client.AnthropicClientAsync;
import com.anthropic.client.okhttp.AnthropicOkHttpClientAsync;
import com.anthropic.models.messages.MessageCreateParams;
import com.anthropic.models.messages.Model;
import com.openai.models.ResponseFormatJsonSchema.JsonSchema.Schema;

public class ClaudeSummarizer extends Summarizer {

public static final Model CLAUDE_MODEL = Model.CLAUDE_SONNET_4_5_20250929;
public static final boolean USE_EXTENDED_THINKING = false;

private static final String CLAUDE_API_KEY_PROP_NAME = "CLAUDE_API_KEY";

private final AnthropicClientAsync _claudeClient;

public ClaudeSummarizer(WdkModel wdkModel, DailyCostMonitor costMonitor, boolean makeTopicEmbeddings) throws WdkModelException {
super(wdkModel, costMonitor, makeTopicEmbeddings);

String apiKey = wdkModel.getProperties().get(CLAUDE_API_KEY_PROP_NAME);
if (apiKey == null) {
throw new WdkModelException("WDK property '" + CLAUDE_API_KEY_PROP_NAME + "' has not been set.");
}

_claudeClient = AnthropicOkHttpClientAsync.builder()
.apiKey(apiKey)
.maxRetries(32) // Handle 429 errors
.checkJacksonVersionCompatibility(false)
.build();
}

@Override
protected CompletableFuture<String> callApiForJson(String prompt, Schema schema) {
// Convert JSON schema to natural language description for Claude
String jsonFormatInstructions = convertSchemaToPromptInstructions(schema);

String enhancedPrompt = prompt + "\n\n" + jsonFormatInstructions;

MessageCreateParams.Builder requestBuilder = MessageCreateParams.builder()
.model(CLAUDE_MODEL)
.maxTokens((long) MAX_RESPONSE_TOKENS)
.system(SYSTEM_MESSAGE)
.addUserMessage(enhancedPrompt);

if (USE_EXTENDED_THINKING) {
requestBuilder.enabledThinking(1024);
}

MessageCreateParams request = requestBuilder.build();

return retryOnOverload(
() -> _claudeClient.messages().create(request),
e -> e instanceof com.anthropic.errors.InternalServerException,
"Claude API call"
).thenApply(response -> {
// Convert Claude usage to TokenUsage for cost monitoring
com.anthropic.models.messages.Usage claudeUsage = response.usage();
TokenUsage tokenUsage = TokenUsage.builder()
.promptTokens(claudeUsage.inputTokens())
.completionTokens(claudeUsage.outputTokens())
.build();

_costMonitor.updateCost(tokenUsage);

// Extract text from content blocks using stream API
String rawText = response.content().stream()
.flatMap(contentBlock -> contentBlock.text().stream())
.map(textBlock -> textBlock.text())
.findFirst()
.orElseThrow(() -> new RuntimeException("No text content found in Claude response"));

// Strip JSON markdown formatting if present
return stripJsonMarkdown(rawText);
});
}

@Override
protected void updateCostMonitor(Object apiResponse) {
// Claude response handling is done in callApiForJson
}

private String stripJsonMarkdown(String text) {
String trimmed = text.trim();

// Remove ```json and ``` markdown formatting
if (trimmed.startsWith("```json")) {
trimmed = trimmed.substring(7); // Remove "```json"
} else if (trimmed.startsWith("```")) {
trimmed = trimmed.substring(3); // Remove "```"
}

if (trimmed.endsWith("```")) {
trimmed = trimmed.substring(0, trimmed.length() - 3); // Remove trailing "```"
}

return trimmed.trim();
}

private String convertSchemaToPromptInstructions(Schema schema) {
// Convert OpenAI JSON schema to Claude-friendly format instructions
if (schema == experimentResponseSchema) {
return "Respond in valid JSON format matching this exact structure:\n" +
"{\n" +
" \"one_sentence_summary\": \"string describing gene expression\",\n" +
" \"biological_importance\": \"integer 0-5\",\n" +
" \"confidence\": \"integer 0-5\",\n" +
" \"experiment_keywords\": [\"array\", \"of\", \"strings\"],\n" +
" \"notes\": \"string with additional context\"\n" +
"}";
} else if (schema == finalResponseSchema) {
return "Respond in valid JSON format matching this exact structure:\n" +
"{\n" +
" \"headline\": \"string summarizing key results\",\n" +
" \"one_paragraph_summary\": \"string with ~100 words\",\n" +
" \"topics\": [\n" +
" {\n" +
" \"headline\": \"string summarizing topic\",\n" +
" \"one_sentence_summary\": \"string describing topic results\",\n" +
" \"dataset_ids\": [\"array\", \"of\", \"dataset_id\", \"strings\"]\n" +
" }\n" +
" ]\n" +
"}";
} else {
return "Respond in valid JSON format.";
}
}
}
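
callApiForJson wraps the Claude call in retryOnOverload, which is defined in the parent Summarizer class and is not shown in this diff; per the commit history, that retry logic was rewritten to be Java 11 compatible. The sketch below shows one way an async retry helper of that shape could be written on Java 11 (the signature, retry limit, and backoff policy here are assumptions for illustration, not the project's actual implementation).

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.function.Supplier;

public class RetrySketch {

  // Retry an async call while the failure matches isRetryable, with exponential backoff.
  public static <T> CompletableFuture<T> retryOnOverload(
      Supplier<CompletableFuture<T>> call, Predicate<Throwable> isRetryable, String label) {
    return attempt(call, isRetryable, label, 0);
  }

  private static <T> CompletableFuture<T> attempt(
      Supplier<CompletableFuture<T>> call, Predicate<Throwable> isRetryable, String label, int tries) {
    return call.get().handle((result, error) -> {
      if (error == null) {
        return CompletableFuture.completedFuture(result);
      }
      // async failures usually arrive wrapped in a CompletionException
      Throwable cause = error.getCause() != null ? error.getCause() : error;
      if (tries >= 5 || !isRetryable.test(cause)) {
        // label would typically be included in a log message here
        return CompletableFuture.<T>failedFuture(cause);
      }
      // delayedExecutor (Java 9+) provides backoff without blocking a thread
      long delaySeconds = 1L << tries;
      Executor delayed = CompletableFuture.delayedExecutor(delaySeconds, TimeUnit.SECONDS);
      return CompletableFuture.supplyAsync(() -> attempt(call, isRetryable, label, tries + 1), delayed)
          .thenCompose(retried -> retried);
    }).thenCompose(next -> next);
  }
}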