diff --git a/.changeset/stale-buckets-roll.md b/.changeset/stale-buckets-roll.md
new file mode 100644
index 0000000..42405b8
--- /dev/null
+++ b/.changeset/stale-buckets-roll.md
@@ -0,0 +1,5 @@
+---
+"@bashbuddy/cli": patch
+---
+
+Added Gemma 3 models and improved the ask command structure
diff --git a/apps/cli/package.json b/apps/cli/package.json
index cf174fc..65c8c3e 100644
--- a/apps/cli/package.json
+++ b/apps/cli/package.json
@@ -38,7 +38,7 @@
     "@trpc/client": "catalog:",
     "clipboardy": "^4.0.0",
     "commander": "^13.1.0",
-    "node-llama-cpp": "^3.6.0",
+    "node-llama-cpp": "^3.7.0",
     "superjson": "catalog:",
     "yaml": "^2.7.0",
     "zod": "catalog:"
diff --git a/apps/cli/src/commands/ask.ts b/apps/cli/src/commands/ask.ts
index 089ce08..9f01cb8 100644
--- a/apps/cli/src/commands/ask.ts
+++ b/apps/cli/src/commands/ask.ts
@@ -4,8 +4,9 @@ import chalk from "chalk";
 import clipboardy from "clipboardy";
 import { Command } from "commander";
 
-import type { LLMResponse } from "@bashbuddy/validators";
-import { processPrompt } from "@bashbuddy/agent";
+import type { LLMMessage } from "@bashbuddy/agent";
+import type { LLMContext, LLMResponse } from "@bashbuddy/validators";
+import { processPrompt, yamlPrompt } from "@bashbuddy/agent";
 import { SITE_URLS } from "@bashbuddy/consts";
 
 import { LocalLLM } from "../llms/localllm";
@@ -22,8 +23,14 @@ import { runCommandWithStream } from "../utils/runner";
 export function createAskCommand(): Command {
   const askCommand = new Command("ask")
     .description("Ask a question to the AI")
-    .argument("<question...>", "The question to ask the AI")
-    .action((questionParts: string[]) => {
+    .argument("[question...]", "The question to ask the AI")
+    .action((questionParts: string[] = []) => {
+      // If no question parts, prompt the user
+      if (questionParts.length === 0) {
+        promptForQuestion().catch(console.error);
+        return;
+      }
+
       // Join all parts of the question with spaces
       const question = questionParts.join(" ");
       execute(question).catch(console.error);
@@ -32,6 +39,34 @@
   return askCommand;
 }
 
+/**
+ * Prompt the user for a question if none was provided
+ */
+async function promptForQuestion() {
+  p.intro("BashBuddy");
+
+  const question = await p.text({
+    message: "What would you like to ask?",
+    placeholder: "Ask for a command",
+  });
+
+  if (p.isCancel(question) || !question) {
+    p.cancel("Operation cancelled");
+    return;
+  }
+
+  await execute(question);
+}
+
+interface ConversationState {
+  messages: LLMMessage[];
+  context: LLMContext;
+  chatId: string;
+  llm?: LocalLLM;
+  isCloudMode: boolean;
+  revisionCount: number;
+}
+
 async function execute(question: string) {
   p.intro("BashBuddy");
 
@@ -42,6 +77,7 @@
   ]);
 
   let commandToRun: string | undefined;
+  let conversationState: ConversationState;
 
   switch (mode) {
     case LOCAL_MODE: {
@@ -61,15 +97,26 @@
       await llm.init();
       modelSpinner.stop("Model loaded!");
 
-      const createNewOutputStream = (newUserInput: string) =>
-        Promise.resolve(processPrompt(llm, context, newUserInput, true));
-
-      commandToRun = await cliInfer(
-        await createNewOutputStream(question),
-        createNewOutputStream,
-        1,
-        false,
-      );
+      conversationState = {
+        messages: [
+          {
+            role: "system",
+            content: yamlPrompt(context),
+          },
+          {
+            role: "user",
+            content: question,
+          },
+        ],
+        context,
+        chatId: "local",
+        llm,
+        isCloudMode: false,
+        revisionCount: 1,
+      };
+
+      const stream = processPrompt(llm, conversationState.messages);
+      commandToRun = await handleInference(stream, conversationState);
 
       await llm.dispose();
 
@@ -79,20 +126,27 @@
       try {
         const chatId = await trpc.chat.createChat.mutate();
 
-        const createNewOutputStream = (newUserInput: string) =>
-          trpc.chat.ask.mutate({
-            input: newUserInput,
-            context,
-            chatId,
-            useYaml: true,
-          });
-
-        commandToRun = await cliInfer(
-          await createNewOutputStream(question),
-          createNewOutputStream,
-          1,
-          true,
-        );
+        conversationState = {
+          messages: [
+            {
+              role: "user",
+              content: question,
+            },
+          ],
+          context,
+          chatId,
+          isCloudMode: true,
+          revisionCount: 1,
+        };
+
+        const stream = await trpc.chat.ask.mutate({
+          input: question,
+          context,
+          chatId,
+          useYaml: true,
+        });
+
+        commandToRun = await handleInference(stream, conversationState);
       } catch (err) {
         if (err instanceof TRPCClientError) {
           // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
@@ -140,24 +194,32 @@
   }
 }
 
-async function cliInfer(
+/**
+ * Process LLM inference and return the parsed response
+ */
+async function processInference(
   outputStream: AsyncIterable<string>,
-  createNewOutputStream: (
-    newUserInput: string,
-  ) => Promise<AsyncIterable<string>>,
-  revisionCount = 1,
-  isCloudMode = false,
-): Promise<string | undefined> {
+  state: ConversationState,
+): Promise<LLMResponse | undefined> {
   const llmSpinner = p.spinner();
   llmSpinner.start("Processing...");
 
   let finalResponse: LLMResponse;
 
   try {
-    finalResponse = await parseYamlResponse(outputStream, (response) => {
-      if (response.command) {
-        llmSpinner.message(response.command);
-      }
+    const { parsed, raw } = await parseYamlResponse(
+      outputStream,
+      (response) => {
+        if (response.command) {
+          llmSpinner.message(response.command);
+        }
+      },
+    );
+
+    finalResponse = parsed;
+    state.messages.push({
+      role: "model",
+      content: raw,
     });
   } catch (err) {
     if (err instanceof ResponseParseError) {
@@ -171,25 +233,70 @@
   }
 
   llmSpinner.stop(finalResponse.command);
+  return finalResponse;
+}
 
-  if (finalResponse.wrong) {
+/**
+ * Display command information to the user
+ */
+function displayCommandInfo(response: LLMResponse): void {
+  if (response.wrong) {
     p.log.message(chalk.red("Please, limit yourself to ask for commands. "));
-    return;
   }
 
-  if (finalResponse.explanation) {
-    p.log.message(chalk.dim(`Explanation: ${finalResponse.explanation}`));
+  if (response.explanation) {
+    p.log.message(chalk.dim(`Explanation: ${response.explanation}`));
   }
 
-  if (finalResponse.dangerous) {
+  if (response.dangerous) {
     p.log.message(
       chalk.red(
        `⚠️ Be careful, buddy has marked this command as dangerous. Make sure to know what it does.`,
       ),
     );
   }
+}
+
+/**
+ * Generate a new inference stream based on user suggestion
+ */
+async function generateNewStream(
+  suggestion: string,
+  state: ConversationState,
+): Promise<AsyncIterable<string>> {
+  // Add the suggestion to the messages
+  state.messages.push({
+    role: "user",
+    content: suggestion,
+  });
+
+  // Increment revision count
+  state.revisionCount += 1;
+  // Generate a new stream based on mode
+  if (state.isCloudMode) {
+    return trpc.chat.ask.mutate({
+      input: suggestion,
+      context: state.context,
+      chatId: state.chatId,
+      useYaml: true,
+    });
+  } else {
+    if (!state.llm) {
+      throw new Error("LLM not initialized");
+    }
+    return processPrompt(state.llm, state.messages);
+  }
+}
+
+/**
+ * Handle user action on the command
+ */
+async function handleUserAction(
+  response: LLMResponse,
+  state: ConversationState,
+): Promise<string | undefined> {
   // Options for the select component
   const options = [
     { value: "copyAndRun", label: "Copy & Run" },
@@ -198,9 +305,9 @@
   ];
 
   // Only add the suggest option if we haven't reached the revision limit in cloud mode
-  if (!isCloudMode || revisionCount < 5) {
+  if (!state.isCloudMode || state.revisionCount < 5) {
     options.push({ value: "suggest", label: "Suggest changes" });
-  } else if (revisionCount >= 5) {
+  } else if (state.revisionCount >= 5) {
     p.log.message(
       chalk.yellow("You've reached the maximum of 5 revisions in cloud mode."),
     );
@@ -221,11 +328,11 @@
   switch (action) {
     case "run":
-      return finalResponse.command;
+      return response.command;
     case "copy": {
       // Copy the command to clipboard
      try {
-        await clipboardy.write(finalResponse.command);
+        await clipboardy.write(response.command);
         p.log.success("Command copied to clipboard");
      } catch {
        p.log.error("Failed to copy command to clipboard");
      }
@@ -233,7 +340,7 @@
      p.log.message(
        chalk.dim(
-          `Feel free to paste the command into your terminal: ${finalResponse.command}`,
+          `Feel free to paste the command into your terminal: ${response.command}`,
        ),
      );
@@ -242,18 +349,18 @@
    case "copyAndRun": {
      // Copy the command to clipboard and run it
      try {
-        await clipboardy.write(finalResponse.command);
+        await clipboardy.write(response.command);
        p.log.success("Command copied to clipboard");
      } catch {
        p.log.error(
-          `Failed to copy command to clipboard, but will still run. Feel free to copy it: ${finalResponse.command}`,
+          `Failed to copy command to clipboard, but will still run. Feel free to copy it: ${response.command}`,
        );
      }
 
-      return finalResponse.command;
+      return response.command;
    }
    case "suggest": {
-      // Allow user to suggest changes (original behavior when typing)
+      // Allow user to suggest changes
      const suggestion = await p.text({
        message: "What changes would you like to suggest?",
        placeholder: "Type your suggestion here",
      });
@@ -265,12 +372,8 @@
      }
 
      if (suggestion) {
-        return cliInfer(
-          await createNewOutputStream(suggestion),
-          createNewOutputStream,
-          revisionCount + 1,
-          isCloudMode,
-        );
+        const newStream = await generateNewStream(suggestion, state);
+        return handleInference(newStream, state);
      }
      return undefined;
    }
@@ -278,3 +381,24 @@
  }
 }
+
+/**
+ * Handle the entire inference process
+ */
+async function handleInference(
+  outputStream: AsyncIterable<string>,
+  state: ConversationState,
+): Promise<string | undefined> {
+  // Process the inference
+  const finalResponse = await processInference(outputStream, state);
+
+  if (!finalResponse) {
+    return undefined;
+  }
+
+  // Display command information
+  displayCommandInfo(finalResponse);
+
+  // Handle user action
+  return handleUserAction(finalResponse, state);
+}
diff --git a/apps/cli/src/llms/localllm.ts b/apps/cli/src/llms/localllm.ts
index 4c4533a..cc2e9bc 100644
--- a/apps/cli/src/llms/localllm.ts
+++ b/apps/cli/src/llms/localllm.ts
@@ -4,10 +4,14 @@
 import type { Llama, LlamaContext, LlamaModel } from "node-llama-cpp";
 import { getLlama, LlamaChatSession } from "node-llama-cpp";
 
-import type { LLM } from "@bashbuddy/agent";
+import type { LLM, LLMMessage } from "@bashbuddy/agent";
 
 import type { AIModelId } from "../utils/models";
 
+const supportsSystemPrompt = (model: AIModelId) => {
+  return model !== "Gemma-3-12B-IT-Q4_K_M" && model !== "Gemma-3-4B-IT-Q4_K_M";
+};
+
 export class NotInitializedError extends Error {
   constructor() {
     super("LocalLLM is not initialized");
@@ -48,15 +52,47 @@
    }
  }
 
-  async *infer(systemPrompt: string, prompt: string): AsyncIterable<string> {
+  async *infer(messages: LLMMessage[]): AsyncIterable<string> {
    if (!this.context) {
      throw new NotInitializedError();
    }
 
+    if (messages.length === 0) {
+      throw new Error("No messages provided");
+    }
+
+    const initialMessage = messages[0];
+
+    if (initialMessage.role !== "system") {
+      throw new Error("Initial message must be a system message");
+    }
+
+    const lastUserMessage = messages
+      .toReversed()
+      .find((message) => message.role === "user");
+
+    if (!lastUserMessage) {
+      throw new Error("No user message provided");
+    }
+
+    if (!supportsSystemPrompt(this.model) && messages.length === 2) {
+      lastUserMessage.content = `
+You're a model that doesn't support system prompts. For that reason, I'll wrap your system prompt with <system> tags, and the user message with <user> tags. TAKE EVERYTHING INSIDE THE SYSTEM TAGS AS THE MOST IMPORTANT INSTRUCTIONS, AND THEN APPLY THEM TO THE USER MESSAGES.
+
+<system>
+${initialMessage.content}
+</system>
+
+<user>
+${lastUserMessage.content}
+</user>
+`;
+    }
+
    if (!this.session) {
      this.session = new LlamaChatSession({
        contextSequence: this.context.getSequence(),
-        systemPrompt,
+        systemPrompt: initialMessage.content,
      });
    }
 
@@ -65,7 +101,7 @@
    // Start the session in a separate promise
    void this.session
-      .prompt(prompt, {
+      .prompt(lastUserMessage.content, {
        onTextChunk: (chunk: string) => {
          void writer.write(chunk);
        },
diff --git a/apps/cli/src/llms/parser.ts b/apps/cli/src/llms/parser.ts
index 3561af6..ea82414 100644
--- a/apps/cli/src/llms/parser.ts
+++ b/apps/cli/src/llms/parser.ts
@@ -88,13 +88,15 @@ export async function parseLLMResponse(
 export async function parseYamlResponse(
   stream: AsyncIterable<string>,
   cb: (response: Partial<LLMResponse>) => void,
-): Promise<LLMResponse> {
+): Promise<{ parsed: LLMResponse; raw: string }> {
   let finalResponse = "";
 
   for await (const chunk of stream) {
     finalResponse += chunk;
     try {
-      const parsed = llmResponseSchema.safeParse(parseYaml(finalResponse));
+      // Clean up YAML code block markers before attempting to parse
+      const cleanedResponse = removeYamlCodeBlockMarkers(finalResponse);
+      const parsed = llmResponseSchema.safeParse(parseYaml(cleanedResponse));
       if (parsed.success) {
         cb(parsed.data);
       }
@@ -103,8 +105,11 @@
     }
   }
 
+  // Clean up YAML code block markers in the final response
+  const cleanedFinalResponse = removeYamlCodeBlockMarkers(finalResponse);
+
   const parsedFinalResponse = llmResponseSchema.safeParse(
-    parseYaml(finalResponse),
+    parseYaml(cleanedFinalResponse),
   );
 
   if (!parsedFinalResponse.success) {
@@ -121,5 +126,18 @@
     throw new ResponseParseError();
   }
 
-  return parsedFinalResponse.data;
+  return { parsed: parsedFinalResponse.data, raw: cleanedFinalResponse };
+}
+
+/**
+ * Removes YAML code block markers from a string
+ */
+function removeYamlCodeBlockMarkers(text: string): string {
+  // Remove ```yaml at the beginning
+  let cleaned = text.replace(/^\s*```yaml\s*/i, "");
+
+  // Remove ``` at the end
+  cleaned = cleaned.replace(/\s*```\s*$/, "");
+
+  return cleaned;
 }
diff --git a/apps/cli/src/utils/models.ts b/apps/cli/src/utils/models.ts
index 4ffc9c3..44ea432 100644
--- a/apps/cli/src/utils/models.ts
+++ b/apps/cli/src/utils/models.ts
@@ -6,9 +6,26 @@ import type { ProgressData } from "./downloadManager";
 import { DownloadManager } from "./downloadManager";
 import { executeCommand } from "./shell";
 
-// Define model interface
+export const QWEN_2_5_7B = "Qwen-2.5-7B-Instruct-Q6_K";
+export const QWEN_2_5_7B_INST = "Qwen-2.5-7B-Instruct-Q4_K_M";
+export const META_LLAMA_3_1_8B_INST = "Meta-Llama-3.1-8B-Instruct-Q4_K_M";
+export const META_LLAMA_3_1_8B_INST_Q8 = "Meta-Llama-3.1-8B-Instruct-Q8_0";
+export const GEMMA_3_4B_IT = "Gemma-3-4B-IT-Q4_K_M";
+export const GEMMA_3_12B_IT = "Gemma-3-12B-IT-Q4_K_M";
+
+export const ALL_MODEL_IDS = [
+  QWEN_2_5_7B,
+  QWEN_2_5_7B_INST,
+  META_LLAMA_3_1_8B_INST,
+  META_LLAMA_3_1_8B_INST_Q8,
+  GEMMA_3_4B_IT,
+  GEMMA_3_12B_IT,
+] as const;
+
+export type AIModelId = (typeof ALL_MODEL_IDS)[number];
+
 export interface AIModel {
-  id: string;
+  id: AIModelId;
   name: string;
   downloadUrl: string;
   size: string;
@@ -21,7 +38,24 @@ export interface AIModel {
 // Available models
 export const availableModels: AIModel[] = [
   {
-    id: "Meta-Llama-3.1-8B-Instruct-Q4_K_M",
+    id: QWEN_2_5_7B,
+    name: "Qwen 2.5 7B Q6",
+    downloadUrl:
"https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q6_K.gguf", + size: "6.65 GB", + requiredRAM: 10, + }, + { + id: QWEN_2_5_7B_INST, + name: "Qwen 2.5 7B Q4", + downloadUrl: + "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf", + size: "4.58 GB", + requiredRAM: 8, + recommended: true, + }, + { + id: META_LLAMA_3_1_8B_INST, name: "Llama 3.1 8B Q4", downloadUrl: "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", @@ -30,7 +64,7 @@ export const availableModels: AIModel[] = [ recommended: true, }, { - id: "Meta-Llama-3.1-8B-Instruct-Q8_0", + id: META_LLAMA_3_1_8B_INST_Q8, name: "Llama 3.1 8B Q8", downloadUrl: "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf", @@ -38,25 +72,23 @@ export const availableModels: AIModel[] = [ requiredRAM: 12, }, - // TODO: Waiting on update from node-llama-cpp (https://github.com/withcatai/node-llama-cpp/issues/440) - // { - // id: "Gemma-3-4B-IT-Q4_K_M", - // name: "Gemma 3 4B Q4", - // description: "Less weight, faster, but less accurate", - // downloadUrl: - // "https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf", - // size: "2.4 GB", - // requiredRAM: 6, - // }, - // { - // id: "Gemma-3-12B-IT-Q4_K_M", - // name: "Gemma 3 12B Q4", - // description: "Heavier, slower, but more accurate", - // downloadUrl: - // "https://huggingface.co/unsloth/gemma-3-12b-it-GGUF/resolve/main/gemma-3-12b-it-Q4_K_M.gguf", - // size: "7.3 GB", - // requiredRAM: 12, - // }, + { + id: GEMMA_3_4B_IT, + name: "Gemma 3 4B Q4", + downloadUrl: + "https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf", + size: "2.4 GB", + requiredRAM: 4, + }, + { + id: GEMMA_3_12B_IT, + name: "Gemma 3 12B Q4", + downloadUrl: + "https://huggingface.co/unsloth/gemma-3-12b-it-GGUF/resolve/main/gemma-3-12b-it-Q4_K_M.gguf", + size: "7.3 GB", + requiredRAM: 12, + }, + // It's fucking stupid // { // id: "Llama-3.2-3B-Instruct-Q4_K_M", @@ -78,26 +110,7 @@ export const availableModels: AIModel[] = [ // size: "9.05 GB", // requiredRAM: 16, // }, - { - id: "Qwen-2.5-7B-Instruct-Q6_K", - name: "Qwen 2.5 7B Q6", - downloadUrl: - "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q6_K.gguf", - size: "6.65 GB", - requiredRAM: 10, - }, - { - id: "Qwen-2.5-7B-Instruct-Q4_K_M", - name: "Qwen 2.5 7B Q4", - downloadUrl: - "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf", - size: "4.58 GB", - requiredRAM: 8, - recommended: true, - }, -]; - -export type AIModelId = (typeof availableModels)[number]["id"]; +] as const; // Hardware acceleration types export interface HardwareAcceleration { diff --git a/apps/landing/src/components/Header.svelte b/apps/landing/src/components/Header.svelte index c6d7004..150938c 100644 --- a/apps/landing/src/components/Header.svelte +++ b/apps/landing/src/components/Header.svelte @@ -9,7 +9,8 @@ import { SITE_URLS } from "@bashbuddy/consts"; - const BANNER_STORAGE_KEY = "bashbuddy-qwen-banner-closed"; + // const BANNER_STORAGE_KEY = "bashbuddy-qwen-banner-closed"; + const BANNER_STORAGE_KEY = "bashbuddy-gemma-3-banner-closed"; let isMenuOpen = $state(false); let isBannerVisible = $state(false); @@ -104,12 +105,11 @@ >

- We've added Qwen 2.5 models to BashBuddy Local! + We've added Gemma 3 models to BashBuddy Local! - Read more ➡️Read more ➡️
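
Below is a minimal, self-contained TypeScript sketch of the fence-stripping behaviour that parseYamlResponse gains in this diff. The helper body mirrors removeYamlCodeBlockMarkers from apps/cli/src/llms/parser.ts; the sample model output and the use of the yaml package's parse export are illustrative assumptions, not code taken from the repository.

import { parse as parseYaml } from "yaml";

// Same regexes as removeYamlCodeBlockMarkers in this diff: strip a leading
// ```yaml marker and a trailing ``` marker before parsing.
function removeYamlCodeBlockMarkers(text: string): string {
  let cleaned = text.replace(/^\s*```yaml\s*/i, "");
  cleaned = cleaned.replace(/\s*```\s*$/, "");
  return cleaned;
}

// Hypothetical streamed response wrapped in a Markdown fence, the kind of
// output that motivates the new stripping step.
const raw = [
  "```yaml",
  "command: ls -la",
  "explanation: List all files, including hidden ones",
  "dangerous: false",
  "```",
].join("\n");

console.log(parseYaml(removeYamlCodeBlockMarkers(raw)));
// => { command: "ls -la", explanation: "List all files, including hidden ones", dangerous: false }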