diff --git a/package.json b/package.json
index eaec156..39d0e35 100644
--- a/package.json
+++ b/package.json
@@ -85,6 +85,11 @@
 				"type": "boolean",
 				"default": true,
 				"description": "Inline completion label will be the first line of response. Max is 10 tokens, but this is unlikely to be reached. If the first line is empty, the default label will be used. Not streamable, disable on slow devices."
+			},
+			"ollama-autocoder.preview max tokens": {
+				"type": "integer",
+				"default": 10,
+				"description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
 			}
 		}
 	},
diff --git a/src/extension.ts b/src/extension.ts
index af01506..08da8e4 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -12,6 +12,7 @@ let promptWindowSize: number;
 let rawInput: boolean | undefined;
 let completionKeys: string;
 let responsePreview: boolean | undefined;
+let responsePreviewMaxTokens: number;
 
 function updateVSConfig() {
 	VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
@@ -23,6 +24,7 @@ function updateVSConfig() {
 	rawInput = VSConfig.get("raw input");
 	completionKeys = VSConfig.get("completion keys") || " ";
 	responsePreview = VSConfig.get("response preview");
+	responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10;
 
 	if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
 }
@@ -176,9 +178,9 @@ function activate(context: vscode.ExtensionContext) {
 				prompt: prompt,
 				stream: false,
 				system: apiSystemMessage,
-				raw: true,
+				raw: rawInput,
 				options: {
-					num_predict: 10, // reduced compute max. Yes, I know it's a constant. Maybe an option in the future but might confuse people.
+					num_predict: responsePreviewMaxTokens, // reduced compute max
 					stop: ['\n']
 				}
 			}, {