Fixed response preview always sending input as raw (it now respects the "raw input" setting); added a "preview max tokens" setting to cap response preview generation.

This commit is contained in:
Nathan Hedge 2024-01-08 20:21:32 -06:00
parent 5153544346
commit e254010ff4
No known key found for this signature in database
GPG Key ID: 1ADBA36D6E304C5C
2 changed files with 9 additions and 2 deletions

View File

@ -85,6 +85,11 @@
"type": "boolean",
"default": true,
"description": "Inline completion label will be the first line of response. Max is 10 tokens, but this is unlikely to be reached. If the first line is empty, the default label will be used. Not streamable, disable on slow devices."
},
"ollama-autocoder.preview max tokens": {
"type": "integer",
"default": 10,
"description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
}
}
},

View File

@ -12,6 +12,7 @@ let promptWindowSize: number;
let rawInput: boolean | undefined;
let completionKeys: string;
let responsePreview: boolean | undefined;
let responsePreviewMaxTokens: number;
function updateVSConfig() {
VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
@ -23,6 +24,7 @@ function updateVSConfig() {
rawInput = VSConfig.get("raw input");
completionKeys = VSConfig.get("completion keys") || " ";
responsePreview = VSConfig.get("response preview");
responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10;
if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
}
@ -176,9 +178,9 @@ function activate(context: vscode.ExtensionContext) {
prompt: prompt,
stream: false,
system: apiSystemMessage,
raw: true,
raw: rawInput,
options: {
num_predict: 10, // reduced compute max. Yes, I know it's a constant. Maybe an option in the future but might confuse people.
num_predict: responsePreviewMaxTokens, // reduced compute max
stop: ['\n']
}
}, {