changed system to pseudo-system message header for better optimized output, added temp setting, removed raw setting (now unnecessary)

This commit is contained in:
Nathan Hedge 2024-03-11 13:27:51 -05:00
parent 72003d9d52
commit cc0b3cc20f
No known key found for this signature in database
GPG Key ID: 1ADBA36D6E304C5C
2 changed files with 36 additions and 27 deletions

View File

@ -2,7 +2,7 @@
"name": "ollama-autocoder", "name": "ollama-autocoder",
"displayName": "Ollama Autocoder", "displayName": "Ollama Autocoder",
"description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality", "description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
"version": "0.0.7", "version": "0.0.8",
"icon": "icon.png", "icon": "icon.png",
"publisher": "10nates", "publisher": "10nates",
"license": "MIT", "license": "MIT",
@ -56,19 +56,15 @@
"default": "openhermes2.5-mistral:7b-q4_K_M", "default": "openhermes2.5-mistral:7b-q4_K_M",
"description": "The model to use for generating completions" "description": "The model to use for generating completions"
}, },
"ollama-autocoder.raw input": { "ollama-autocoder.message header": {
"type": "boolean",
"default": false,
"description": "Prompt the model without formatting. Disables system message. Turn this on if you are having trouble with a model falling out of coding mode."
},
"ollama-autocoder.system message": {
"type": "string", "type": "string",
"default": "You are a code autocompletion engine. Respond with a continuation of the code provided and nothing else. Code should not be in a code block. Anything that is not code should be written as a code comment.", "editPresentation": "multilineText",
"description": "The system message to use for code completions. Type DEFAULT for Makefile." "default": "The following is a complete {LANG} file named {FILE_NAME} in the project {PROJECT_NAME}. Anything NOT code is written as a CODE COMMENT. \n\n```\n",
"description": "Pseudo-system prompt, optimized for code completion. It is recommended to keep the format the same if modified. Leave blank for no formatting (raw)."
}, },
"ollama-autocoder.max tokens predicted": { "ollama-autocoder.max tokens predicted": {
"type": "integer", "type": "integer",
"default": 500, "default": 1000,
"description": "The maximum number of tokens generated by the model." "description": "The maximum number of tokens generated by the model."
}, },
"ollama-autocoder.prompt window size": { "ollama-autocoder.prompt window size": {
@ -88,7 +84,7 @@
}, },
"ollama-autocoder.preview max tokens": { "ollama-autocoder.preview max tokens": {
"type": "integer", "type": "integer",
"default": 10, "default": 50,
"description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost." "description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
}, },
"ollama-autocoder.preview delay": { "ollama-autocoder.preview delay": {
@ -101,6 +97,11 @@
"default": true, "default": true,
"description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling." "description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling."
},
"ollama-autocoder.temperature": {
"type": "number",
"default": 0.5,
"description": "Temperature of the model. It is recommended to set it lower than you would for dialogue."
} }
} }
}, },

View File

@ -6,10 +6,10 @@ import axios from "axios";
let VSConfig: vscode.WorkspaceConfiguration; let VSConfig: vscode.WorkspaceConfiguration;
let apiEndpoint: string; let apiEndpoint: string;
let apiModel: string; let apiModel: string;
let apiSystemMessage: string | undefined; let apiMessageHeader: string;
let apiTemperature: number;
let numPredict: number; let numPredict: number;
let promptWindowSize: number; let promptWindowSize: number;
let rawInput: boolean | undefined;
let completionKeys: string; let completionKeys: string;
let responsePreview: boolean | undefined; let responsePreview: boolean | undefined;
let responsePreviewMaxTokens: number; let responsePreviewMaxTokens: number;
@ -20,17 +20,15 @@ function updateVSConfig() {
VSConfig = vscode.workspace.getConfiguration("ollama-autocoder"); VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
apiEndpoint = VSConfig.get("endpoint") || "http://localhost:11434/api/generate"; apiEndpoint = VSConfig.get("endpoint") || "http://localhost:11434/api/generate";
apiModel = VSConfig.get("model") || "openhermes2.5-mistral:7b-q4_K_M"; // The model I tested with apiModel = VSConfig.get("model") || "openhermes2.5-mistral:7b-q4_K_M"; // The model I tested with
apiSystemMessage = VSConfig.get("system message"); apiMessageHeader = VSConfig.get("message header") || "";
numPredict = VSConfig.get("max tokens predicted") || 500; numPredict = VSConfig.get("max tokens predicted") || 1000;
promptWindowSize = VSConfig.get("prompt window size") || 2000; promptWindowSize = VSConfig.get("prompt window size") || 2000;
rawInput = VSConfig.get("raw input");
completionKeys = VSConfig.get("completion keys") || " "; completionKeys = VSConfig.get("completion keys") || " ";
responsePreview = VSConfig.get("response preview"); responsePreview = VSConfig.get("response preview");
responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10; responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 50;
responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy
continueInline = VSConfig.get("continue inline"); continueInline = VSConfig.get("continue inline");
apiTemperature = VSConfig.get("temperature") || 0.5;
if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
} }
updateVSConfig(); updateVSConfig();
@ -38,6 +36,15 @@ updateVSConfig();
// No need for restart for any of these settings // No need for restart for any of these settings
vscode.workspace.onDidChangeConfiguration(updateVSConfig); vscode.workspace.onDidChangeConfiguration(updateVSConfig);
// Give model additional information
function messageHeaderSub(document: vscode.TextDocument): string {
	// Expands the {LANG}, {FILE_NAME} and {PROJECT_NAME} placeholders of the
	// configured message header (apiMessageHeader) with details of `document`
	// and the current workspace, and returns the resulting prompt prefix.
	//
	// A single global pass with a replacer callback is used because:
	//  - replace() with a string pattern only substitutes the first occurrence,
	//    so a placeholder repeated in a customised header would be left as-is;
	//  - a callback's return value is inserted verbatim, whereas "$&", "$1",
	//    "$$" etc. inside a replacement *string* are treated as special patterns
	//    (file paths and workspace names may contain "$");
	//  - text inserted for one placeholder is never re-scanned, so a value that
	//    happens to contain another placeholder is not expanded a second time.
	//
	// NOTE(review): document.fileName is inserted unchanged; if it holds a full
	// path rather than a bare name, consider whether that is what the header
	// wording ("file named {FILE_NAME}") intends — confirm.
	return apiMessageHeader.replace(
		/\{(LANG|FILE_NAME|PROJECT_NAME)\}/g,
		(_placeholder: string, key: string): string => {
			switch (key) {
				case "LANG":
					return document.languageId;
				case "FILE_NAME":
					return document.fileName;
				default:
					// PROJECT_NAME: falls back to the same marker text as before when
					// no workspace name is available.
					return vscode.workspace.name || "(undefined)";
			}
		}
	);
}
// internal function for autocomplete, not directly exposed // internal function for autocomplete, not directly exposed
async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) { async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
const document = textEditor.document; const document = textEditor.document;
@ -72,12 +79,13 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
// Make a request to the ollama.ai REST API // Make a request to the ollama.ai REST API
const response = await axios.post(apiEndpoint, { const response = await axios.post(apiEndpoint, {
model: apiModel, // Change this to the model you want to use model: apiModel, // Change this to the model you want to use
prompt: prompt, prompt: messageHeaderSub(textEditor.document) + prompt,
stream: true, stream: true,
system: apiSystemMessage, raw: true,
raw: rawInput,
options: { options: {
num_predict: numPredict num_predict: numPredict,
temperature: apiTemperature,
stop: ["```"]
} }
}, { }, {
cancelToken: axiosCancelToken, cancelToken: axiosCancelToken,
@ -174,13 +182,13 @@ async function provideCompletionItems(document: vscode.TextDocument, position: v
prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length); prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
const response_preview = await axios.post(apiEndpoint, { const response_preview = await axios.post(apiEndpoint, {
model: apiModel, // Change this to the model you want to use model: apiModel, // Change this to the model you want to use
prompt: prompt, prompt: messageHeaderSub(document) + prompt,
stream: false, stream: false,
system: apiSystemMessage, raw: true,
raw: rawInput,
options: { options: {
num_predict: responsePreviewMaxTokens, // reduced compute max num_predict: responsePreviewMaxTokens, // reduced compute max
stop: ['\n'] temperature: apiTemperature,
stop: ['\n', '```']
} }
}, { }, {
cancelToken: new axios.CancelToken((c) => { cancelToken: new axios.CancelToken((c) => {