changed system to pseudo-system message header for better optimized output, added temp setting, removed raw setting (now unnecessary)
parent 72003d9d52
commit cc0b3cc20f
package.json

@@ -2,7 +2,7 @@
   "name": "ollama-autocoder",
   "displayName": "Ollama Autocoder",
   "description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "icon": "icon.png",
   "publisher": "10nates",
   "license": "MIT",
@@ -56,19 +56,15 @@
         "default": "openhermes2.5-mistral:7b-q4_K_M",
         "description": "The model to use for generating completions"
       },
-      "ollama-autocoder.raw input": {
-        "type": "boolean",
-        "default": false,
-        "description": "Prompt the model without formatting. Disables system message. Turn this on if you are having trouble with a model falling out of coding mode."
-      },
-      "ollama-autocoder.system message": {
+      "ollama-autocoder.message header": {
         "type": "string",
-        "default": "You are a code autocompletion engine. Respond with a continuation of the code provided and nothing else. Code should not be in a code block. Anything that is not code should be written as a code comment.",
-        "description": "The system message to use for code completions. Type DEFAULT for Makefile."
+        "editPresentation": "multilineText",
+        "default": "The following is a complete {LANG} file named {FILE_NAME} in the project {PROJECT_NAME}. Anything NOT code is written as a CODE COMMENT. \n\n```\n",
+        "description": "Pseudo-system prompt, optimized for code completion. It is recommended to keep the format the same if modified. Leave blank for no formatting (raw)."
       },
       "ollama-autocoder.max tokens predicted": {
         "type": "integer",
-        "default": 500,
+        "default": 1000,
         "description": "The maximum number of tokens generated by the model."
       },
       "ollama-autocoder.prompt window size": {
@@ -88,7 +84,7 @@
       },
       "ollama-autocoder.preview max tokens": {
         "type": "integer",
-        "default": 10,
+        "default": 50,
         "description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
       },
       "ollama-autocoder.preview delay": {
@@ -101,6 +97,11 @@
         "default": true,
         "description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling."
-      }
+      },
+      "ollama-autocoder.temperature": {
+        "type": "number",
+        "default": 0.5,
+        "description": "Temperature of the model. It is recommended to set it lower than you would for dialogue."
+      }
     }
   }
 },
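The net effect of the package.json changes: the old "raw input" and "system message" settings are folded into a single "message header" template (with {LANG}, {FILE_NAME}, and {PROJECT_NAME} placeholders), and a temperature option is added. A sketch of a user-level VS Code settings.json override using the new keys; the values are illustrative, not recommendations:

{
  // Hypothetical override: an empty header reproduces the removed "raw input" behavior,
  // per the setting description ("Leave blank for no formatting (raw)").
  "ollama-autocoder.message header": "",
  // Below the 0.5 default; the setting description recommends keeping it lower than for dialogue.
  "ollama-autocoder.temperature": 0.2
}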
@@ -6,10 +6,10 @@ import axios from "axios";
 let VSConfig: vscode.WorkspaceConfiguration;
 let apiEndpoint: string;
 let apiModel: string;
-let apiSystemMessage: string | undefined;
+let apiMessageHeader: string;
+let apiTemperature: number;
 let numPredict: number;
 let promptWindowSize: number;
-let rawInput: boolean | undefined;
 let completionKeys: string;
 let responsePreview: boolean | undefined;
 let responsePreviewMaxTokens: number;
@@ -20,17 +20,15 @@ function updateVSConfig() {
   VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
   apiEndpoint = VSConfig.get("endpoint") || "http://localhost:11434/api/generate";
   apiModel = VSConfig.get("model") || "openhermes2.5-mistral:7b-q4_K_M"; // The model I tested with
-  apiSystemMessage = VSConfig.get("system message");
-  numPredict = VSConfig.get("max tokens predicted") || 500;
+  apiMessageHeader = VSConfig.get("message header") || "";
+  numPredict = VSConfig.get("max tokens predicted") || 1000;
   promptWindowSize = VSConfig.get("prompt window size") || 2000;
-  rawInput = VSConfig.get("raw input");
   completionKeys = VSConfig.get("completion keys") || " ";
   responsePreview = VSConfig.get("response preview");
-  responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10;
+  responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 50;
   responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy
   continueInline = VSConfig.get("continue inline");
-  if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
+  apiTemperature = VSConfig.get("temperature") || 0.5;
 }
 
 updateVSConfig();
@@ -38,6 +36,15 @@ updateVSConfig();
 // No need for restart for any of these settings
 vscode.workspace.onDidChangeConfiguration(updateVSConfig);
 
+// Give model additional information
+function messageHeaderSub(document: vscode.TextDocument) {
+  const sub = apiMessageHeader
+    .replace("{LANG}", document.languageId)
+    .replace("{FILE_NAME}", document.fileName)
+    .replace("{PROJECT_NAME}", vscode.workspace.name || "(undefined)");
+  return sub;
+}
+
 // internal function for autocomplete, not directly exposed
 async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
   const document = textEditor.document;
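With the shipped default template, messageHeaderSub collapses to a one-line prefix string. A sketch of what it returns for a hypothetical document (the languageId, fileName, and workspace name below are invented for illustration; the real values come straight from the open editor and workspace):

// messageHeaderSub(document) where languageId is "typescript",
// fileName is "/home/user/my-project/src/index.ts", and the workspace name is "my-project":
// "The following is a complete typescript file named /home/user/my-project/src/index.ts
//  in the project my-project. Anything NOT code is written as a CODE COMMENT. \n\n```\n"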
@@ -72,12 +79,13 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
   // Make a request to the ollama.ai REST API
   const response = await axios.post(apiEndpoint, {
     model: apiModel, // Change this to the model you want to use
-    prompt: prompt,
+    prompt: messageHeaderSub(textEditor.document) + prompt,
     stream: true,
-    system: apiSystemMessage,
-    raw: rawInput,
+    raw: true,
     options: {
-      num_predict: numPredict
+      num_predict: numPredict,
+      temperature: apiTemperature,
+      stop: ["```"]
     }
   }, {
     cancelToken: axiosCancelToken,
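Combined with the header substitution, the body POSTed to Ollama's /api/generate endpoint now looks roughly like the following (the model name, prompt text, and cursor placeholder are illustrative; raw: true is always set because the header already supplies the framing the old system message provided):

{
  "model": "openhermes2.5-mistral:7b-q4_K_M",
  "prompt": "The following is a complete typescript file named /home/user/my-project/src/index.ts in the project my-project. Anything NOT code is written as a CODE COMMENT. \n\n```\n<file text up to the cursor>",
  "stream": true,
  "raw": true,
  "options": {
    "num_predict": 1000,
    "temperature": 0.5,
    "stop": ["```"]
  }
}

The "```" stop sequence pairs with the fence the header opens, presumably so generation halts when the model tries to close the code block rather than drifting into prose; the preview request below gets the same treatment, keeping '\n' as an additional stop.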
@@ -174,13 +182,13 @@ async function provideCompletionItems(document: vscode.TextDocument, position: vscode.Position) {
   prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
   const response_preview = await axios.post(apiEndpoint, {
     model: apiModel, // Change this to the model you want to use
-    prompt: prompt,
+    prompt: messageHeaderSub(document) + prompt,
     stream: false,
-    system: apiSystemMessage,
-    raw: rawInput,
+    raw: true,
     options: {
       num_predict: responsePreviewMaxTokens, // reduced compute max
-      stop: ['\n']
+      temperature: apiTemperature,
+      stop: ['\n', '```']
     }
   }, {
     cancelToken: new axios.CancelToken((c) => {