changed system to pseudo-system message header for better optimized output, added temp setting, removed raw setting (now unnecessary)

2024-03-11 13:27:51 -05:00 · 2024-03-11 13:27:51 -05:00 · cc0b3cc20f
parent 72003d9d52
commit cc0b3cc20f
2 changed files with 36 additions and 27 deletions
--- a/package.json
+++ b/package.json
@ -2,7 +2,7 @@
 	"name": "ollama-autocoder",
 	"displayName": "Ollama Autocoder",
 	"description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
-	"version": "0.0.7",
+	"version": "0.0.8",
 	"icon": "icon.png",
 	"publisher": "10nates",
 	"license": "MIT",
@ -56,19 +56,15 @@
 					"default": "openhermes2.5-mistral:7b-q4_K_M",
 					"description": "The model to use for generating completions"
 				},
-				"ollama-autocoder.raw input": {
-					"type": "boolean",
-					"default": false,
-					"description": "Prompt the model without formatting. Disables system message. Turn this on if you are having trouble with a model falling out of coding mode."
-				},
-				"ollama-autocoder.system message": {
+				"ollama-autocoder.message header": {
 					"type": "string",
-					"default": "You are a code autocompletion engine. Respond with a continuation of the code provided and nothing else. Code should not be in a code block. Anything that is not code should be written as a code comment.",
-					"description": "The system message to use for code completions. Type DEFAULT for Makefile."
+					"editPresentation": "multilineText",
+					"default": "The following is a complete {LANG} file named {FILE_NAME} in the project {PROJECT_NAME}. Anything NOT code is written as a CODE COMMENT. \n\n```\n",
+					"description": "Pseudo-system prompt, optimized for code completion. It is recommended to keep the format the same if modified. Leave blank for no formatting (raw)."
 				},
 				"ollama-autocoder.max tokens predicted": {
 					"type": "integer",
-					"default": 500,
+					"default": 1000,
 					"description": "The maximum number of tokens generated by the model."
 				},
 				"ollama-autocoder.prompt window size": {
@ -88,7 +84,7 @@
 				},
 				"ollama-autocoder.preview max tokens": {
 					"type": "integer",
-					"default": 10,
+					"default": 50,
 					"description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
 				},
 				"ollama-autocoder.preview delay": {
@ -101,6 +97,11 @@
 					"default": true,
 					"description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling."
 				
+				},
+				"ollama-autocoder.temperature": {
+					"type": "number",
+					"default": 0.5,
+					"description": "Temperature of the model. It is recommended to set it lower than you would for dialogue."
 				}
 			}
 		},
--- a/src/extension.ts
+++ b/src/extension.ts
@ -6,10 +6,10 @@ import axios from "axios";
 let VSConfig: vscode.WorkspaceConfiguration;
 let apiEndpoint: string;
 let apiModel: string;
-let apiSystemMessage: string | undefined;
+let apiMessageHeader: string;
+let apiTemperature: number;
 let numPredict: number;
 let promptWindowSize: number;
-let rawInput: boolean | undefined;
 let completionKeys: string;
 let responsePreview: boolean | undefined;
 let responsePreviewMaxTokens: number;
@ -20,17 +20,15 @@ function updateVSConfig() {
 	VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
 	apiEndpoint = VSConfig.get("endpoint") || "http://localhost:11434/api/generate";
 	apiModel = VSConfig.get("model") || "openhermes2.5-mistral:7b-q4_K_M"; // The model I tested with
-	apiSystemMessage = VSConfig.get("system message");
-	numPredict = VSConfig.get("max tokens predicted") || 500;
+	apiMessageHeader = VSConfig.get("message header") || "";
+	numPredict = VSConfig.get("max tokens predicted") || 1000;
 	promptWindowSize = VSConfig.get("prompt window size") || 2000;
-	rawInput = VSConfig.get("raw input");
 	completionKeys = VSConfig.get("completion keys") || " ";
 	responsePreview = VSConfig.get("response preview");
-	responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10;
+	responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 50;
 	responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy
 	continueInline = VSConfig.get("continue inline");
-
-	if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
+	apiTemperature = VSConfig.get("temperature") || 0.5;
 }

 updateVSConfig();
@ -38,6 +36,15 @@ updateVSConfig();
 // No need for restart for any of these settings
 vscode.workspace.onDidChangeConfiguration(updateVSConfig);

+// Give model additional information: returns apiMessageHeader with every {LANG}, {FILE_NAME} and {PROJECT_NAME} placeholder filled in from the given document
+function messageHeaderSub(document: vscode.TextDocument) {
+	const sub = apiMessageHeader
+		.split("{LANG}").join(document.languageId) // split/join replaces ALL occurrences and inserts the value literally ("$&", "$1" etc. are not interpreted)
+		.split("{FILE_NAME}").join(document.fileName)
+		.split("{PROJECT_NAME}").join(vscode.workspace.name || "(undefined)"); // placeholder text when no workspace name is available
+	return sub;
+}
+
 // internal function for autocomplete, not directly exposed
 async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
 	const document = textEditor.document;
@ -72,12 +79,13 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
 				// Make a request to the ollama.ai REST API
 				const response = await axios.post(apiEndpoint, {
 					model: apiModel, // Change this to the model you want to use
-					prompt: prompt,
+					prompt: messageHeaderSub(textEditor.document) + prompt,
 					stream: true,
-					system: apiSystemMessage,
-					raw: rawInput,
+					raw: true,
 					options: {
-						num_predict: numPredict
+						num_predict: numPredict,
+						temperature: apiTemperature,
+						stop: ["```"]
 					}
 				}, {
 					cancelToken: axiosCancelToken,
@ -174,13 +182,13 @@ async function provideCompletionItems(document: vscode.TextDocument, position: v
 		prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
 		const response_preview = await axios.post(apiEndpoint, {
 			model: apiModel, // Change this to the model you want to use
-			prompt: prompt,
+			prompt: messageHeaderSub(document) + prompt,
 			stream: false,
-			system: apiSystemMessage,
-			raw: rawInput,
+			raw: true,
 			options: {
 				num_predict: responsePreviewMaxTokens, // reduced compute max
-				stop: ['\n']
+				temperature: apiTemperature,
+				stop: ['\n', '```']
 			}
 		}, {
 			cancelToken: new axios.CancelToken((c) => {