From cc0b3cc20fed86d37d2795cf8bd7911e94d39237 Mon Sep 17 00:00:00 2001
From: Nathan Hedge <23344786+10Nates@users.noreply.github.com>
Date: Mon, 11 Mar 2024 13:27:51 -0500
Subject: [PATCH] changed system to pseudo-system message header for better
 optimized output, added temp setting, removed raw setting (now unnecessary)

---
 package.json     | 23 ++++++++++++-----------
 src/extension.ts | 40 ++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/package.json b/package.json
index d921e63..3fe26f1 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
 	"name": "ollama-autocoder",
 	"displayName": "Ollama Autocoder",
 	"description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
-	"version": "0.0.7",
+	"version": "0.0.8",
 	"icon": "icon.png",
 	"publisher": "10nates",
 	"license": "MIT",
@@ -56,19 +56,15 @@
 					"default": "openhermes2.5-mistral:7b-q4_K_M",
 					"description": "The model to use for generating completions"
 				},
-				"ollama-autocoder.raw input": {
-					"type": "boolean",
-					"default": false,
-					"description": "Prompt the model without formatting. Disables system message. Turn this on if you are having trouble with a model falling out of coding mode."
-				},
-				"ollama-autocoder.system message": {
+				"ollama-autocoder.message header": {
 					"type": "string",
-					"default": "You are a code autocompletion engine. Respond with a continuation of the code provided and nothing else. Code should not be in a code block. Anything that is not code should be written as a code comment.",
-					"description": "The system message to use for code completions. Type DEFAULT for Makefile."
+					"editPresentation": "multilineText",
+					"default": "The following is a complete {LANG} file named {FILE_NAME} in the project {PROJECT_NAME}. Anything NOT code is written as a CODE COMMENT. \n\n```\n",
+					"description": "Pseudo-system prompt, optimized for code completion. It is recommended to keep the format the same if modified. Leave blank for no formatting (raw)."
 				},
 				"ollama-autocoder.max tokens predicted": {
 					"type": "integer",
-					"default": 500,
+					"default": 1000,
 					"description": "The maximum number of tokens generated by the model."
 				},
 				"ollama-autocoder.prompt window size": {
@@ -88,7 +84,7 @@
 				},
 				"ollama-autocoder.preview max tokens": {
 					"type": "integer",
-					"default": 10,
+					"default": 50,
 					"description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost."
 				},
 				"ollama-autocoder.preview delay": {
@@ -101,6 +97,11 @@
 					"default": true,
 					"description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling."
 				
+				},
+				"ollama-autocoder.temperature": {
+					"type": "number",
+					"default": 0.5,
+					"description": "Temperature of the model. It is recommended to set it lower than you would for dialogue."
 				}
 			}
 		},
diff --git a/src/extension.ts b/src/extension.ts
index 32e6ca5..3b91019 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -6,10 +6,10 @@ import axios from "axios";
 let VSConfig: vscode.WorkspaceConfiguration;
 let apiEndpoint: string;
 let apiModel: string;
-let apiSystemMessage: string | undefined;
+let apiMessageHeader: string;
+let apiTemperature: number;
 let numPredict: number;
 let promptWindowSize: number;
-let rawInput: boolean | undefined;
 let completionKeys: string;
 let responsePreview: boolean | undefined;
 let responsePreviewMaxTokens: number;
@@ -20,17 +20,15 @@ function updateVSConfig() {
 	VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
 	apiEndpoint = VSConfig.get("endpoint") || "http://localhost:11434/api/generate";
 	apiModel = VSConfig.get("model") || "openhermes2.5-mistral:7b-q4_K_M"; // The model I tested with
-	apiSystemMessage = VSConfig.get("system message");
-	numPredict = VSConfig.get("max tokens predicted") || 500;
+	apiMessageHeader = VSConfig.get("message header") || "";
+	numPredict = VSConfig.get("max tokens predicted") || 1000;
 	promptWindowSize = VSConfig.get("prompt window size") || 2000;
-	rawInput = VSConfig.get("raw input");
 	completionKeys = VSConfig.get("completion keys") || " ";
 	responsePreview = VSConfig.get("response preview");
-	responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10;
+	responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 50;
 	responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy
 	continueInline = VSConfig.get("continue inline");
-
-	if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
+	apiTemperature = VSConfig.get<number>("temperature") ?? 0.5; // ?? (not ||) so an explicit temperature of 0 is kept
 }
 
 updateVSConfig();
@@ -38,6 +36,15 @@ updateVSConfig();
 // No need for restart for any of these settings
 vscode.workspace.onDidChangeConfiguration(updateVSConfig);
 
+// Give model additional information
+function messageHeaderSub(document: vscode.TextDocument) {
+	// Global regex + function replacers: fill every placeholder occurrence and keep "$" in the values literal
+	return apiMessageHeader
+		.replace(/\{LANG\}/g, () => document.languageId)
+		.replace(/\{FILE_NAME\}/g, () => document.fileName)
+		.replace(/\{PROJECT_NAME\}/g, () => vscode.workspace.name || "(undefined)");
+}
+
 // internal function for autocomplete, not directly exposed
 async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
 	const document = textEditor.document;
@@ -72,12 +79,13 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
 				// Make a request to the ollama.ai REST API
 				const response = await axios.post(apiEndpoint, {
 					model: apiModel, // Change this to the model you want to use
-					prompt: prompt,
+					prompt: messageHeaderSub(textEditor.document) + prompt,
 					stream: true,
-					system: apiSystemMessage,
-					raw: rawInput,
+					raw: true,
 					options: {
-						num_predict: numPredict
+						num_predict: numPredict,
+						temperature: apiTemperature,
+						stop: ["```"]
 					}
 				}, {
 					cancelToken: axiosCancelToken,
@@ -174,13 +182,13 @@ async function provideCompletionItems(document: vscode.TextDocument, position: v
 		prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
 		const response_preview = await axios.post(apiEndpoint, {
 			model: apiModel, // Change this to the model you want to use
-			prompt: prompt,
+			prompt: messageHeaderSub(document) + prompt,
 			stream: false,
-			system: apiSystemMessage,
-			raw: rawInput,
+			raw: true,
 			options: {
 				num_predict: responsePreviewMaxTokens, // reduced compute max
-				stop: ['\n']
+				temperature: apiTemperature,
+				stop: ['\n', '```']
 			}
 		}, {
 			cancelToken: new axios.CancelToken((c) => {