Added response previews and an option to disable it
parent a36cd2ed80
commit 5153544346
@@ -12,7 +12,7 @@ A simple to use Ollama autocompletion engine with options exposed and streaming
 ## How to Use

-1. In a text document, press space. The option `Autocomplete with Ollama` will appear. Press `enter` to start generation.
+1. In a text document, press space. The option `Autocomplete with Ollama` or a preview of the first line of the autocompletion will appear. Press `enter` to start generation.
    - Alternatively, you can run the `Autocomplete with Ollama` command from the command palette, or set a keybind (see the sketch after this hunk).
 2. After startup, the tokens will be streamed to your cursor.
 3. To stop the generation early, press the "Cancel" button on the "Ollama Autocoder" notification or type something.
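A minimal keybindings.json sketch for the keybind route mentioned in the first step. The command ID matches the one registered later in this diff; the key chord itself is only an illustrative choice, not part of the commit:

```json
[
	{
		"key": "ctrl+alt+a",
		"command": "ollama-autocoder.autocomplete"
	}
]
```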
@@ -22,4 +22,4 @@ A simple to use Ollama autocompletion engine with options exposed and streaming
 - For fastest results, an Nvidia GPU or Apple Silicon is recommended. CPU still works on small models.
 - The prompt only sees behind the cursor. The model is unaware of text in front of its position.
+- For CPU-only, low-end, or battery-powered devices, it is highly recommended to disable the `response preview` option, as it automatically triggers the model.
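A minimal settings.json sketch for turning the preview off; the key matches the package.json contribution added in the next hunk:

```json
{
	"ollama-autocoder.response preview": false
}
```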
@@ -80,6 +80,11 @@
 				"type": "string",
 				"default": " ",
 				"description": "Character that the autocompletion item provider appears on. Multiple characters will be treated as different entries. REQUIRES RELOAD"
 			},
+			"ollama-autocoder.response preview": {
+				"type": "boolean",
+				"default": true,
+				"description": "The inline completion label will be the first line of the response. The maximum is 10 tokens, but this is unlikely to be reached. If the first line is empty, the default label will be used. Not streamable; disable on slow devices."
+			}
 		}
 	},
@@ -1,4 +1,4 @@
-// Significant help from GPT4
+// Original script was GPT4 but it has been deeply Ship of Theseused.

 import * as vscode from "vscode";
 import axios from "axios";
@@ -11,6 +11,7 @@ let numPredict: number;
 let promptWindowSize: number;
 let rawInput: boolean | undefined;
 let completionKeys: string;
+let responsePreview: boolean | undefined;

 function updateVSConfig() {
 	VSConfig = vscode.workspace.getConfiguration("ollama-autocoder");
@@ -21,6 +22,7 @@ function updateVSConfig() {
 	promptWindowSize = VSConfig.get("prompt window size") || 2000;
 	rawInput = VSConfig.get("raw input");
 	completionKeys = VSConfig.get("completion keys") || " ";
+	responsePreview = VSConfig.get("response preview");

 	if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined;
 }
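One note on the unguarded `VSConfig.get("response preview")` above: the other settings use a `|| fallback`, but that pattern would be wrong for a boolean, since `|| true` would override an explicit `false`. A sketch of the typed-default alternative (illustrative only, not part of the commit):

```typescript
// vscode's typed getter returns the package.json default (true) when the
// user has not set the key, so no `||` fallback is needed for the boolean.
responsePreview = VSConfig.get<boolean>("response preview", true);
```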
@@ -157,13 +159,45 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
 function activate(context: vscode.ExtensionContext) {
 	// Register a completion provider for all file types
 	const completionProvider = vscode.languages.registerCompletionItemProvider("*", {
-		async provideCompletionItems(_, __, cancellationToken) {
+		async provideCompletionItems(document, position, cancellationToken) {

 			// Create a completion item
 			const item = new vscode.CompletionItem("Autocomplete with Ollama");

 			// Set the insert text to a placeholder
 			item.insertText = new vscode.SnippetString('${1:}');

+			// Set the label & insert text to a shortened, non-stream response
+			if (responsePreview) {
+				let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position));
+				prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
+				const response_preview = await axios.post(apiEndpoint, {
+					model: apiModel, // Change this to the model you want to use
+					prompt: prompt,
+					stream: false,
+					system: apiSystemMessage,
+					raw: true,
+					options: {
+						num_predict: 10, // reduced compute max. Yes, I know it's a constant. Maybe an option in the future, but it might confuse people.
+						stop: ['\n']
+					}
+				}, {
+					cancelToken: new axios.CancelToken((c) => {
+						const cancelPost = function () {
+							c("Autocompletion request terminated by completion cancel");
+						};
+						cancellationToken.onCancellationRequested(cancelPost);
+					})
+				});
+
+				if (response_preview.data.response.trim() != "") { // default if empty
+					item.label = response_preview.data.response.trimStart(); // tended to add whitespace at the beginning
+					item.insertText = response_preview.data.response.trimStart();
+				}
+			}
+
 			// Set the documentation to a message
-			item.documentation = new vscode.MarkdownString('Press `Enter` to get a completion from Ollama');
+			item.documentation = new vscode.MarkdownString('Press `Enter` to get an autocompletion from Ollama');
 			// Set the command to trigger the completion
 			item.command = {
 				command: 'ollama-autocoder.autocomplete',
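The final hunk cuts off inside the provider registration. For orientation, a hedged sketch of how such a provider is registered with per-character triggers, which is what the `completion keys` description ("Multiple characters will be treated as different entries") implies; this is an assumption about the surrounding code, not a verbatim excerpt:

```typescript
// Sketch only: registerCompletionItemProvider takes trigger characters as
// trailing rest arguments, so each configured completion key becomes its
// own trigger entry.
const provider = vscode.languages.registerCompletionItemProvider(
	"*",
	{ async provideCompletionItems(document, position, token) { /* ... */ return []; } },
	...completionKeys.split("")
);
context.subscriptions.push(provider);
```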