diff --git a/README.md b/README.md index c30ebbb..e496e32 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ A simple to use Ollama autocompletion engine with options exposed and streaming ## How to Use -1. In a text document, press space. The option `Autocomplete with Ollama` or a preview of the first line of autocompletion will appear. Press `enter` to start generation. +1. In a text document, press space *(or any character in the `completion keys` setting)*. The option `Autocomplete with Ollama` or a preview of the first line of autocompletion will appear. Press `enter` to start generation. - Alternatively, you can run the `Autocomplete with Ollama` command from the command pallete (or set a keybind). 2. After startup, the tokens will be streamed to your cursor. 3. To stop the generation early, press the "Cancel" button on the "Ollama Autocoder" notification or type something. @@ -22,4 +22,5 @@ A simple to use Ollama autocompletion engine with options exposed and streaming - For fastest results, an Nvidia GPU or Apple Silicon is recommended. CPU still works on small models. - The prompt only sees behind the cursor. The model is unaware of text in front of its position. -- For CPU-only, low end, or battery powered devices, it is highly recommended to disable the `response preview` option, as it automatically triggers the model. +- For CPU-only, low end, or battery powered devices, it is highly recommended to disable the `response preview` option, as it automatically triggers the model. *This will cause `continue inline` to be always on.* You can also increase the `preview delay` time. +- If you don't want inline generation to continue beyond the response preview, change the `continue inline` option in settings to false. 
*This doesn't apply to the command palette.* diff --git a/package.json b/package.json index cf77e13..97da3e7 100644 --- a/package.json +++ b/package.json @@ -90,6 +90,17 @@ "type": "integer", "default": 10, "description": "The maximum number of tokens generated by the model for the response preview. Typically not reached as the preview stops on newline. Recommended to keep very low due to computational cost." + }, + "ollama-autocoder.preview delay": { + "type": "number", + "default": 1, + "description": "Time to wait in seconds before starting inline preview generation. Prevents Ollama server from running briefly every time the completion key is pressed, which causes unnecessary compute usage. If you are not on a battery powered device, set this to 0 for a more responsive experience." + }, + "ollama-autocoder.continue inline": { + "type": "boolean", + "default": true, + "description": "Ollama continues autocompletion after what is previewed inline. Disabling disables that feature as some may find it irritating. Multiline completion is still accessible through the shortcut even after disabling." 
+ } } }, diff --git a/src/extension.ts b/src/extension.ts index 52600ab..ff59419 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -13,6 +13,8 @@ let rawInput: boolean | undefined; let completionKeys: string; let responsePreview: boolean | undefined; let responsePreviewMaxTokens: number; +let responsePreviewDelay: number; +let continueInline: boolean | undefined; function updateVSConfig() { VSConfig = vscode.workspace.getConfiguration("ollama-autocoder"); @@ -25,6 +27,8 @@ function updateVSConfig() { completionKeys = VSConfig.get("completion keys") || " "; responsePreview = VSConfig.get("response preview"); responsePreviewMaxTokens = VSConfig.get("preview max tokens") || 10; + responsePreviewDelay = VSConfig.get("preview delay") || 0; // Must be || 0 instead of || [default] because of truthy + continueInline = VSConfig.get("continue inline"); if (apiSystemMessage == "DEFAULT" || rawInput) apiSystemMessage = undefined; } @@ -169,6 +173,12 @@ function activate(context: vscode.ExtensionContext) { // Set the insert text to a placeholder item.insertText = new vscode.SnippetString('${1:}'); + // Wait before initializing Ollama to reduce compute usage + if (responsePreview) await new Promise(resolve => setTimeout(resolve, responsePreviewDelay * 1000)); + if (cancellationToken.isCancellationRequested) { + return [ item ]; + } + // Set the label & inset text to a shortened, non-stream response if (responsePreview) { let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position)); @@ -201,7 +211,7 @@ function activate(context: vscode.ExtensionContext) { // Set the documentation to a message item.documentation = new vscode.MarkdownString('Press `Enter` to get an autocompletion from Ollama'); // Set the command to trigger the completion - item.command = { + if (continueInline || !responsePreview) item.command = { command: 'ollama-autocoder.autocomplete', title: 'Autocomplete with Ollama', arguments: [cancellationToken]