From b85c35b720604ee6267e9f56ee79ac52f1e8e968 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Thu, 31 Oct 2024 18:00:09 +0100 Subject: [PATCH] added example code and readme --- .gitignore | 3 +++ README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- example.py | 9 +++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 example.py diff --git a/.gitignore b/.gitignore index 5d381cc..5b64471 100644 --- a/.gitignore +++ b/.gitignore @@ -150,6 +150,9 @@ dmypy.json # pytype static type analyzer .pytype/ +# ML model +fabelous-mini-embedder + # Cython debug symbols cython_debug/ diff --git a/README.md b/README.md index 0ba7c2b..a278378 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,50 @@ -# fabelous-embeddings +# Fabelous Embedder Mini -This is the fabelous-embeddings model that is based on the jina-embeddings-v2-base-de Model from Jina AI. \ No newline at end of file +This repository contains a custom sentence transformer model named **fabelous-mini-embedder**, trained on 13 different programming languages and English. + +## Description + +The model was built using the Sentence Transformers library, which provides an easy-to-use interface for working with sentence embeddings, including pre-trained models. + +In addition to **fabelous-mini-embedder**, we also have a proprietary model called **fabelous-embedder-base**. Both models are trained on a vast dataset covering various programming languages, including: + +- Python +- Java +- Go +- C++ +- TypeScript + + +## Example Usage + +Here’s how to use the model to generate sentence embeddings: + +```python +from sentence_transformers import SentenceTransformer + +model = SentenceTransformer("fabelous-mini-embedder") + +instruction = "This is an example sentence" +embeddings = model.encode(instruction) + +print(embeddings) # Output: (array of numerical embeddings) +``` + +The generated embeddings can be used for tasks like semantic search or classification. 
+ +## Future Improvements + +We are currently working on the second generation of our model, focusing on: + +- **Enhanced Multilingual Support:** Adding support for German in addition to English. +- **Expanded Dataset:** Increasing the code dataset significantly to improve model performance and accuracy. + +We welcome contributions from the community to help us achieve these goals! + +## Installation + +To install the required libraries, run: + +```bash +pip install sentence-transformers +``` \ No newline at end of file diff --git a/example.py b/example.py new file mode 100644 index 0000000..32ed985 --- /dev/null +++ b/example.py @@ -0,0 +1,9 @@ +"""Encode one example sentence with fabelous-mini-embedder and print the embedding.""" + +from sentence_transformers import SentenceTransformer + +model = SentenceTransformer("fabelous-mini-embedder") + +instruction = "This is an example sentence" +embeddings = model.encode(instruction) +print(embeddings)