Vector Embeddings

State-of-the-art embedding models for semantic search, RAG applications, and similarity computation.

Overview

AbstractCore provides a unified interface to state-of-the-art embedding models across multiple providers. Generate high-quality vector embeddings for semantic search, retrieval-augmented generation (RAG), and similarity analysis.

Supported Providers

HuggingFace: open-source models, local processing (default provider)
Ollama: local embedding models
LMStudio: GUI-based local models

Quick Start

from abstractcore.embeddings import EmbeddingManager

# Create embedding manager (HuggingFace by default)
embedder = EmbeddingManager(model="sentence-transformers/all-MiniLM-L6-v2")

# Generate single embedding
text = "AbstractCore is a unified LLM interface"
embedding = embedder.embed(text)
print(f"Embedding dimension: {len(embedding)}")

# Generate batch embeddings
texts = [
    "Machine learning is fascinating",
    "AI models are getting better",
    "Python is great for data science"
]
embeddings = embedder.embed_batch(texts)
print(f"Generated {len(embeddings)} embeddings")

# Compute similarity
similarity = embedder.compute_similarity(
    "Machine learning",
    "Artificial intelligence"
)
print(f"Similarity: {similarity:.3f}")

Provider Configuration

HuggingFace (Default)

# HuggingFace provider (default)
embedder = EmbeddingManager(
    model="sentence-transformers/all-MiniLM-L6-v2",
    provider="huggingface"  # Optional, default
)

# Popular HuggingFace models
models = [
    "sentence-transformers/all-MiniLM-L6-v2",  # Fast, good quality
    "sentence-transformers/all-mpnet-base-v2", # Higher quality
    "BAAI/bge-large-en-v1.5",                 # SOTA English
    "intfloat/multilingual-e5-large"          # Multilingual
]

Ollama Provider

# First, download the model:
#   ollama pull granite-embedding:278m

# Ollama provider
embedder = EmbeddingManager(
    model="granite-embedding:278m",
    provider="ollama"
)

# Popular Ollama embedding models
models = [
    "granite-embedding:278m",     # IBM Granite
    "nomic-embed-text",          # Nomic AI
    "mxbai-embed-large"          # MixedBread AI
]

LMStudio Provider

# LMStudio provider
embedder = EmbeddingManager(
    model="text-embedding-all-minilm-l6-v2-embedding",
    provider="lmstudio",
    base_url="http://localhost:1234"  # LMStudio server
)

# Make sure LMStudio is running with an embedding model loaded
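
As a quick sanity check, you can ask the LMStudio server which models it currently exposes. The sketch below assumes LMStudio's OpenAI-compatible API is reachable on the default port and that the requests package is installed:

# Optional sanity check: list models exposed by the local LMStudio server
# (assumes the OpenAI-compatible API on the default port and the requests package).
import requests

response = requests.get("http://localhost:1234/v1/models", timeout=5)
response.raise_for_status()
print([model["id"] for model in response.json().get("data", [])])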

RAG Applications

Simple RAG System

from typing import List

from abstractcore import create_llm
from abstractcore.embeddings import EmbeddingManager

class SimpleRAG:
    def __init__(self, llm_provider="openai", llm_model="gpt-4o-mini"):
        self.llm = create_llm(llm_provider, model=llm_model)
        self.embedder = EmbeddingManager(model="sentence-transformers/all-MiniLM-L6-v2")
        self.knowledge_base = []
        self.embeddings = []
    
    def add_knowledge(self, documents: List[str]):
        """Add documents to knowledge base."""
        self.knowledge_base.extend(documents)
        new_embeddings = self.embedder.embed_batch(documents)
        self.embeddings.extend(new_embeddings)
    
    def retrieve(self, query: str, top_k: int = 3) -> List[str]:
        """Retrieve relevant documents."""
        query_embedding = self.embedder.embed(query)
        
        similarities = []
        for i, doc_embedding in enumerate(self.embeddings):
            similarity = self.embedder.compute_similarity_vectors(
                query_embedding, doc_embedding
            )
            similarities.append((i, similarity))
        
        # Get top_k most similar documents
        similarities.sort(key=lambda x: x[1], reverse=True)
        return [self.knowledge_base[i] for i, _ in similarities[:top_k]]
    
    def generate(self, query: str) -> str:
        """Generate answer using retrieved context."""
        # Retrieve relevant documents
        context_docs = self.retrieve(query)
        context = "\n\n".join(context_docs)
        
        # Create prompt with context
        prompt = f"""Based on the following context, answer the question.

Context:
{context}

Question: {query}

Answer:"""
        
        response = self.llm.generate(prompt)
        return response.content

# Usage
rag = SimpleRAG()

# Add knowledge
rag.add_knowledge([
    "AbstractCore is a Python library for unified LLM access.",
    "It supports OpenAI, Anthropic, Ollama, MLX, and LMStudio.",
    "AbstractCore provides tool calling across all providers.",
    "The library includes session management and embeddings."
])

# Ask questions
answer = rag.generate("What providers does AbstractCore support?")
print(answer)
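
For larger knowledge bases, the per-document loop in retrieve() can be replaced with a single vectorized computation. The sketch below is an illustrative alternative rather than part of AbstractCore's API; it assumes each stored embedding is a flat sequence of floats and computes cosine similarity against all documents at once with NumPy:

# Illustrative sketch: vectorized retrieval with NumPy (not an AbstractCore API).
import numpy as np

def retrieve_vectorized(rag, query, top_k=3):
    query_vec = np.asarray(rag.embedder.embed(query), dtype=np.float32)
    doc_matrix = np.asarray(rag.embeddings, dtype=np.float32)  # shape: (n_docs, dim)
    # Cosine similarity between the query and every document embedding at once
    scores = (doc_matrix @ query_vec) / (
        np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)
    )
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [rag.knowledge_base[i] for i in top_indices]

print(retrieve_vectorized(rag, "Which providers are supported?"))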

Similarity & Clustering

Similarity Matrix

# Compute similarity matrix
texts = [
    "Machine learning algorithms",
    "Deep learning networks", 
    "Natural language processing",
    "Computer vision systems",
    "Web development frameworks"
]

# Generate embeddings
embeddings = embedder.embed_batch(texts)

# Compute similarity matrix
similarity_matrix = embedder.compute_similarities_matrix(embeddings, embeddings)

print("Similarity Matrix:")
for i, text1 in enumerate(texts):
    for j, text2 in enumerate(texts):
        if i < j:  # Only show unique pairs (skip self-comparisons)
            similarity = similarity_matrix[i][j]
            print(f"{text1[:20]}... vs {text2[:20]}...: {similarity:.3f}")
    print()

Automatic Clustering

# Find similar clusters
texts = [
    "Python programming language",
    "JavaScript for web development", 
    "Machine learning with Python",
    "Deep learning algorithms",
    "React JavaScript framework",
    "Neural networks and AI",
    "Web development with JS",
    "Python data science tools"
]

embeddings = embedder.embed_batch(texts)

# Find clusters with similarity threshold
clusters = embedder.find_similar_clusters(
    texts, 
    embeddings, 
    threshold=0.7  # Similarity threshold
)

print("Discovered clusters:")
for i, cluster in enumerate(clusters):
    print(f"Cluster {i+1}:")
    for text in cluster:
        print(f"  - {text}")
    print()
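
Conceptually, threshold-based clustering groups texts whose pairwise cosine similarity stays above the chosen threshold; raising the threshold produces more, smaller clusters. The greedy sketch below only illustrates that idea and is not AbstractCore's internal algorithm:

# Illustrative greedy clustering sketch (not AbstractCore's internal algorithm).
import numpy as np

def greedy_clusters(texts, embeddings, threshold=0.7):
    vectors = np.asarray(embeddings, dtype=np.float32)
    vectors /= np.linalg.norm(vectors, axis=1, keepdims=True)  # normalize for cosine
    clusters, assigned = [], set()
    for i in range(len(texts)):
        if i in assigned:
            continue
        # Seed a new cluster with text i and pull in every unassigned similar text
        members = [j for j in range(len(texts))
                   if j not in assigned and float(vectors[i] @ vectors[j]) >= threshold]
        assigned.update(members)
        clusters.append([texts[j] for j in members])
    return clusters

print(greedy_clusters(texts, embeddings, threshold=0.7))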

Performance Optimization

Caching

# Enable caching for better performance
embedder = EmbeddingManager(
    model="sentence-transformers/all-MiniLM-L6-v2",
    cache_dir="./embedding_cache",  # Custom cache directory
    use_cache=True  # Enable caching (default)
)

# Embeddings are automatically cached
embedding1 = embedder.embed("This text will be cached")
embedding2 = embedder.embed("This text will be cached")  # Retrieved from cache

# Clear cache if needed
embedder.clear_cache()
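
A rough way to observe the cache is to time a repeated call: the second call for the same text should return almost instantly because the stored vector is reused instead of recomputed.

# Rough timing check: the second call should be served from the cache.
import time

start = time.time()
embedder.embed("A sentence used to demonstrate caching")
first_call = time.time() - start

start = time.time()
embedder.embed("A sentence used to demonstrate caching")
cached_call = time.time() - start

print(f"First call: {first_call:.3f}s, cached call: {cached_call:.4f}s")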

Batch Processing

# Process large batches efficiently
large_text_collection = [f"Document {i}" for i in range(1000)]

# Process in batches for memory efficiency
batch_size = 32
all_embeddings = []

for i in range(0, len(large_text_collection), batch_size):
    batch = large_text_collection[i:i+batch_size]
    batch_embeddings = embedder.embed_batch(batch)
    all_embeddings.extend(batch_embeddings)
    
    print(f"Processed {min(i+batch_size, len(large_text_collection))}/{len(large_text_collection)} documents")

print(f"Generated {len(all_embeddings)} embeddings")

Model Comparison

# Compare different models
models = [
    "sentence-transformers/all-MiniLM-L6-v2",  # Fast, lightweight
    "sentence-transformers/all-mpnet-base-v2", # Better quality
    "BAAI/bge-large-en-v1.5"                  # SOTA performance
]

test_texts = ["Machine learning", "Artificial intelligence"]

import time

for model_name in models:
    embedder = EmbeddingManager(model=model_name)

    # Time embedding generation (the first run also includes model loading)
    start = time.time()
    embeddings = embedder.embed_batch(test_texts)
    duration = time.time() - start
    
    print(f"Model: {model_name}")
    print(f"Dimension: {len(embeddings[0])}")
    print(f"Time: {duration:.3f}s")
    print(f"Speed: {len(test_texts)/duration:.1f} texts/sec")
    print()

Related Documentation