Vector Embeddings
State-of-the-art embedding models for semantic search, RAG applications, and similarity computation.
Overview
AbstractCore provides a unified interface to state-of-the-art embedding models across multiple providers. Generate high-quality vector embeddings for semantic search, retrieval-augmented generation (RAG), and similarity analysis.
Supported Providers
HuggingFace
Open-source models, local processing
Ollama
Local embedding models
LMStudio
GUI-based local models
Quick Start
from abstractcore.embeddings import EmbeddingManager
# Create embedding manager (HuggingFace by default)
embedder = EmbeddingManager(model="sentence-transformers/all-MiniLM-L6-v2")
# Generate single embedding
text = "AbstractCore is a unified LLM interface"
embedding = embedder.embed(text)
print(f"Embedding dimension: {len(embedding)}")
# Generate batch embeddings
texts = [
"Machine learning is fascinating",
"AI models are getting better",
"Python is great for data science"
]
embeddings = embedder.embed_batch(texts)
print(f"Generated {len(embeddings)} embeddings")
# Compute similarity
similarity = embedder.compute_similarity(
"Machine learning",
"Artificial intelligence"
)
print(f"Similarity: {similarity:.3f}")
Provider Configuration
HuggingFace (Default)
# HuggingFace provider (default)
embedder = EmbeddingManager(
model="sentence-transformers/all-MiniLM-L6-v2",
provider="huggingface" # Optional, default
)
# Popular HuggingFace models
models = [
"sentence-transformers/all-MiniLM-L6-v2", # Fast, good quality
"sentence-transformers/all-mpnet-base-v2", # Higher quality
"BAAI/bge-large-en-v1.5", # SOTA English
"intfloat/multilingual-e5-large" # Multilingual
]
Ollama Provider
# Ollama provider
embedder = EmbeddingManager(
model="granite-embedding:278m",
provider="ollama"
)
# Note: download the model first with `ollama pull granite-embedding:278m`
# Popular Ollama embedding models
models = [
"granite-embedding:278m", # IBM Granite
"nomic-embed-text", # Nomic AI
"mxbai-embed-large" # MixedBread AI
]
LMStudio Provider
# LMStudio provider
embedder = EmbeddingManager(
model="text-embedding-all-minilm-l6-v2-embedding",
provider="lmstudio",
base_url="http://localhost:1234" # LMStudio server
)
# Make sure LMStudio is running with an embedding model loaded
Semantic Search
Basic Semantic Search
from abstractcore.embeddings import EmbeddingManager
# Create embedder
embedder = EmbeddingManager(model="sentence-transformers/all-MiniLM-L6-v2")
# Document collection
documents = [
"Python is a programming language",
"Machine learning uses algorithms to learn patterns",
"Natural language processing analyzes text",
"Deep learning is a subset of machine learning",
"JavaScript is used for web development"
]
# Generate embeddings for all documents
doc_embeddings = embedder.embed_batch(documents)
# Search query
query = "What is ML?"
query_embedding = embedder.embed(query)
# Find most similar documents
similarities = []
for i, doc_emb in enumerate(doc_embeddings):
similarity = embedder.compute_similarity_vectors(query_embedding, doc_emb)
similarities.append((i, similarity, documents[i]))
# Sort by similarity
similarities.sort(key=lambda x: x[1], reverse=True)
print("Search results:")
for rank, (idx, score, doc) in enumerate(similarities[:3], 1):
print(f"{rank}. {doc} (score: {score:.3f})")
Advanced Semantic Search
from typing import List, Tuple
from abstractcore.embeddings import EmbeddingManager
class SemanticSearchEngine:
def __init__(self, model: str = "sentence-transformers/all-MiniLM-L6-v2"):
self.embedder = EmbeddingManager(model=model)
self.documents = []
self.embeddings = []
def add_documents(self, docs: List[str]):
"""Add documents to the search index."""
self.documents.extend(docs)
new_embeddings = self.embedder.embed_batch(docs)
self.embeddings.extend(new_embeddings)
def search(self, query: str, top_k: int = 5) -> List[Tuple[str, float]]:
"""Search for similar documents."""
query_embedding = self.embedder.embed(query)
similarities = []
for i, doc_embedding in enumerate(self.embeddings):
similarity = self.embedder.compute_similarity_vectors(
query_embedding, doc_embedding
)
similarities.append((self.documents[i], similarity))
# Sort by similarity and return top_k
similarities.sort(key=lambda x: x[1], reverse=True)
return similarities[:top_k]
# Usage
search_engine = SemanticSearchEngine()
search_engine.add_documents([
"AbstractCore provides unified LLM access",
"Python is great for machine learning",
"Vector embeddings enable semantic search",
"RAG combines retrieval with generation"
])
results = search_engine.search("How to use LLMs?", top_k=2)
for doc, score in results:
print(f"{doc} (score: {score:.3f})")
RAG Applications
Simple RAG System
from typing import List
from abstractcore import create_llm
from abstractcore.embeddings import EmbeddingManager
class SimpleRAG:
def __init__(self, llm_provider="openai", llm_model="gpt-4o-mini"):
self.llm = create_llm(llm_provider, model=llm_model)
self.embedder = EmbeddingManager(model="sentence-transformers/all-MiniLM-L6-v2")
self.knowledge_base = []
self.embeddings = []
def add_knowledge(self, documents: List[str]):
"""Add documents to knowledge base."""
self.knowledge_base.extend(documents)
new_embeddings = self.embedder.embed_batch(documents)
self.embeddings.extend(new_embeddings)
def retrieve(self, query: str, top_k: int = 3) -> List[str]:
"""Retrieve relevant documents."""
query_embedding = self.embedder.embed(query)
similarities = []
for i, doc_embedding in enumerate(self.embeddings):
similarity = self.embedder.compute_similarity_vectors(
query_embedding, doc_embedding
)
similarities.append((i, similarity))
# Get top_k most similar documents
similarities.sort(key=lambda x: x[1], reverse=True)
return [self.knowledge_base[i] for i, _ in similarities[:top_k]]
def generate(self, query: str) -> str:
"""Generate answer using retrieved context."""
# Retrieve relevant documents
context_docs = self.retrieve(query)
context = "\n\n".join(context_docs)
# Create prompt with context
prompt = f"""Based on the following context, answer the question.
Context:
{context}
Question: {query}
Answer:"""
response = self.llm.generate(prompt)
return response.content
# Usage
rag = SimpleRAG()
# Add knowledge
rag.add_knowledge([
"AbstractCore is a Python library for unified LLM access.",
"It supports OpenAI, Anthropic, Ollama, MLX, and LMStudio.",
"AbstractCore provides tool calling across all providers.",
"The library includes session management and embeddings."
])
# Ask questions
answer = rag.generate("What providers does AbstractCore support?")
print(answer)
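Real documents are usually much longer than a single sentence, so retrieval quality often improves if you split them into overlapping chunks before adding them to the knowledge base. The splitter below is a plain-Python illustration, not an AbstractCore API; chunk_size and overlap are arbitrary example values.
from typing import List

def chunk_text(text: str, chunk_size: int = 400, overlap: int = 50) -> List[str]:
    """Illustrative helper: split text into overlapping character windows."""
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        start += chunk_size - overlap
    return chunks

long_document = "AbstractCore is a Python library for unified LLM access. " * 20
rag.add_knowledge(chunk_text(long_document))
print(rag.generate("What is AbstractCore?"))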
Similarity & Clustering
Similarity Matrix
# Compute similarity matrix
texts = [
"Machine learning algorithms",
"Deep learning networks",
"Natural language processing",
"Computer vision systems",
"Web development frameworks"
]
# Generate embeddings
embeddings = embedder.embed_batch(texts)
# Compute similarity matrix
similarity_matrix = embedder.compute_similarities_matrix(embeddings, embeddings)
print("Similarity Matrix:")
for i, text1 in enumerate(texts):
for j, text2 in enumerate(texts):
if i <= j: # Only show upper triangle
similarity = similarity_matrix[i][j]
print(f"{text1[:20]}... vs {text2[:20]}...: {similarity:.3f}")
print()
Automatic Clustering
# Find similar clusters
texts = [
"Python programming language",
"JavaScript for web development",
"Machine learning with Python",
"Deep learning algorithms",
"React JavaScript framework",
"Neural networks and AI",
"Web development with JS",
"Python data science tools"
]
embeddings = embedder.embed_batch(texts)
# Find clusters with similarity threshold
clusters = embedder.find_similar_clusters(
texts,
embeddings,
threshold=0.7 # Similarity threshold
)
print("Discovered clusters:")
for i, cluster in enumerate(clusters):
print(f"Cluster {i+1}:")
for text in cluster:
print(f" - {text}")
print()
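Conceptually, threshold-based clustering groups texts whose pairwise similarity exceeds the cutoff. The greedy sketch below shows one way to do this by hand; it is for illustration only and is not necessarily the algorithm find_similar_clusters uses internally.
import numpy as np

def greedy_threshold_clusters(texts, embeddings, threshold=0.7):
    """Illustrative greedy clustering: attach each text to the first cluster
    whose seed embedding is similar enough, otherwise start a new cluster."""
    clusters, seeds = [], []
    for text, emb in zip(texts, embeddings):
        vec = np.asarray(emb, dtype=float)
        for cluster, seed in zip(clusters, seeds):
            cos = np.dot(vec, seed) / (np.linalg.norm(vec) * np.linalg.norm(seed))
            if cos >= threshold:
                cluster.append(text)
                break
        else:
            clusters.append([text])
            seeds.append(vec)
    return clusters

for i, cluster in enumerate(greedy_threshold_clusters(texts, embeddings), 1):
    print(f"Cluster {i}: {cluster}")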
Performance Optimization
Caching
# Enable caching for better performance
embedder = EmbeddingManager(
model="sentence-transformers/all-MiniLM-L6-v2",
cache_dir="./embedding_cache", # Custom cache directory
use_cache=True # Enable caching (default)
)
# Embeddings are automatically cached
embedding1 = embedder.embed("This text will be cached")
embedding2 = embedder.embed("This text will be cached") # Retrieved from cache
# Clear cache if needed
embedder.clear_cache()
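A quick way to confirm the cache is active is to time two identical calls; the second one should return noticeably faster because the vector is served from the cache rather than recomputed.
import time

start = time.time()
embedder.embed("Benchmark this sentence for caching")
first_call = time.time() - start

start = time.time()
embedder.embed("Benchmark this sentence for caching")  # Should hit the cache
second_call = time.time() - start

print(f"First call: {first_call:.4f}s, cached call: {second_call:.4f}s")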
Batch Processing
# Process large batches efficiently
large_text_collection = [f"Document {i}" for i in range(1000)]
# Process in batches for memory efficiency
batch_size = 32
all_embeddings = []
for i in range(0, len(large_text_collection), batch_size):
batch = large_text_collection[i:i+batch_size]
batch_embeddings = embedder.embed_batch(batch)
all_embeddings.extend(batch_embeddings)
print(f"Processed {min(i+batch_size, len(large_text_collection))}/{len(large_text_collection)} documents")
print(f"Generated {len(all_embeddings)} embeddings")
Model Comparison
# Compare different models
models = [
"sentence-transformers/all-MiniLM-L6-v2", # Fast, lightweight
"sentence-transformers/all-mpnet-base-v2", # Better quality
"BAAI/bge-large-en-v1.5" # SOTA performance
]
import time

test_texts = ["Machine learning", "Artificial intelligence"]
for model_name in models:
    embedder = EmbeddingManager(model=model_name)
    # Time the embedding generation
    start = time.time()
embeddings = embedder.embed_batch(test_texts)
duration = time.time() - start
print(f"Model: {model_name}")
print(f"Dimension: {len(embeddings[0])}")
print(f"Time: {duration:.3f}s")
print(f"Speed: {len(test_texts)/duration:.1f} texts/sec")
print()
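Speed and dimensionality are only part of the picture. A rough quality check is to verify that a model scores a clearly related pair higher than an unrelated one; the snippet below reuses compute_similarity from the Quick Start and is a sanity check, not a rigorous benchmark.
for model_name in models:
    embedder = EmbeddingManager(model=model_name)
    # Sanity check: a useful model should score the related pair much higher
    related = embedder.compute_similarity("Machine learning", "Artificial intelligence")
    unrelated = embedder.compute_similarity("Machine learning", "Chocolate cake recipe")
    print(f"{model_name}: related={related:.3f}, unrelated={unrelated:.3f}")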