API Reference
Complete reference for the AbstractCore API. All examples work with any provider.
Core Functions
create_llm()
Creates an LLM provider instance using the factory pattern.
def create_llm(
    provider: str,
    model: Optional[str] = None,
    retry_config: Optional[RetryConfig] = None,
    **kwargs
) -> AbstractCoreInterface
Parameters
- provider (str): Provider name ("openai", "anthropic", "ollama", "mlx", "lmstudio", "huggingface")
- model (str, optional): Model name. If not provided, uses the provider default
- retry_config (RetryConfig, optional): Custom retry configuration
- tool_call_tags (str or tuple, optional): Custom tool call format for agent CLI compatibility
- **kwargs: Provider-specific parameters
Common **kwargs Parameters
- api_key (str): API key for cloud providers
- base_url (str): Custom endpoint URL
- temperature (float): Sampling temperature (0-2)
- max_tokens (int): Context window (input + output), unified across providers
- max_output_tokens (int): Maximum output tokens, unified across providers
- max_input_tokens (int): Maximum input tokens (auto-calculated if not set)
- timeout (int): Request timeout in seconds
- top_p (float): Nucleus sampling parameter
Returns
An AbstractCoreInterface instance
Example
from abstractcore import create_llm
# Basic usage
llm = create_llm("openai", model="gpt-4o-mini")
# With unified token management
llm = create_llm(
    "anthropic",
    model="claude-3-5-haiku-latest",
    temperature=0.7,
    max_tokens=32000,        # Context window (input + output)
    max_output_tokens=8000,  # Maximum output tokens
    max_input_tokens=24000,  # Maximum input tokens (auto-calculated if not set)
    timeout=30
)
# With tool syntax rewriting for agent CLI compatibility
llm = create_llm(
    "ollama",
    model="qwen3-coder:30b",
    tool_call_tags="function_call"  # Converts to <function_call>...JSON...</function_call>
)
# Local provider
llm = create_llm("ollama", model="qwen2.5-coder:7b", base_url="http://localhost:11434")
Classes
AbstractCoreInterface
Base interface for all LLM providers. All providers implement this interface.
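Because every provider implements this same interface, application code can be written once and reused across backends. A minimal sketch (the interface import path and the summarize() helper are assumptions for illustration, not part of the documented API):
from abstractcore import create_llm
from abstractcore.core.interface import AbstractCoreInterface  # import path is an assumption

def summarize(llm: AbstractCoreInterface, text: str) -> str:
    # Works identically for cloud and local providers
    return llm.generate(f"Summarize in one sentence:\n{text}").content

cloud = create_llm("openai", model="gpt-4o-mini")
local = create_llm("ollama", model="qwen2.5-coder:7b")
print(summarize(cloud, "Some long article text..."))
print(summarize(local, "Some long article text..."))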
generate()
Generate text response from the LLM with optional media attachments.
def generate(
    self,
    prompt: str,
    messages: Optional[List[Dict]] = None,
    system_prompt: Optional[str] = None,
    media: Optional[List[str]] = None,
    tools: Optional[List[Callable]] = None,
    response_model: Optional[BaseModel] = None,
    retry_strategy: Optional[Retry] = None,
    stream: bool = False,
    **kwargs
) -> Union[GenerateResponse, Iterator[GenerateResponse]]
Parameters
- prompt (str): Text prompt to generate from
- messages (List[Dict], optional): Conversation messages in OpenAI format
- system_prompt (str, optional): System prompt to set context
- media (List[str], optional): File paths to attach (images, PDFs, documents, data files)
- tools (List[Callable], optional): Functions the LLM can call (using the @tool decorator)
- response_model (BaseModel, optional): Pydantic model for structured output
- retry_strategy (Retry, optional): Custom retry strategy for structured output
- stream (bool): Enable streaming response
- **kwargs: Additional generation parameters
Media Handling Example
# Attach images, documents, and data files
from abstractcore import create_llm
llm = create_llm("openai", model="gpt-4o")
# Single image
response = llm.generate(
    "What's in this image?",
    media=["photo.jpg"]
)
# Multiple files (images, PDFs, Office docs, data)
response = llm.generate(
    "Compare the chart with the data and summarize the document",
    media=["chart.png", "data.csv", "report.pdf"]
)
# Supported file types:
# Images: PNG, JPEG, GIF, WEBP, BMP, TIFF
# Documents: PDF, DOCX, XLSX, PPTX
# Data: CSV, TSV, TXT, MD, JSON
Returns
- If stream=False: GenerateResponse
- If stream=True: Iterator[GenerateResponse]
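When stream=True, generate() yields partial GenerateResponse objects as tokens arrive. A minimal sketch, assuming each chunk's content field carries the incremental text fragment:
from abstractcore import create_llm

llm = create_llm("openai", model="gpt-4o-mini")

# Stream the response and print fragments as they arrive
for chunk in llm.generate("Explain Python generators in two sentences.", stream=True):
    if chunk.content:
        print(chunk.content, end="", flush=True)
print()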
unload()
Unload model from memory (local providers only).
# Local providers support memory management
llm = create_llm("ollama", model="large-model")
response = llm.generate("Hello")
llm.unload() # Explicitly free memory
del llm
# API providers (OpenAI, Anthropic) - unload() is safe but no-op
api_llm = create_llm("openai", model="gpt-4o-mini")
api_llm.unload() # Safe to call, but has no effect
GenerateResponse
Response object from LLM generation.
@dataclass
class GenerateResponse:
    content: Optional[str]            # Generated text content
    raw_response: Any                 # Raw provider response
    model: Optional[str]              # Model used for generation
    finish_reason: Optional[str]      # Why generation stopped
    usage: Optional[Dict[str, int]]   # Token usage information
    tool_calls: Optional[List[Dict]]  # Tools called by the LLM
    metadata: Optional[Dict]          # Additional metadata
Attributes
- content (str): Generated text content
- raw_response (Any): Raw provider response
- model (str): Model used for generation
- finish_reason (str): Why generation stopped ("stop", "length", "tool_calls")
- usage (Dict): Token usage information
- tool_calls (List[Dict]): Tools called by the LLM
- metadata (Dict): Additional metadata
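A short sketch of inspecting these fields after a tool-enabled call (the @tool import path and the get_weather() function are assumptions for illustration):
from abstractcore import create_llm
from abstractcore.tools import tool  # import path is an assumption

@tool
def get_weather(city: str) -> str:
    """Return the current weather for a city."""
    return f"Sunny in {city}"

llm = create_llm("openai", model="gpt-4o-mini")
response = llm.generate("What's the weather in Paris?", tools=[get_weather])

print(response.finish_reason)  # "tool_calls" if the model invoked a tool
print(response.tool_calls)     # list of tool call dicts, or None
print(response.usage)          # token usage dict, e.g. {"input_tokens": ...}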
BasicSession
Manages conversation context and history.
class BasicSession:
    def __init__(
        self,
        provider: AbstractCoreInterface,
        system_prompt: Optional[str] = None
    ):
Parameters
- provider (AbstractCoreInterface): LLM provider instance
- system_prompt (str, optional): System prompt for the conversation
Methods
generate()
def generate(self, prompt: str, **kwargs) -> GenerateResponse
Generate response and add to conversation history.
add_message()
def add_message(self, role: str, content: str, **metadata) -> Message
Add message to conversation history with optional metadata.
save()
def save(self, filepath: Path, summary: bool = False, assessment: bool = False, facts: bool = False) -> None
Save session to JSON file with optional analytics.
load()
@classmethod
def load(cls, filepath: Path, provider: AbstractCoreInterface) -> "BasicSession"
Load session from JSON file.
Example
from abstractcore import create_llm, BasicSession
llm = create_llm("openai", model="gpt-4o-mini")
session = BasicSession(
    provider=llm,
    system_prompt="You are a helpful coding tutor."
)
# Multi-turn conversation with memory
response1 = session.generate("My name is Alice and I'm learning Python.")
response2 = session.generate("What's my name and what am I learning?")
# Output: Your name is Alice and you're learning Python.
# Save with analytics
session.save(
    'tutoring_session.json',
    summary=True,     # Generate conversation summary
    assessment=True,  # Assess learning progress
    facts=True        # Extract key facts learned
)
# Load and continue later
loaded_session = BasicSession.load('tutoring_session.json', provider=llm)
Event System
Comprehensive observability and control through events.
Event Types
- GENERATION_STARTED: Before LLM generation begins
- GENERATION_COMPLETED: After LLM generation completes
- TOOL_STARTED: Before tool execution (preventable)
- TOOL_COMPLETED: After tool execution completes
- ERROR_OCCURRED: When an error occurs
- RETRY_ATTEMPTED: When a retry is attempted
Event Registration
from abstractcore.events import EventType, on_global
# Cost monitoring (alert() is a placeholder for your alerting hook)
def monitor_costs(event):
    if event.cost_usd and event.cost_usd > 0.10:
        alert(f"High cost request: ${event.cost_usd}")

# Security control
def prevent_dangerous_tools(event):
    for call in event.data.get('tool_calls', []):
        if call.name in ['delete_file', 'system_command']:
            event.prevent()  # Stop tool execution

# Performance tracking (log() is a placeholder for your logger)
def track_performance(event):
    if event.duration_ms > 10000:
        log(f"Slow request: {event.duration_ms}ms")

# Register event handlers
on_global(EventType.GENERATION_COMPLETED, monitor_costs)
on_global(EventType.TOOL_STARTED, prevent_dangerous_tools)
on_global(EventType.GENERATION_COMPLETED, track_performance)
Retry Configuration
Production-grade error handling with multiple layers.
from abstractcore import create_llm
from abstractcore.core.retry import RetryConfig
config = RetryConfig(
    max_attempts=3,        # Try up to 3 times
    initial_delay=1.0,     # Start with 1 second delay
    max_delay=60.0,        # Cap at 1 minute
    use_jitter=True,       # Add randomness
    failure_threshold=5,   # Circuit breaker after 5 failures
    recovery_timeout=60.0  # Test recovery after 1 minute
)
llm = create_llm("openai", model="gpt-4o-mini", retry_config=config)
Token Management
Unified token parameters and budget validation across all providers.
from abstractcore import create_llm
from abstractcore.utils.token_utils import estimate_tokens
# Unified token parameters work across ALL providers
llm = create_llm(
    "anthropic",
    model="claude-3-5-haiku-latest",
    max_tokens=32000,        # Context window (input + output)
    max_output_tokens=8000,  # Maximum output tokens
    max_input_tokens=24000   # Maximum input tokens (auto-calculated if not set)
)
# Token estimation and validation
text = "Your input text here..."
estimated = estimate_tokens(text, model="claude-3-5-haiku-latest")
print(f"Estimated tokens: {estimated}")
# Budget validation with warnings
response = llm.generate("Write a detailed analysis...")
print(f"Input tokens: {response.usage.input_tokens}")
print(f"Output tokens: {response.usage.output_tokens}")
print(f"Cost estimate: ${response.usage.cost_usd:.4f}")
Production Resilience
Advanced retry logic, circuit breakers, and event-driven monitoring.
from abstractcore import create_llm
from abstractcore.core.retry import RetryConfig
from abstractcore.events import EventType, on_global

# Production resilience: retries with exponential backoff plus a circuit
# breaker, configured through RetryConfig (see Retry Configuration above)
llm = create_llm(
    "openai",
    model="gpt-4o-mini",
    retry_config=RetryConfig(
        max_attempts=3,        # Try up to 3 times
        failure_threshold=5,   # Circuit breaker after 5 failures
        recovery_timeout=60.0  # Test recovery after 1 minute
    )
)
# Cost monitoring
def cost_monitor(event):
    if event.cost_usd and event.cost_usd > 0.10:
        alert(f"High cost request: ${event.cost_usd}")
on_global(EventType.GENERATION_COMPLETED, cost_monitor)
# Memory management for local models
local_llm = create_llm("ollama", model="large-model")
response = local_llm.generate("Hello")
local_llm.unload() # Free memory
Embeddings
Built-in support for semantic search and RAG applications.
from abstractcore.embeddings import EmbeddingManager
from abstractcore import create_llm
# Initialize components
embedder = EmbeddingManager()
llm = create_llm("openai", model="gpt-4o-mini")
# Documents to search
documents = [
    "Python is great for data science and machine learning.",
    "JavaScript powers modern web applications.",
    "Rust ensures memory safety without garbage collection."
]
# Create embeddings
doc_embeddings = embedder.embed_batch(documents)
# User query
query = "Tell me about web development"
query_embedding = embedder.embed(query)
# Find most similar document
similarities = [
    embedder.compute_similarity(query_embedding, doc_emb)
    for doc_emb in doc_embeddings
]
best_doc_idx = similarities.index(max(similarities))
context = documents[best_doc_idx]
# Generate response with context
response = llm.generate(
    f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
)
print(response.content)
Exceptions
Comprehensive exception hierarchy for robust applications.
from abstractcore.exceptions import (
    ModelNotFoundError,
    ProviderAPIError,
    AuthenticationError,
    RateLimitError,
    TimeoutError,
    ToolExecutionError
)
try:
    response = llm.generate("Hello", tools=[some_tool])
except AuthenticationError:
    print("Invalid API key")
except RateLimitError:
    print("Rate limit exceeded, retrying...")
except ModelNotFoundError:
    print("Model not available")
except ToolExecutionError as e:
    print(f"Tool execution failed: {e.tool_name}")
except ProviderAPIError as e:
    print(f"Provider error: {e}")