# Production-Ready Examples

Real-world examples of using Outlines for structured generation in production systems.

## Table of Contents

- Data Extraction
- Classification Systems
- Form Processing
- Multi-Entity Extraction
- Code Generation
- Batch Processing
- Production Patterns

## Data Extraction

### Basic Information Extraction

```python
from pydantic import BaseModel, Field
import outlines


class PersonInfo(BaseModel):
    name: str = Field(description="Full name")
    age: int = Field(ge=0, le=120)
    occupation: str
    email: str = Field(pattern=r"^[\w\.-]+@[\w\.-]+\.\w+$")
    location: str


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, PersonInfo)

text = """
Dr. Sarah Johnson is a 42-year-old research scientist at MIT.
She can be reached at sarah.j@mit.edu and currently lives in Cambridge, MA.
"""

prompt = f"Extract person information from:\n{text}\n\nPerson:"
person = generator(prompt)

print(f"Name: {person.name}")
print(f"Age: {person.age}")
print(f"Occupation: {person.occupation}")
print(f"Email: {person.email}")
print(f"Location: {person.location}")
```

### Company Information

```python
# Optional is needed for the nullable fields below (and in later snippets).
from typing import Optional


class CompanyInfo(BaseModel):
    name: str
    founded_year: int = Field(ge=1800, le=2025)
    industry: str
    headquarters: str
    employees: int = Field(gt=0)
    revenue: Optional[str] = None


model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, CompanyInfo)

text = """
Tesla, Inc. was founded in 2003 and operates primarily in the automotive and energy industries.
The company is headquartered in Austin, Texas, and employs approximately 140,000 people worldwide.
"""

company = generator(f"Extract company information:\n{text}\n\nCompany:")

print(f"Company: {company.name}")
print(f"Founded: {company.founded_year}")
print(f"Industry: {company.industry}")
print(f"HQ: {company.headquarters}")
print(f"Employees: {company.employees:,}")
```

### Product Specifications

```python
class ProductSpec(BaseModel):
    name: str
    brand: str
    price: float = Field(gt=0)
    dimensions: str
    weight: str
    features: list[str]
    rating: Optional[float] = Field(None, ge=0, le=5)


generator = outlines.generate.json(model, ProductSpec)

text = """
The Apple iPhone 15 Pro is priced at $999. It measures 146.6 x 70.6 x 8.25 mm and weighs 187 grams.
Key features include the A17 Pro chip, titanium design, action button, and USB-C port.
It has an average customer rating of 4.5 stars.
"""

product = generator(f"Extract product specifications:\n{text}\n\nProduct:")

print(f"Product: {product.brand} {product.name}")
print(f"Price: ${product.price}")
print(f"Features: {', '.join(product.features)}")
```

## Classification Systems

### Sentiment Analysis

```python
from typing import Literal
from enum import Enum


class Sentiment(str, Enum):
    VERY_POSITIVE = "very_positive"
    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"
    VERY_NEGATIVE = "very_negative"


class SentimentAnalysis(BaseModel):
    text: str
    sentiment: Sentiment
    confidence: float = Field(ge=0.0, le=1.0)
    aspects: list[str]  # What aspects were mentioned
    reasoning: str


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, SentimentAnalysis)

review = """
This product completely exceeded my expectations!
The build quality is outstanding, and customer service was incredibly helpful.
My only minor complaint is the packaging could be better.
"""

result = generator(f"Analyze sentiment:\n{review}\n\nAnalysis:")

print(f"Sentiment: {result.sentiment.value}")
print(f"Confidence: {result.confidence:.2%}")
print(f"Aspects: {', '.join(result.aspects)}")
print(f"Reasoning: {result.reasoning}")
```

### Content Classification

```python
class Category(str, Enum):
    TECHNOLOGY = "technology"
    BUSINESS = "business"
    SCIENCE = "science"
    POLITICS = "politics"
    ENTERTAINMENT = "entertainment"
    SPORTS = "sports"
    HEALTH = "health"


class ArticleClassification(BaseModel):
    primary_category: Category
    secondary_categories: list[Category]
    # Pydantic v2 spells list-size constraints min_length/max_length;
    # min_items/max_items are deprecated v1 names.
    keywords: list[str] = Field(min_length=3, max_length=10)
    target_audience: Literal["general", "expert", "beginner"]
    reading_level: Literal["elementary", "intermediate", "advanced"]


generator = outlines.generate.json(model, ArticleClassification)

article = """
Apple announced groundbreaking advancements in its AI capabilities with the release of iOS 18.
The new features leverage machine learning to significantly improve battery life and overall device performance.
Industry analysts predict this will strengthen Apple's position in the competitive smartphone market.
"""

classification = generator(f"Classify article:\n{article}\n\nClassification:")

print(f"Primary: {classification.primary_category.value}")
print(f"Secondary: {[c.value for c in classification.secondary_categories]}")
print(f"Keywords: {classification.keywords}")
print(f"Audience: {classification.target_audience}")
```

### Intent Recognition

```python
class Intent(str, Enum):
    QUESTION = "question"
    COMPLAINT = "complaint"
    REQUEST = "request"
    FEEDBACK = "feedback"
    CANCEL = "cancel"
    UPGRADE = "upgrade"


class UserMessage(BaseModel):
    original_message: str
    intent: Intent
    urgency: Literal["low", "medium", "high", "critical"]
    department: Literal["support", "sales", "billing", "technical"]
    sentiment: Literal["positive", "neutral", "negative"]
    action_required: bool
    summary: str


generator = outlines.generate.json(model, UserMessage)

message = """
I've been charged twice for my subscription this month! This is the third time this has happened.
I need someone to fix this immediately and refund the extra charge.
Very disappointed with this service.
"""

result = generator(f"Analyze message:\n{message}\n\nAnalysis:")

print(f"Intent: {result.intent.value}")
print(f"Urgency: {result.urgency}")
print(f"Route to: {result.department}")
print(f"Action required: {result.action_required}")
print(f"Summary: {result.summary}")
```

## Form Processing

### Job Application

```python
class Education(BaseModel):
    degree: str
    field: str
    institution: str
    year: int


class Experience(BaseModel):
    title: str
    company: str
    duration: str
    responsibilities: list[str]


class JobApplication(BaseModel):
    full_name: str
    email: str
    phone: str
    education: list[Education]
    experience: list[Experience]
    skills: list[str]
    availability: str


model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, JobApplication)

resume_text = """
John Smith
Email: john.smith@email.com | Phone: 555-0123

EDUCATION
- BS in Computer Science, MIT, 2018
- MS in Artificial Intelligence, Stanford, 2020

EXPERIENCE
Software Engineer, Google (2020-2023)
- Developed ML pipelines for search ranking
- Led team of 5 engineers
- Improved search quality by 15%

SKILLS: Python, Machine Learning, TensorFlow, System Design

AVAILABILITY: Immediate
"""

application = generator(f"Extract job application:\n{resume_text}\n\nApplication:")

print(f"Applicant: {application.full_name}")
print(f"Email: {application.email}")
print(f"Education: {len(application.education)} degrees")
for edu in application.education:
    print(f"  - {edu.degree} in {edu.field}, {edu.institution} ({edu.year})")
print(f"Experience: {len(application.experience)} positions")
```

### Invoice Processing

```python
class InvoiceItem(BaseModel):
    description: str
    quantity: int = Field(gt=0)
    unit_price: float = Field(gt=0)
    total: float = Field(gt=0)


class Invoice(BaseModel):
    invoice_number: str
    date: str = Field(pattern=r"\d{4}-\d{2}-\d{2}")  # ISO-style YYYY-MM-DD
    vendor: str
    customer: str
    items: list[InvoiceItem]
    subtotal: float = Field(gt=0)
    tax: float = Field(ge=0)
    total: float = Field(gt=0)

generator = outlines.generate.json(model, Invoice)

invoice_text = """
INVOICE #INV-2024-001
Date: 2024-01-15

From: Acme Corp
To: Smith & Co

Items:
- Widget A: 10 units @ $50.00 = $500.00
- Widget B: 5 units @ $75.00 = $375.00
- Service Fee: 1 @ $100.00 = $100.00

Subtotal: $975.00
Tax (8%): $78.00
TOTAL: $1,053.00
"""

invoice = generator(f"Extract invoice:\n{invoice_text}\n\nInvoice:")

print(f"Invoice: {invoice.invoice_number}")
print(f"From: {invoice.vendor} → To: {invoice.customer}")
print(f"Items: {len(invoice.items)}")
for item in invoice.items:
    print(f"  - {item.description}: {item.quantity} × ${item.unit_price} = ${item.total}")
print(f"Total: ${invoice.total}")
```

### Survey Responses

```python
from typing import Literal, Optional


class SurveyResponse(BaseModel):
    respondent_id: str
    completion_date: str
    satisfaction: Literal[1, 2, 3, 4, 5]
    would_recommend: bool
    favorite_features: list[str]
    improvement_areas: list[str]
    additional_comments: Optional[str] = None


generator = outlines.generate.json(model, SurveyResponse)

survey_text = """
Survey ID: RESP-12345
Completed: 2024-01-20

How satisfied are you with our product? 4 out of 5

Would you recommend to a friend? Yes

What features do you like most?
- Fast performance
- Easy to use
- Great customer support

What could we improve?
- Better documentation
- More integrations

Additional feedback: Overall great product, keep up the good work!
"""

response = generator(f"Extract survey response:\n{survey_text}\n\nResponse:")

print(f"Respondent: {response.respondent_id}")
print(f"Satisfaction: {response.satisfaction}/5")
print(f"Would recommend: {response.would_recommend}")
print(f"Favorite features: {response.favorite_features}")
print(f"Improvement areas: {response.improvement_areas}")
```

## Multi-Entity Extraction

### News Article Entities

```python
class Person(BaseModel):
    name: str
    role: Optional[str] = None
    affiliation: Optional[str] = None


class Organization(BaseModel):
    name: str
    type: Optional[str] = None


class Location(BaseModel):
    name: str
    type: Literal["city", "state", "country", "region"]


class Event(BaseModel):
    name: str
    date: Optional[str] = None
    location: Optional[str] = None


class ArticleEntities(BaseModel):
    people: list[Person]
    organizations: list[Organization]
    locations: list[Location]
    events: list[Event]
    dates: list[str]


model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, ArticleEntities)

article = """
Apple CEO Tim Cook met with Microsoft CEO Satya Nadella at Microsoft headquarters
in Redmond, Washington on September 15, 2024, to discuss potential collaboration opportunities.
The meeting was attended by executives from both companies and focused on AI integration strategies.
Apple's Cupertino offices will host a follow-up meeting on October 20, 2024.
"""

entities = generator(f"Extract all entities:\n{article}\n\nEntities:")

print("People:")
for person in entities.people:
    print(f"  - {person.name} ({person.role}) @ {person.affiliation}")

print("\nOrganizations:")
for org in entities.organizations:
    print(f"  - {org.name} ({org.type})")

print("\nLocations:")
for loc in entities.locations:
    print(f"  - {loc.name} ({loc.type})")

print("\nEvents:")
for event in entities.events:
    print(f"  - {event.name} on {event.date}")
```

### Document Metadata

```python
class Author(BaseModel):
    name: str
    email: Optional[str] = None
    affiliation: Optional[str] = None


class Reference(BaseModel):
    title: str
    authors: list[str]
    year: int
    source: str


class DocumentMetadata(BaseModel):
    title: str
    authors: list[Author]
    abstract: str
    keywords: list[str]
    publication_date: str
    journal: str
    doi: Optional[str] = None
    references: list[Reference]


generator = outlines.generate.json(model, DocumentMetadata)

paper = """
Title: Advances in Neural Machine Translation

Authors:
- Dr. Jane Smith (jane@university.edu), MIT
- Prof. John Doe (jdoe@stanford.edu), Stanford University

Abstract: This paper presents novel approaches to neural machine translation using transformer architectures.
We demonstrate significant improvements in translation quality across multiple language pairs.

Keywords: Neural Networks, Machine Translation, Transformers, NLP

Published: Journal of AI Research, 2024-03-15
DOI: 10.1234/jair.2024.001

References:
1. "Attention Is All You Need" by Vaswani et al., 2017, NeurIPS
2. "BERT: Pre-training of Deep Bidirectional Transformers" by Devlin et al., 2019, NAACL
"""

metadata = generator(f"Extract document metadata:\n{paper}\n\nMetadata:")

print(f"Title: {metadata.title}")
print(f"Authors: {', '.join(a.name for a in metadata.authors)}")
print(f"Keywords: {', '.join(metadata.keywords)}")
print(f"References: {len(metadata.references)}")
```

## Code Generation

### Python Function Generation

```python
class Parameter(BaseModel):
    name: str = Field(pattern=r"^[a-z_][a-z0-9_]*$")
    type_hint: str
    default: Optional[str] = None


class PythonFunction(BaseModel):
    function_name: str = Field(pattern=r"^[a-z_][a-z0-9_]*$")
    parameters: list[Parameter]
    return_type: str
    docstring: str
    body: list[str]  # Lines of code


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, PythonFunction)

spec = "Create a function to calculate the factorial of a number"
func = generator(f"Generate Python function:\n{spec}\n\nFunction:")

# Render the structured result as Python source; body lines are indented
# one level so the printed function is syntactically valid.
print(f"def {func.function_name}(", end="")
print(", ".join(f"{p.name}: {p.type_hint}" for p in func.parameters), end="")
print(f") -> {func.return_type}:")
print(f'    """{func.docstring}"""')
for line in func.body:
    print(f"    {line}")
```

### SQL Query Generation

```python
class SQLQuery(BaseModel):
    query_type: Literal["SELECT", "INSERT", "UPDATE", "DELETE"]
    select_columns: Optional[list[str]] = None
    from_tables: list[str]
    joins: Optional[list[str]] = None
    where_conditions: Optional[list[str]] = None
    group_by: Optional[list[str]] = None
    order_by: Optional[list[str]] = None
    limit: Optional[int] = None


generator = outlines.generate.json(model, SQLQuery)

request = "Get top 10 users who made purchases in the last 30 days, ordered by total spent"
sql = generator(f"Generate SQL query:\n{request}\n\nQuery:")

print(f"Query type: {sql.query_type}")
# Every clause except FROM is Optional, so guard each one: joining None
# would raise TypeError.
if sql.select_columns:
    print(f"SELECT {', '.join(sql.select_columns)}")
print(f"FROM {', '.join(sql.from_tables)}")
if sql.joins:
    for join in sql.joins:
        print(f"  {join}")
if sql.where_conditions:
    print(f"WHERE {' AND '.join(sql.where_conditions)}")
if sql.group_by:
    print(f"GROUP BY {', '.join(sql.group_by)}")
if sql.order_by:
    print(f"ORDER BY {', '.join(sql.order_by)}")
# Compare against None so that an explicit LIMIT 0 is still printed.
if sql.limit is not None:
    print(f"LIMIT {sql.limit}")
```

### API Endpoint Spec

```python
class Parameter(BaseModel):
    name: str
    type: str
    required: bool
    description: str


class APIEndpoint(BaseModel):
    method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"]
    path: str
    description: str
    parameters: list[Parameter]
    request_body: Optional[dict] = None
    response_schema: dict
    status_codes: dict[int, str]


generator = outlines.generate.json(model, APIEndpoint)

spec = "Create user endpoint"
endpoint = generator(f"Generate API endpoint:\n{spec}\n\nEndpoint:")

print(f"{endpoint.method} {endpoint.path}")
print(f"Description: {endpoint.description}")
print("\nParameters:")
for param in endpoint.parameters:
    req = "required" if param.required else "optional"
    print(f"  - {param.name} ({param.type}, {req}): {param.description}")
```

## Batch Processing

### Parallel Extraction

```python
def batch_extract(texts: list[str], schema: type[BaseModel], model_name: str):
    """Extract structured data from multiple texts.

    The model and generator are built once and reused for every text.

    Args:
        texts: Raw input texts, processed in order.
        schema: Pydantic model describing the expected output.
        model_name: Model identifier passed to ``outlines.models.transformers``.

    Returns:
        A list with one generator result per input text, in input order.
    """
    model = outlines.models.transformers(model_name)
    generator = outlines.generate.json(model, schema)

    results = []
    for i, text in enumerate(texts):
        # "\r" rewrites the same console line to show progress.
        print(f"Processing {i+1}/{len(texts)}...", end="\r")
        result = generator(f"Extract:\n{text}\n\nData:")
        results.append(result)

    return results


class Product(BaseModel):
    name: str
    price: float
    category: str


texts = [
    "iPhone 15 Pro costs $999 in Electronics",
    "Running Shoes are $89.99 in Sports",
    "Coffee Maker priced at $49.99 in Home & Kitchen",
]

products = batch_extract(texts, Product, "microsoft/Phi-3-mini-4k-instruct")

for product in products:
    print(f"{product.name}: ${product.price} ({product.category})")
```

### CSV Processing

```python
import csv


def process_csv(csv_file: str, schema: type[BaseModel]):
    """Process CSV file and extract structured data.

    Each row is flattened to ``"column: value | column: value"`` text and
    passed through the generator.

    Args:
        csv_file: Path to a CSV file with a header row.
        schema: Pydantic model describing the expected output.

    Returns:
        A list with one generator result per CSV row, in file order.
    """
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)

    results = []
    # newline="" is what the csv module requires so that newlines embedded
    # in quoted fields are parsed correctly; the encoding is made explicit
    # instead of depending on the platform default.
    with open(csv_file, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            text = " | ".join(f"{k}: {v}" for k, v in row.items())
            result = generator(f"Extract:\n{text}\n\nData:")
            results.append(result)

    return results


class Customer(BaseModel):
    name: str
    email: str
    tier: Literal["basic", "premium", "enterprise"]
    mrr: float


# customers = process_csv("customers.csv", Customer)
```

## Production Patterns

### Error Handling

```python
from pydantic import ValidationError


def safe_extract(text: str, schema: type[BaseModel], retries: int = 3):
    """Extract with error handling and retries.

    Args:
        text: Raw text to extract from.
        schema: Pydantic model describing the expected output.
        retries: Maximum number of generation attempts.

    Returns:
        The result of the first successful attempt, or ``None`` when
        ``retries`` is less than 1 and no attempt is made.

    Raises:
        ValidationError: If the final attempt fails validation.
        Exception: Any other error raised by the final attempt.
    """
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)

    for attempt in range(retries):
        try:
            result = generator(f"Extract:\n{text}\n\nData:")
            return result
        except ValidationError as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt == retries - 1:
                raise
        except Exception as e:
            print(f"Unexpected error: {e}")
            if attempt == retries - 1:
                raise

    return None
```

### Caching

```python
from functools import lru_cache
import hashlib

# Upper bound on stored results; the oldest entry is evicted first.
_CACHE_MAX_ENTRIES = 1000

# Maps (text hash, schema name) -> extraction result.
_result_cache: dict[tuple[str, str], BaseModel] = {}


@lru_cache(maxsize=None)
def _get_generator(schema: type[BaseModel]):
    """Build the generator for *schema* once and reuse it on later calls."""
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    return outlines.generate.json(model, schema)


def cached_extract(text_hash: str, schema_name: str):
    """Look up a previously stored extraction result.

    This is a plain dictionary lookup rather than an ``lru_cache``-wrapped
    function: memoizing the lookup itself would also memoize misses, so a
    key that missed once would keep returning ``None`` forever.

    Args:
        text_hash: Hex digest of the input text.
        schema_name: ``__name__`` of the schema class.

    Returns:
        The cached result, or ``None`` if nothing is stored for this key.
    """
    return _result_cache.get((text_hash, schema_name))


def extract_with_cache(text: str, schema: type[BaseModel]):
    """Extract with caching.

    Args:
        text: Raw text to extract from.
        schema: Pydantic model describing the expected output.

    Returns:
        The cached result for this (text, schema) pair when one exists,
        otherwise a freshly generated result, which is stored before
        being returned.
    """
    # MD5 is used only as a compact cache key, not for security.
    text_hash = hashlib.md5(text.encode()).hexdigest()
    schema_name = schema.__name__

    cached_result = cached_extract(text_hash, schema_name)
    if cached_result is not None:
        return cached_result

    # Perform actual extraction
    generator = _get_generator(schema)
    result = generator(f"Extract:\n{text}\n\nData:")

    # Dicts preserve insertion order, so the first key is the oldest entry.
    if len(_result_cache) >= _CACHE_MAX_ENTRIES:
        _result_cache.pop(next(iter(_result_cache)))
    _result_cache[(text_hash, schema_name)] = result

    return result
```

### Monitoring

```python
import time
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def monitored_extract(text: str, schema: type[BaseModel]):
    """Extract with monitoring and logging.

    The measured time covers model loading, generator construction and
    generation.

    Args:
        text: Raw text to extract from.
        schema: Pydantic model describing the expected output.

    Returns:
        The generator's result.

    Raises:
        Exception: Any error from loading or generation is logged with its
            traceback and re-raised unchanged.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    try:
        model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
        generator = outlines.generate.json(model, schema)
        result = generator(f"Extract:\n{text}\n\nData:")
    except Exception as e:
        elapsed = time.perf_counter() - start_time
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("Extraction failed after %.2fs: %s", elapsed, e)
        raise

    elapsed = time.perf_counter() - start_time
    logger.info("Extraction succeeded in %.2fs", elapsed)
    logger.info("Input length: %d chars", len(text))

    return result
```

### Rate Limiting

```python
import time
from threading import Lock


class RateLimiter:
    """Sliding-window rate limiter.

    Allows at most ``max_requests`` calls to ``wait_if_needed`` to return
    within any ``time_window``-second span; further callers block until
    the oldest recorded request leaves the window.
    """

    def __init__(self, max_requests: int, time_window: int):
        """Create a limiter.

        Args:
            max_requests: Maximum number of requests per window (>= 1).
            time_window: Window length in seconds (> 0).

        Raises:
            ValueError: If either argument is not positive.
        """
        if max_requests < 1:
            raise ValueError("max_requests must be at least 1")
        if time_window <= 0:
            raise ValueError("time_window must be positive")
        self.max_requests = max_requests
        self.time_window = time_window
        self.requests = []  # time.monotonic() stamps of admitted requests, oldest first
        self.lock = Lock()

    def _prune(self, now: float) -> None:
        """Drop timestamps that have left the window ending at *now*."""
        self.requests = [r for r in self.requests if now - r < self.time_window]

    def wait_if_needed(self):
        """Block until a request slot is free, then record the request.

        The lock is held while sleeping, so concurrent callers queue up
        behind the waiting one instead of racing for the freed slot.
        """
        with self.lock:
            now = time.monotonic()
            self._prune(now)

            while len(self.requests) >= self.max_requests:
                # Sleep just until the oldest request expires, then
                # re-read the clock: the timestamp recorded below must be
                # the time the request is actually admitted, and only
                # expired entries may be dropped (clearing the whole
                # window would permit a burst above the limit).
                time.sleep(self.time_window - (now - self.requests[0]))
                now = time.monotonic()
                self._prune(now)

            self.requests.append(now)


def rate_limited_extract(texts: list[str], schema: type[BaseModel]):
    """Extract with rate limiting.

    Args:
        texts: Raw input texts, processed in order.
        schema: Pydantic model describing the expected output.

    Returns:
        A list with one generator result per input text, in input order.
    """
    limiter = RateLimiter(max_requests=10, time_window=60)  # 10 req/min

    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)

    results = []
    for text in texts:
        limiter.wait_if_needed()
        result = generator(f"Extract:\n{text}\n\nData:")
        results.append(result)

    return results
```

## Resources

- **Outlines Documentation**: https://outlines-dev.github.io/outlines
- **Pydantic Documentation**: https://docs.pydantic.dev
- **GitHub Examples**: https://github.com/outlines-dev/outlines/tree/main/examples