LLM Integration
Shipfastai supports multiple LLM providers out of the box.
Supported Providers
• OpenAI (GPT-4, GPT-3.5)
• Anthropic (Claude)
• Local models (Ollama); see the local-model sketch below
OpenAI Integration
Basic Usage
from openai import AsyncOpenAI

client = AsyncOpenAI()

async def generate(prompt: str) -> str:
    response = await client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content
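To try it, run the coroutine from synchronous code (this assumes OPENAI_API_KEY is set in your environment; the prompt is just an example):

import asyncio

print(asyncio.run(generate("Say hello in one sentence.")))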
Streaming Responses
async def stream_generate(prompt: str):
    stream = await client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content
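The generator is consumed with async for; a minimal sketch that prints tokens as they arrive (reusing the asyncio import from above):

async def main():
    async for token in stream_generate("Tell me a short story."):
        print(token, end="", flush=True)

asyncio.run(main())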
Anthropic Integration
from anthropic import AsyncAnthropic

anthropic_client = AsyncAnthropic()  # separate name so the OpenAI client above stays usable

async def generate_claude(prompt: str) -> str:
    message = await anthropic_client.messages.create(
        model="claude-3-opus-20240229",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )
    return message.content[0].text
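Local Models (Ollama)
Ollama exposes an OpenAI-compatible API, so the AsyncOpenAI client from above can talk to a local model with no other changes. A minimal sketch, assuming Ollama is running on its default port and a model such as llama3 has already been pulled:

local_client = AsyncOpenAI(
    base_url="http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
    api_key="ollama"  # the client requires a value; Ollama ignores it
)

async def generate_local(prompt: str) -> str:
    response = await local_client.chat.completions.create(
        model="llama3",  # assumption: any model you have pulled locally
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content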
Model Selection
Choose the right model for your use case:
def select_model(task: str) -> str:
    if task == "code":
        return "gpt-4-turbo"
    elif task == "simple":
        return "gpt-3.5-turbo"
    elif task == "creative":
        return "claude-3-opus-20240229"
    return "gpt-4-turbo"
Cost Optimization
Caching
Note that functools.lru_cache does not work on async functions: it would cache the coroutine object itself, which can only be awaited once. Cache the result explicitly instead:

import hashlib

_cache: dict[str, str] = {}

async def cached_generate(prompt: str) -> str:
    # Key on a hash of the prompt so long prompts stay compact.
    key = hashlib.sha256(prompt.encode()).hexdigest()
    if key not in _cache:
        _cache[key] = await generate(prompt)
    return _cache[key]

Unlike lru_cache, this dict grows without bound, so cap or evict entries before relying on it in production.
Token Limits
response = await client.chat.completions.create(
    model="gpt-4-turbo",
    messages=messages,
    max_tokens=500,  # Limit response length
    temperature=0.7
)
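max_tokens caps only the completion. To budget the prompt side as well, count its tokens before sending; a sketch using the tiktoken library (an assumption: it is installed separately and is not part of Shipfastai):

import tiktoken

def count_tokens(text: str, model: str = "gpt-4-turbo") -> int:
    # tiktoken resolves the tokenizer encoding from the model name.
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))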
Error Handling
import asyncio
import logging

from openai import APIError, RateLimitError

logger = logging.getLogger(__name__)

async def safe_generate(prompt: str) -> str:
    try:
        return await generate(prompt)
    except RateLimitError:
        # Wait out the rate limit, then retry once.
        await asyncio.sleep(60)
        return await generate(prompt)
    except APIError as e:
        logger.error(f"API error: {e}")
        raise
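A fixed 60-second sleep is a blunt instrument. A common refinement is exponential backoff with a retry cap; a sketch of that pattern (generate_with_backoff is a hypothetical helper name):

async def generate_with_backoff(prompt: str, max_retries: int = 3) -> str:
    delay = 1.0
    for attempt in range(1, max_retries + 1):
        try:
            return await generate(prompt)
        except RateLimitError:
            if attempt == max_retries:
                raise  # out of retries; let the caller decide
            await asyncio.sleep(delay)
            delay *= 2  # back off: 1s, 2s, 4s, ...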