Complete reference for the CostLens SDK - Save 20-40% on AI costs with smart routing.
```bash
npm install costlens openai
```

```bash
# .env
COSTLENS_API_KEY=cl_your_api_key_here
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
```

```typescript
import OpenAI from 'openai';
import CostLens from 'costlens';
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const costlens = new CostLens({ apiKey: process.env.COSTLENS_API_KEY, enableCache: true });
const tracked = costlens.wrapOpenAI(openai);
const res = await tracked.chat.completions.create({
model: 'gpt-4o-mini',
messages: [{ role: 'user', content: 'Hello' }]
});
```

```bash
npm install costlens
```

The SDK also works without an API key, providing cost optimization and smart routing for development environments.

```typescript
import { CostLens } from 'costlens';
import OpenAI from 'openai';
const costlens = new CostLens();
const openai = new OpenAI({ apiKey: 'your-openai-key' });
const ai = costlens.wrapOpenAI(openai);
const response = await ai.chat.completions.create({
model: 'gpt-4',
messages: [{ role: 'user', content: 'What is 2+2?' }]
});
// Check potential savings
const savings = await costlens.calculateSavings('gpt-4', [
{ role: 'user', content: 'What is 2+2?' }
]);
console.log(`Potential savings: ${savings.savingsPercentage}% with ${savings.recommendedModel}`);
```

```typescript
new CostLens(config?: CostLensConfig)
```

| Name | Type | Required | Description |
|---|---|---|---|
| `apiKey` | `string` | Yes | Your CostLens API key |
| `autoOptimize` | `boolean` | No | Automatic prompt optimization and compression (feature in development) |
| `smartRouting` | `boolean` | No | Route requests to the cheapest capable model (up to 20x savings) |
| `enableCache` | `boolean` | No | Cache responses to save on repeated requests |
| `costLimit` | `number` | No | Maximum cost per request, in dollars (prevents overruns) |
| `autoFallback` | `boolean` | No | Automatically retry or fall back on rate limits |
| `maxRetries` | `number` | No | Maximum retry attempts (default: `3`) |
| `baseUrl` | `string` | No | Custom base URL (default: `https://api.costlens.dev`) |
| `routingPolicy` | `function` | No | Custom routing decisions |
| `qualityValidator` | `function` | No | Custom quality scoring |
| `requestId` | `string` | No | Request tracking ID |
| `correlationId` | `string` | No | Correlation tracking ID |

```typescript
const costlens = new CostLens({
apiKey: 'cl_your_api_key_here',
autoOptimize: true,
smartRouting: true,
enableCache: true,
costLimit: 0.10,
autoFallback: true, // Auto-retry on failures
maxRetries: 3, // Retry up to 3 times
// New SDK Features
routingPolicy: (requestedModel, messages) => {
// Custom routing logic
if (messages.length > 10) return 'gpt-4o-mini';
return requestedModel;
},
qualityValidator: (responseText, messagesJson) => {
// Custom quality scoring (1-5)
return responseText.length > 100 ? 5 : 3;
},
requestId: 'req_' + Date.now(),
correlationId: 'session_abc123'
});
```
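With `costLimit` set, the SDK can stop a request whose estimated cost would exceed the limit. The exact error shape isn't documented here, so this sketch assumes a plain `Error` is thrown, and reuses the `costlens` instance above with an OpenAI client like the one from the quick-start:

```typescript
const guarded = costlens.wrapOpenAI(openai);

try {
  // A long prompt against a premium model may exceed costLimit: 0.10
  const res = await guarded.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: 'Write a very long report...' }]
  });
  console.log(res.choices[0].message.content);
} catch (err) {
  // Assumption: cost-limit violations surface as thrown errors
  console.error('Request blocked or failed:', (err as Error).message);
}
```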
Wrap your provider clients so CostLens can route, cache, and track usage automatically.

```typescript
import OpenAI from 'openai';
import CostLens from 'costlens';
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const costlens = new CostLens({ apiKey: process.env.COSTLENS_API_KEY, smartRouting: true });
const tracked = costlens.wrapOpenAI(openai);
const res = await tracked.chat.completions.create({
model: 'gpt-4o-mini',
messages: [{ role: 'user', content: 'Hello' }]
});
```

```typescript
import Anthropic from '@anthropic-ai/sdk';
import CostLens from 'costlens';
const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
const costlens = new CostLens({ apiKey: process.env.COSTLENS_API_KEY, smartRouting: true });
const trackedClaude = costlens.wrapAnthropic(anthropic);
const res = await trackedClaude.messages.create({
model: 'claude-3-haiku-20240307',
max_tokens: 1024,
messages: [{ role: 'user', content: 'Hello' }]
});
```

When a call fails, record the failure for visibility and surface the error to the caller:

```typescript
try {
const start = Date.now();
const result = await tracked.chat.completions.create(params);
await costlens.trackOpenAI(params, result, Date.now() - start, 'prompt-42');
return result;
} catch (err) {
await costlens.trackError('openai', params.model as string, JSON.stringify(params.messages), err as Error, 0);
throw err; // surface to caller
}
```

Track an OpenAI API call.

```typescript
trackOpenAI(
params: OpenAI.Chat.ChatCompletionCreateParams,
result: OpenAI.Chat.ChatCompletion,
latency: number,
promptId?: string
): Promise<void>
```

- `params` - The parameters passed to OpenAI
- `result` - The response from OpenAI
- `latency` - Time taken in milliseconds
- `promptId` - Optional tag to group related prompts

```typescript
const start = Date.now();
const result = await openai.chat.completions.create(params);
await costlens.trackOpenAI(
params,
result,
Date.now() - start,
'my-prompt-v1' // optional
);
```

Track an Anthropic (Claude) API call.

```typescript
trackAnthropic(
params: Anthropic.MessageCreateParams,
result: Anthropic.Message,
latency: number,
promptId?: string
): Promise<void>
```

- `params` - The parameters passed to Anthropic
- `result` - The response from Anthropic
- `latency` - Time taken in milliseconds
- `promptId` - Optional tag to group related prompts

```typescript
const start = Date.now();
const result = await anthropic.messages.create(params);
await costlens.trackAnthropic(
params,
result,
Date.now() - start,
'my-prompt-v1' // optional
);
```

Track a failed API call.

```typescript
trackError(
provider: string,
model: string,
input: string,
error: Error,
latency: number
): Promise<void>
```

- `provider` - The provider (`openai`, `anthropic`)
- `model` - The model that was attempted
- `input` - The input that was sent
- `error` - The error object
- `latency` - Time taken before failure

```typescript
try {
const result = await openai.chat.completions.create(params);
await costlens.trackOpenAI(params, result, latency);
} catch (error) {
await costlens.trackError(
'openai',
params.model,
JSON.stringify(params.messages),
error,
latency
);
throw error;
}
```

Process multiple AI requests in a single call for 3-5x better performance.

```typescript
trackBatch(
calls: Array<{ provider: string; model: string; tokens: number; latency: number }>
): Promise<void>
```

- `calls` - Array of request data to process in batch; each entry contains:
  - `provider` - The AI provider (`openai`, `anthropic`, etc.)
  - `model` - The model used
  - `tokens` - Number of tokens used
  - `latency` - Request latency in milliseconds

```typescript
// Process multiple requests efficiently
const requests = [
{ provider: 'openai', model: 'gpt-4', tokens: 150, latency: 1200 },
{ provider: 'anthropic', model: 'claude-3', tokens: 200, latency: 1000 },
{ provider: 'openai', model: 'gpt-3.5-turbo', tokens: 100, latency: 800 }
];
// Single batch call - 3-5x faster than individual requests
await costlens.trackBatch(requests);
// Automatic queue management for optimal performance
// SDK automatically batches requests when possible
```
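If you collect usage records yourself (for example, from several workers), one pattern is to buffer them and flush on an interval. The queue and flush interval below are illustrative, not part of the SDK:

```typescript
type BatchCall = { provider: string; model: string; tokens: number; latency: number };

const queue: BatchCall[] = [];

// Workers push records as requests complete
function record(call: BatchCall) {
  queue.push(call);
}

// Flush up to 50 buffered records every 5 seconds
setInterval(async () => {
  if (queue.length === 0) return;
  await costlens.trackBatch(queue.splice(0, 50));
}, 5000);
```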
Get real-time performance metrics and savings data.

```typescript
getCostAnalytics(): {
cacheHitRate: number; // Cache hit rate (0-1)
totalSavings: number; // Total money saved
averageLatency: number; // Average request latency
errorRate: number; // Error rate (0-1)
}
```

```typescript
const analytics = costlens.getCostAnalytics();
console.log('Cache Hit Rate:', analytics.cacheHitRate * 100 + '%');
console.log('Total Savings: $' + analytics.totalSavings);
console.log('Average Latency:', analytics.averageLatency + 'ms');
console.log('Error Rate:', analytics.errorRate * 100 + '%');
```

Calculate potential savings before making a request.

```typescript
calculateSavings(
requestedModel: string,
messages: any[]
): Promise<{
currentCost: number;
optimizedCost: number;
savings: number;
savingsPercentage: number;
recommendedModel: string;
}>
```

```typescript
const savings = await costlens.calculateSavings('gpt-4', messages);
console.log('Current Cost: $' + savings.currentCost);
console.log('Optimized Cost: $' + savings.optimizedCost);
console.log('Savings: $' + savings.savings);
console.log('Savings %: ' + savings.savingsPercentage + '%');
console.log('Recommended Model: ' + savings.recommendedModel);
```
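A common pattern is to use this estimate as a pre-flight gate and only downgrade when the projected savings clear a threshold. A sketch (the `pickModel` helper and the 20% threshold are illustrative, not part of the SDK):

```typescript
// Downgrade only when the estimated savings are meaningful
async function pickModel(requestedModel: string, messages: any[]): Promise<string> {
  const estimate = await costlens.calculateSavings(requestedModel, messages);
  return estimate.savingsPercentage > 20
    ? estimate.recommendedModel
    : requestedModel;
}

const model = await pickModel('gpt-4', messages);
const result = await openai.chat.completions.create({ model, messages });
```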
CostLens currently supports the OpenAI and Anthropic APIs, with smart routing between models. The examples below reuse the wrapped clients (`tracked`, `trackedClaude`) created earlier.

```typescript
// OpenAI routing: GPT-4 → GPT-3.5 for simple tasks
const openaiResult = await tracked.chat.completions.create({
  model: 'gpt-4',
  messages: [{ role: 'user', content: 'Simple task' }]
});
// Automatically routed to GPT-3.5-turbo (20x cheaper)

// Anthropic routing: Claude Opus → Haiku for simple tasks
const anthropicResult = await trackedClaude.messages.create({
  model: 'claude-3-opus-20240229',
  max_tokens: 1024,
  messages: [{ role: 'user', content: 'Simple task' }]
});
// Automatically routed to Claude Haiku (60x cheaper)
```

CostLens provides ML-powered cost forecasting and routing analytics through the dashboard.

```typescript
// All analytics available through dashboard API
const stats = await fetch('/api/dashboard/stats', {
headers: { 'Authorization': 'Bearer ' + apiKey }
});
const data = await stats.json();
console.log('Cost forecast:', data.costForecast);
console.log('Routing decisions:', data.routingDecisions);
console.log('Provider stats:', data.providerStats);
```

Get a 30-day cost forecast, alerts, and optimization tips.

```typescript
const forecast = await costlens.getCostForecast({ windowDays: 30 });
console.log(forecast.projectedMonthlyCost, forecast.trend, forecast.confidence);
const alerts = await costlens.checkCostAlerts();
console.log(alerts);
const recs = await costlens.getOptimizationRecommendations();
console.log(recs);
```

Override default routing decisions with custom logic based on request context.

```typescript
const costlens = new CostLens({
apiKey: process.env.COSTLENS_API_KEY,
routingPolicy: (requestedModel, messages) => {
// Route complex queries to better models
const complexity = messages.reduce((acc, msg) => acc + msg.content.length, 0);
if (complexity > 1000) {
return 'gpt-4o'; // Use premium model for complex tasks
}
if (requestedModel === 'gpt-4' && complexity < 100) {
return 'gpt-4o-mini'; // Downgrade simple tasks
}
return requestedModel; // Keep original choice
}
});
```

Implement custom quality scoring to improve routing decisions over time.

```typescript
const costlens = new CostLens({
apiKey: process.env.COSTLENS_API_KEY,
qualityValidator: (responseText, messagesJson) => {
const messages = JSON.parse(messagesJson);
// Score based on response completeness
let score = 3; // baseline
if (responseText.length > 200) score += 1;
if (responseText.includes('```')) score += 1; // code examples
if (messages.some(m => m.content.includes('?')) &&
responseText.includes('?')) score -= 1; // answered with question
return Math.max(1, Math.min(5, score)); // clamp 1-5
}
});
```

Track related requests across your application with correlation IDs.

```typescript
// Track user session
const sessionId = 'session_' + userId;
const requestId = 'req_' + Date.now();
const costlens = new CostLens({
apiKey: process.env.COSTLENS_API_KEY,
requestId: requestId,
correlationId: sessionId
});
const tracked = costlens.wrapOpenAI(openai);
// All requests will be tagged with these IDs
await tracked.chat.completions.create({
model: 'gpt-4o-mini',
messages: [{ role: 'user', content: 'Hello' }]
});
// Get analytics using SDK method
const analytics = costlens.getCostAnalytics();
console.log('Cache Hit Rate:', analytics.cacheHitRate * 100 + '%');
console.log('Total Savings: $' + analytics.totalSavings);
```
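In a web server, a natural way to apply these IDs is to construct a scoped client per request in middleware. A sketch using Express (the middleware, the `x-session-id` header, and the UUID tagging are illustrative, not part of the SDK):

```typescript
import { randomUUID } from 'node:crypto';
import express from 'express';
import CostLens from 'costlens';

const app = express();

// One CostLens instance per request, tagged for end-to-end tracing
app.use((req, _res, next) => {
  (req as any).costlens = new CostLens({
    apiKey: process.env.COSTLENS_API_KEY!,
    requestId: 'req_' + randomUUID(),
    correlationId: req.header('x-session-id') ?? 'anonymous'
  });
  next();
});
```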
Automatically cache responses to save money on repeated requests; caching achieves strong hit rates in production.

```typescript
const costlens = new CostLens({
apiKey: process.env.COSTLENS_API_KEY,
enableCache: true, // Enable Redis caching
});
const tracked = costlens.wrapOpenAI(openai);
// First call - cache miss, costs $0.05
await tracked.chat.completions.create({
model: 'gpt-4',
messages: [{ role: 'user', content: 'What is 2+2?' }],
});
// Second call - cache hit, costs $0.00!
await tracked.chat.completions.create({
model: 'gpt-4',
messages: [{ role: 'user', content: 'What is 2+2?' }],
});
```
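Cache hits require an identical request: the same model and the same messages. The SDK's internal key format isn't shown here, but conceptually it behaves like hashing the request, as in this sketch:

```typescript
import { createHash } from 'node:crypto';

// Identical model + messages → identical key → cache hit
function cacheKey(model: string, messages: { role: string; content: string }[]): string {
  return createHash('sha256')
    .update(model)
    .update(JSON.stringify(messages))
    .digest('hex');
}
```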
Smart routing automatically disables itself if response quality drops below 3.5/5 stars.

```typescript
// SDK checks quality status before routing
const tracked = costlens.wrapOpenAI(openai);
// If quality is good: GPT-4 → GPT-3.5 (saves money)
// If quality dropped: Uses GPT-4 (protects quality)
await tracked.chat.completions.create({
model: 'gpt-4',
messages: [{ role: 'user', content: 'Complex task...' }],
});
// Note: Quality feedback is handled automatically by the SDK
// The SDK tracks routing decisions and learns from them internally
```
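If you supply the `qualityValidator` option shown earlier, your own scores can feed this gate (assuming the validator's 1-5 scale maps onto the 3.5-star threshold):

```typescript
const costlens = new CostLens({
  apiKey: process.env.COSTLENS_API_KEY!,
  smartRouting: true,
  // 1-5 score; sustained low scores pause smart routing
  qualityValidator: (responseText) => (responseText.trim().length > 50 ? 4 : 2)
});
```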
Automatically compress prompts by 30-50% while preserving meaning.

```typescript
const costlens = new CostLens({
apiKey: process.env.COSTLENS_API_KEY,
autoOptimize: true, // Enable AI compression
});
const tracked = costlens.wrapOpenAI(openai);
// Original: 200 tokens
// Optimized: 100 tokens (50% reduction)
// Savings: $0.009 per request
await tracked.chat.completions.create({
model: 'gpt-4',
messages: [{
role: 'user',
content: 'Please kindly help me understand what the weather will be like tomorrow in San Francisco, California, USA'
}],
});
// Compressed to: "Weather forecast for San Francisco tomorrow?"
```

Track exactly how much money you're saving with baseline cost comparison.

```typescript
// Calculate potential savings using SDK method
const savings = await costlens.calculateSavings('gpt-4', messages);
console.log(`Current Cost: $${savings.currentCost}`);
console.log(`Optimized Cost: $${savings.optimizedCost}`);
console.log(`Savings: $${savings.savings} (${savings.savingsPercentage.toFixed(1)}%)`);
console.log(`Recommended Model: ${savings.recommendedModel}`);
// Example output:
// Current Cost: $0.15
// Optimized Cost: $0.03
// Savings: $0.12 (80.0%)
// Recommended Model: gpt-3.5-turbo
```

```typescript
interface CostLensConfig {
  apiKey: string;
  baseUrl?: string;
  // ...plus the optional fields documented in the constructor table above
}
```

```bash
# .env
COSTLENS_API_KEY=cl_your_api_key_here
OPENAI_API_KEY=sk-your_openai_key
ANTHROPIC_API_KEY=sk-ant-your_anthropic_key
```

If requests fail to authenticate, verify your `COSTLENS_API_KEY` and the request header formatting.
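To make sure these variables are loaded before the SDK initializes, you can load the file with `dotenv` at your entry point; a minimal sketch:

```typescript
import 'dotenv/config'; // populates process.env from .env
import CostLens from 'costlens';

const costlens = new CostLens({ apiKey: process.env.COSTLENS_API_KEY! });
```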