Using ChatLiteLLM() - Langchain
Pre-Requisites
!pip install litellm langchain
Quick Start
- OpenAI
- Anthropic
- Replicate
- Cohere
import os
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
os.environ['OPENAI_API_KEY'] = ""  # set your OpenAI API key
chat = ChatLiteLLM(model="gpt-3.5-turbo")
messages = [
    HumanMessage(
        content="what model are you"
    )
]
chat.invoke(messages)
import os
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
os.environ['ANTHROPIC_API_KEY'] = ""  # set your Anthropic API key
chat = ChatLiteLLM(model="claude-2", temperature=0.3)
messages = [
    HumanMessage(
        content="what model are you"
    )
]
chat.invoke(messages)
import os
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
os.environ['REPLICATE_API_TOKEN'] = ""  # set your Replicate API token
chat = ChatLiteLLM(model="replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1")
messages = [
    HumanMessage(
        content="what model are you?"
    )
]
chat.invoke(messages)
import os
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
os.environ['COHERE_API_KEY'] = ""  # set your Cohere API key
chat = ChatLiteLLM(model="command-nightly")
messages = [
    HumanMessage(
        content="what model are you?"
    )
]
chat.invoke(messages)
Use Langchain ChatLiteLLM with MLflow
MLflow provides an open-source observability solution for ChatLiteLLM.
To enable the integration, simply call mlflow.litellm.autolog() before invoking ChatLiteLLM in your code. No other setup is necessary.
import mlflow
mlflow.litellm.autolog()
Once auto-tracing is enabled, you can invoke ChatLiteLLM and see the recorded traces in MLflow.
import os
from langchain_community.chat_models import ChatLiteLLM
os.environ['OPENAI_API_KEY']="sk-..."
chat = ChatLiteLLM(model="gpt-4o-mini")
chat.invoke("Hi!")
Use Langchain ChatLiteLLM with Lunary
import os
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.messages import HumanMessage
import litellm
os.environ["LUNARY_PUBLIC_KEY"] = ""  # from https://app.lunary.ai/settings
os.environ['OPENAI_API_KEY'] = "sk-..."
# Send LiteLLM success/failure events to Lunary
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
chat = ChatLiteLLM(model="gpt-4o")
messages = [
    HumanMessage(
        content="what model are you"
    )
]
chat.invoke(messages)
Get more details here
Use LangChain ChatLiteLLM + Langfuse
Check out this section for more details on how to integrate Langfuse with ChatLiteLLM.
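As a quick reference, Langfuse can be wired up through LiteLLM's callback mechanism, mirroring the Lunary example above. A minimal sketch, assuming you have a Langfuse project and the langfuse package installed (the linked section covers the full setup):

import os
import litellm
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.messages import HumanMessage
# Langfuse credentials, from your Langfuse project settings
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ['OPENAI_API_KEY'] = "sk-..."
# Route LiteLLM success/failure events to Langfuse
litellm.success_callback = ["langfuse"]
litellm.failure_callback = ["langfuse"]
chat = ChatLiteLLM(model="gpt-4o")
chat.invoke([HumanMessage(content="what model are you")])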
Using Tags with LangChain and LiteLLM
Tags are a powerful feature in LiteLLM that let you categorize, filter, and track your LLM requests. When using LangChain with LiteLLM, you can pass tags through the metadata field of the extra_body parameter.
Basic Tag Usage
- OpenAI
- Anthropic
- LiteLLM Proxy
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
os.environ['OPENAI_API_KEY'] = "sk-your-key-here"
chat = ChatOpenAI(
    model="gpt-4o",
    temperature=0.7,
    extra_body={
        "metadata": {
            "tags": ["production", "customer-support", "high-priority"]
        }
    }
)
messages = [
    SystemMessage(content="You are a helpful customer support assistant."),
    HumanMessage(content="How do I reset my password?")
]
response = chat.invoke(messages)
print(response)
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
os.environ['ANTHROPIC_API_KEY'] = "sk-ant-your-key-here"
chat = ChatOpenAI(
    model="claude-3-sonnet-20240229",
    temperature=0.7,
    extra_body={
        "metadata": {
            "tags": ["research", "analysis", "claude-model"]
        }
    }
)
messages = [
    SystemMessage(content="You are a research analyst."),
    HumanMessage(content="Analyze this market trend...")
]
response = chat.invoke(messages)
print(response)
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
# No API key needed when using proxy
chat = ChatOpenAI(
    openai_api_base="http://localhost:4000",  # Your proxy URL
    model="gpt-4o",
    temperature=0.7,
    extra_body={
        "metadata": {
            "tags": ["proxy", "team-alpha", "feature-flagged"],
            "generation_name": "customer-onboarding",
            "trace_user_id": "user-12345"
        }
    }
)
messages = [
    SystemMessage(content="You are an onboarding assistant."),
    HumanMessage(content="Welcome our new customer!")
]
response = chat.invoke(messages)
print(response)
Advanced Tag Patterns
Dynamic Tags Based on Context
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
def create_chat_with_tags(user_type: str, feature: str):
    """Create a chat instance with dynamic tags based on context"""
    
    # Build tags dynamically
    tags = ["langchain-integration"]
    
    if user_type == "premium":
        tags.extend(["premium-user", "high-priority"])
    elif user_type == "enterprise":
        tags.extend(["enterprise", "custom-sla"])
    else:
        tags.append("standard-user")
    
    # Add feature-specific tags
    if feature == "code-review":
        tags.extend(["development", "code-analysis"])
    elif feature == "content-gen":
        tags.extend(["marketing", "content-creation"])
    
    return ChatOpenAI(
        openai_api_base="http://localhost:4000",
        model="gpt-4o",
        temperature=0.7,
        extra_body={
            "metadata": {
                "tags": tags,
                "user_type": user_type,
                "feature": feature,
                "trace_user_id": f"user-{user_type}-{feature}"
            }
        }
    )
# Usage examples
premium_chat = create_chat_with_tags("premium", "code-review")
enterprise_chat = create_chat_with_tags("enterprise", "content-gen")
messages = [HumanMessage(content="Help me with this task")]
response = premium_chat.invoke(messages)
Tags for Cost Tracking and Analytics
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
# Tags for cost tracking
cost_tracking_chat = ChatOpenAI(
    openai_api_base="http://localhost:4000",
    model="gpt-4o",
    temperature=0.7,
    extra_body={
        "metadata": {
            "tags": [
                "cost-center-marketing",
                "budget-q4-2024",
                "project-launch-campaign",
                "high-cost-model"  # Flag for expensive models
            ],
            "department": "marketing",
            "project_id": "campaign-2024-q4",
            "cost_threshold": "high"
        }
    }
)
messages = [
    SystemMessage(content="You are a marketing copywriter."),
    HumanMessage(content="Create compelling ad copy for our new product launch.")
]
response = cost_tracking_chat.invoke(messages)
Tags for A/B Testing
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
import random
def create_ab_test_chat(test_variant: str = None):
    """Create chat instance for A/B testing with appropriate tags"""
    
    if test_variant is None:
        test_variant = random.choice(["variant-a", "variant-b"])
    
    return ChatOpenAI(
        openai_api_base="http://localhost:4000",
        model="gpt-4o",
        temperature=0.7 if test_variant == "variant-a" else 0.9,  # Different temp for variants
        extra_body={
            "metadata": {
                "tags": [
                    "ab-test-experiment-1",
                    f"variant-{test_variant}",
                    "temperature-test",
                    "user-experience"
                ],
                "experiment_id": "ab-test-001",
                "variant": test_variant,
                "test_group": "temperature-optimization"
            }
        }
    )
# Run A/B test
variant_a_chat = create_ab_test_chat("variant-a")
variant_b_chat = create_ab_test_chat("variant-b")
test_message = [HumanMessage(content="Explain quantum computing in simple terms")]
response_a = variant_a_chat.invoke(test_message)
response_b = variant_b_chat.invoke(test_message)
Tag Best Practices
1. Consistent Naming Convention
# ✅ Good: Consistent, descriptive tags
tags = ["production", "api-v2", "customer-support", "urgent"]
# ❌ Avoid: Inconsistent or unclear tags
tags = ["prod", "v2", "support", "urgent123"]
2. Hierarchical Tags
# ✅ Good: Hierarchical structure
tags = ["env:production", "team:backend", "service:api", "priority:high"]
# This allows for easy filtering and grouping
3. Include Context Information
extra_body={
    "metadata": {
        "tags": ["production", "user-onboarding"],
        "user_id": "user-12345",
        "session_id": "session-abc123",
        "feature_flag": "new-onboarding-flow",
        "environment": "production"
    }
}
4. Tag Categories
Consider organizing tags into categories; a helper that assembles them is sketched after this list:
- Environment: production, staging, development
- Team/Service: backend, frontend, api, worker
- Feature: authentication, payment, notification
- Priority: critical, high, medium, low
- User Type: premium, enterprise, free
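One way to keep these categories consistent across a codebase is a small helper that assembles the tag list. A sketch (build_tags is a hypothetical helper, not part of LiteLLM):

def build_tags(environment: str, team: str, feature: str, priority: str, user_type: str) -> list[str]:
    """Assemble a hierarchical tag list from the categories above."""
    return [
        f"env:{environment}",
        f"team:{team}",
        f"feature:{feature}",
        f"priority:{priority}",
        f"user:{user_type}",
    ]

tags = build_tags("production", "backend", "payment", "high", "premium")
# ['env:production', 'team:backend', 'feature:payment', 'priority:high', 'user:premium']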
Using Tags with LiteLLM Proxy
When using tags with LiteLLM Proxy, you can:
- Filter requests based on tags
- Track costs by tags in spend reports
- Apply routing rules based on tags
- Monitor usage with tag-based analytics
Example Proxy Configuration with Tags
# config.yaml
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: gpt-4o
      api_key: your-key
# Tag-based routing rules
tag_routing:
  - tags: ["premium", "high-priority"]
    models: ["gpt-4o", "claude-3-opus"]
  - tags: ["standard"]
    models: ["gpt-3.5-turbo", "claude-3-haiku"]
Monitoring and Analytics
Tags enable powerful analytics capabilities:
# Example: Get spend reports by tags
import requests
response = requests.get(
    "http://localhost:4000/global/spend/report",
    headers={"Authorization": "Bearer sk-your-key"},
    params={
        "start_date": "2024-01-01",
        "end_date": "2024-12-31",
        "group_by": "tags"
    }
)
spend_by_tags = response.json()
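The exact response shape depends on your proxy version; as a sketch, assuming each row carries a tag name and a spend figure, you could summarize it like this:

# Hypothetical post-processing; adjust the keys to match your proxy's response
for row in spend_by_tags:
    tag = row.get("tag", "untagged")
    spend = row.get("spend", 0.0)
    print(f"{tag}: ${spend:.2f}")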
This documentation covers the essential patterns for using tags effectively with LangChain and LiteLLM, enabling better organization, tracking, and analytics of your LLM requests.