Tracking Root Span ID in LiteLLM for UI Feedback Integration
Is there a simple way to track the root span ID for a call when using LiteLLM? I am trying to integrate feedback from the UI, which requires linking each response back to its trace. My code for reference:
# app/services/chat/chat_client.py
import logging
from typing import Dict, Optional
import asyncio
from tenacity import retry, stop_after_attempt, wait_fixed, RetryError
import litellm
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.types.utils import ModelResponse
from openinference.instrumentation import using_attributes
# Module-level logger; DEBUG level so retry/timeout diagnostics are visible.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
class ChatClientError(Exception):
    """Raised when a ChatClient LLM request fails or times out."""
class ChatRetriesExceededError(Exception):
    """Raised when every retry attempt of a ChatClient request has failed."""
class ChatClient:
    """Singleton wrapper around ``litellm.acompletion`` with per-attempt
    timeout and bounded retries.

    Each attempt is capped at ``TIMEOUT_SECONDS``; up to ``MAX_RETRIES``
    attempts are made in total.  Exhausted retries surface to callers of
    :meth:`get_response_stream` as :class:`ChatRetriesExceededError`.

    Traces are tagged with ``session_id``/``user_id`` via OpenInference's
    ``using_attributes`` so UI feedback can be joined back to the call's
    root span.
    """

    # Cached singleton instance (see __new__).
    _instance = None

    # Configuration constants
    TIMEOUT_SECONDS = 6  # Per-attempt timeout in seconds
    MAX_RETRIES = 3  # Total attempts (3 attempts, i.e. 2 retries after the first)

    def __new__(cls):
        # Classic singleton: every ChatClient() call returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @retry(
        stop=stop_after_attempt(MAX_RETRIES),
        wait=wait_fixed(0),
        # reraise=True re-raises the LAST exception (a ChatClientError from
        # the body below) instead of tenacity's RetryError once attempts are
        # exhausted.  get_response_stream relies on this.
        reraise=True,
    )
    async def _make_request(
        self, request_data: Dict, thread_id: str, uid: str
    ) -> ModelResponse | CustomStreamWrapper:
        """Run a single LLM call attempt, bounded by TIMEOUT_SECONDS.

        Args:
            request_data: Keyword arguments forwarded to ``litellm.acompletion``.
            thread_id: Conversation/session identifier attached to the trace.
            uid: User identifier attached to the trace.

        Returns:
            The ``litellm.acompletion`` result (a stream wrapper when
            ``stream=True`` is in ``request_data``).

        Raises:
            ChatClientError: On timeout or any underlying litellm failure.
        """
        try:
            # Tag the OpenInference span with session/user ids so UI
            # feedback can later be correlated with this trace.
            with using_attributes(
                session_id=thread_id,
                user_id=uid,
            ):
                # wait_for cancels the underlying coroutine on timeout, so
                # no separate task bookkeeping is needed.
                return await asyncio.wait_for(
                    litellm.acompletion(**request_data),
                    timeout=self.TIMEOUT_SECONDS,
                )
        except asyncio.TimeoutError as e:
            logger.error("Request timed out after %s seconds", self.TIMEOUT_SECONDS)
            raise ChatClientError("Request timed out") from e
        except Exception as e:
            logger.error("Error making request: %s", e)
            raise ChatClientError(f"LLM request failed: {str(e)}") from e

    @staticmethod
    async def get_response_stream(
        uid: str,
        thread_id: str,
        model: str,
        messages: list,
        tools: Optional[list] = None,
        parallel_tool_calls: Optional[bool] = False,
        # None sentinel instead of a mutable dict default; the previous
        # default ({"type": "text"}) was accepted but never forwarded, so
        # omitting it by default preserves observable behavior.
        response_format: Optional[Dict[str, str]] = None,
    ) -> ModelResponse | CustomStreamWrapper:
        """Stream a chat completion for ``messages`` with retries.

        Args:
            uid: User identifier (attached to the trace).
            thread_id: Conversation/session identifier (attached to the trace).
            model: Model name passed to litellm.
            messages: Chat messages in OpenAI format.
            tools: Optional tool definitions; enables ``tool_choice="auto"``.
            parallel_tool_calls: Whether parallel tool calls are allowed
                (only sent when ``tools`` is provided).
            response_format: Optional response-format spec (e.g.
                ``{"type": "json_object"}``); forwarded only when given.

        Returns:
            The streaming completion wrapper from litellm.

        Raises:
            ChatRetriesExceededError: When every retry attempt has failed.
            ChatClientError: On unexpected errors outside the retry loop.
        """
        litellm.drop_params = True
        litellm._logging._disable_debugging()

        # Prepare the request data
        request_data = {
            "model": model,
            "messages": messages,
            "stream": True,
        }
        if tools:
            request_data["tools"] = tools
            request_data["tool_choice"] = "auto"
            request_data["parallel_tool_calls"] = parallel_tool_calls
        if response_format is not None:
            # Previously accepted but silently dropped; forward it now.
            # litellm.drop_params strips it for providers that reject it.
            request_data["response_format"] = response_format

        try:
            # Try to make the request with retries
            return await ChatClient()._make_request(request_data, thread_id, uid)
        except RetryError as e:
            # Defensive only: with reraise=True this branch is unreachable,
            # but it is kept in case the retry policy ever changes.
            logger.error("All retry attempts failed")
            raise ChatRetriesExceededError("All retry attempts failed") from e
        except ChatClientError as e:
            # Because _make_request wraps every failure in ChatClientError
            # and the retry decorator uses reraise=True, a ChatClientError
            # escaping it means ALL attempts were exhausted.  The original
            # code only mapped RetryError here, so ChatRetriesExceededError
            # was never actually raised.
            logger.error("All retry attempts failed: %s", e)
            raise ChatRetriesExceededError("All retry attempts failed") from e
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            raise ChatClientError(f"Unexpected error: {str(e)}") from e
# Module-level shared instance; ChatClient is a singleton, so any later
# ChatClient() call returns this same object.
chat_client = ChatClient()