Tracking Root Span ID in LiteLLM for UI Feedback Integration
Is there a simple way to track the root span ID for a call when using LiteLLM? I am trying to integrate feedback from the UI, which requires linking each response back to its trace. My code for reference:
# app/services/chat/chat_client.py
import logging
from typing import Dict, Optional
import asyncio
from tenacity import retry, stop_after_attempt, wait_fixed, RetryError
import litellm
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.types.utils import ModelResponse
from openinference.instrumentation import using_attributes
# Module-level logger; DEBUG level so retry/timeout diagnostics are visible.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
class ChatClientError(Exception):
    """Raised when a ChatClient LLM request fails or times out."""
class ChatRetriesExceededError(Exception):
    """Raised when every retry attempt of a ChatClient request has failed."""
class ChatClient:
    """Singleton wrapper around ``litellm.acompletion`` with per-attempt
    timeout and bounded retries.

    Each attempt is capped at ``TIMEOUT_SECONDS``; up to ``MAX_RETRIES``
    attempts are made in total.  Exhausted retries surface to callers of
    :meth:`get_response_stream` as :class:`ChatRetriesExceededError`.

    Traces are tagged with ``session_id``/``user_id`` via OpenInference's
    ``using_attributes`` so UI feedback can be joined back to the call's
    root span.
    """

    # Cached singleton instance (see __new__).
    _instance = None

    # Configuration constants
    TIMEOUT_SECONDS = 6  # Per-attempt timeout in seconds
    MAX_RETRIES = 3  # Total attempts (3 attempts, i.e. 2 retries after the first)

    def __new__(cls):
        # Classic singleton: every ChatClient() call returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @retry(
        stop=stop_after_attempt(MAX_RETRIES),
        wait=wait_fixed(0),
        # reraise=True re-raises the LAST exception (a ChatClientError from
        # the body below) instead of tenacity's RetryError once attempts are
        # exhausted.  get_response_stream relies on this.
        reraise=True,
    )
    async def _make_request(
        self, request_data: Dict, thread_id: str, uid: str
    ) -> ModelResponse | CustomStreamWrapper:
        """Run a single LLM call attempt, bounded by TIMEOUT_SECONDS.

        Args:
            request_data: Keyword arguments forwarded to ``litellm.acompletion``.
            thread_id: Conversation/session identifier attached to the trace.
            uid: User identifier attached to the trace.

        Returns:
            The ``litellm.acompletion`` result (a stream wrapper when
            ``stream=True`` is in ``request_data``).

        Raises:
            ChatClientError: On timeout or any underlying litellm failure.
        """
        try:
            # Tag the OpenInference span with session/user ids so UI
            # feedback can later be correlated with this trace.
            with using_attributes(
                session_id=thread_id,
                user_id=uid,
            ):
                # wait_for cancels the underlying coroutine on timeout, so
                # no separate task bookkeeping is needed.
                return await asyncio.wait_for(
                    litellm.acompletion(**request_data),
                    timeout=self.TIMEOUT_SECONDS,
                )
        except asyncio.TimeoutError as e:
            logger.error("Request timed out after %s seconds", self.TIMEOUT_SECONDS)
            raise ChatClientError("Request timed out") from e
        except Exception as e:
            logger.error("Error making request: %s", e)
            raise ChatClientError(f"LLM request failed: {str(e)}") from e

    @staticmethod
    async def get_response_stream(
        uid: str,
        thread_id: str,
        model: str,
        messages: list,
        tools: Optional[list] = None,
        parallel_tool_calls: Optional[bool] = False,
        # None sentinel instead of a mutable dict default; the previous
        # default ({"type": "text"}) was accepted but never forwarded, so
        # omitting it by default preserves observable behavior.
        response_format: Optional[Dict[str, str]] = None,
    ) -> ModelResponse | CustomStreamWrapper:
        """Stream a chat completion for ``messages`` with retries.

        Args:
            uid: User identifier (attached to the trace).
            thread_id: Conversation/session identifier (attached to the trace).
            model: Model name passed to litellm.
            messages: Chat messages in OpenAI format.
            tools: Optional tool definitions; enables ``tool_choice="auto"``.
            parallel_tool_calls: Whether parallel tool calls are allowed
                (only sent when ``tools`` is provided).
            response_format: Optional response-format spec (e.g.
                ``{"type": "json_object"}``); forwarded only when given.

        Returns:
            The streaming completion wrapper from litellm.

        Raises:
            ChatRetriesExceededError: When every retry attempt has failed.
            ChatClientError: On unexpected errors outside the retry loop.
        """
        litellm.drop_params = True
        litellm._logging._disable_debugging()

        # Prepare the request data
        request_data = {
            "model": model,
            "messages": messages,
            "stream": True,
        }
        if tools:
            request_data["tools"] = tools
            request_data["tool_choice"] = "auto"
            request_data["parallel_tool_calls"] = parallel_tool_calls
        if response_format is not None:
            # Previously accepted but silently dropped; forward it now.
            # litellm.drop_params strips it for providers that reject it.
            request_data["response_format"] = response_format

        try:
            # Try to make the request with retries
            return await ChatClient()._make_request(request_data, thread_id, uid)
        except RetryError as e:
            # Defensive only: with reraise=True this branch is unreachable,
            # but it is kept in case the retry policy ever changes.
            logger.error("All retry attempts failed")
            raise ChatRetriesExceededError("All retry attempts failed") from e
        except ChatClientError as e:
            # Because _make_request wraps every failure in ChatClientError
            # and the retry decorator uses reraise=True, a ChatClientError
            # escaping it means ALL attempts were exhausted.  The original
            # code only mapped RetryError here, so ChatRetriesExceededError
            # was never actually raised.
            logger.error("All retry attempts failed: %s", e)
            raise ChatRetriesExceededError("All retry attempts failed") from e
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            raise ChatClientError(f"Unexpected error: {str(e)}") from e
# Module-level shared instance; ChatClient is a singleton, so any later
# ChatClient() call returns this same object.
chat_client = ChatClient()