Troubleshooting LiteLLM Streaming Output with Arize Phoenix Collector
Hello, team! Thanks for building this package. I am trying to send LiteLLM `acompletion` calls to a self-hosted Arize Phoenix collector. I do receive traces for the `acompletion` calls, but the streaming output is not captured in them (screenshot attached). According to issue #1188, support for streaming responses was added in `openinference-instrumentation-litellm` 0.1.6. I am not sure what I am doing wrong and would really appreciate some help with this! My dependencies (from pyproject.toml) look like this:
```toml
dependencies = [
    "uvicorn[standard]==0.34.0",
    "fastapi[standard]==0.115.6",
    "litellm==1.59.6",
    "arize-phoenix-otel==0.7.1",
    "openinference.instrumentation.litellm==0.1.6",
    "opentelemetry-instrumentation-fastapi==0.50b0",
]
```

Here is how I am setting it up in my main.py:
```python
...
app = create_app()

tracer_provider = register(
    project_name="braince-ai",
    endpoint="http://phoenix.phoenix.svc.cluster.local:4317",  # sends traces over gRPC
)
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)
...
```
My chat client that calls litellm `acompletion` looks like this:

```python
import logging
from typing import Dict, Optional

import litellm
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.types.utils import ModelResponse

logger = logging.getLogger(__name__)


class ChatClient:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @staticmethod
    async def get_response_stream(
        model: str,
        messages: list,
        tools: Optional[list] = None,
        parallel_tool_calls: Optional[bool] = False,
        response_format: Optional[Dict[str, str]] = None,  # None instead of a mutable default
    ) -> ModelResponse | CustomStreamWrapper:
        litellm.drop_params = True
        litellm._logging._disable_debugging()
        # litellm.set_verbose = True

        # Prepare the request data
        request_data = {
            "model": model,
            "messages": messages,
            "stream": True,
            "response_format": response_format or {"type": "text"},
        }
        # logger.debug(f"Request data: {request_data}")

        # Add tools if provided
        if tools:
            request_data["tools"] = tools
            request_data["tool_choice"] = "auto"
            request_data["parallel_tool_calls"] = parallel_tool_calls

        # Create the stream
        stream = await litellm.acompletion(**request_data)
        return stream


chat_client = ChatClient()
```
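My endpoint then iterates the returned wrapper to exhaustion before the request finishes. My understanding is that the instrumentor can only attach the streamed output to the span once the wrapper has been fully consumed, so here is a stdlib-only sketch of the consumption pattern I use (`fake_stream` is a hypothetical stand-in for the `CustomStreamWrapper` returned by `acompletion`; the chunk shape mimics OpenAI-style deltas):

```python
import asyncio


async def fake_stream():
    # Stand-in for the CustomStreamWrapper returned by litellm.acompletion;
    # each item mimics a chunk carrying a delta of the assistant's text.
    for piece in ["Hel", "lo, ", "world!"]:
        yield {"choices": [{"delta": {"content": piece}}]}


async def consume(stream) -> str:
    # The instrumentor hooks the wrapper's iteration, so the output
    # attribute on the span is only complete after this loop finishes.
    parts = []
    async for chunk in stream:
        delta = chunk["choices"][0]["delta"].get("content") or ""
        parts.append(delta)
    return "".join(parts)


text = asyncio.run(consume(fake_stream()))
print(text)  # Hello, world!
```

If the stream were abandoned partway through (e.g. the client disconnects), I could see why no output would land on the span, but in my case the loop always runs to completion.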