Anybody else have this issue where the LLM spans show up twice? Anything I'm missing ?
Hi Swaraj. Can you share the code that you used to produce this?
import os
from urllib.parse import urljoin

from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
def instrument():
tracer_provider = trace_sdk.TracerProvider()
span_exporter = OTLPSpanExporter(
urljoin(os.environ["PHOENIX_COLLECTOR_ENDPOINT"], "/v1/traces")
)
span_processor = SimpleSpanProcessor(span_exporter)
tracer_provider.add_span_processor(span_processor)
trace_api.set_tracer_provider(tracer_provider)
LlamaIndexInstrumentor().instrument()
if os.environ.get("PHOENIX_COLLECTOR_ENDPOINT"):
instrument()how I set up the instrumentation for our azure function app
I'm using this snippet that generates the double LLM spans:
azure_open_ai = CachedAzureOpenAI(
model="gpt-4",
engine="gpt-4",
api_version="2024-02-15-preview",
temperature=0.23,
kv_store_cache=NoOpKVStore(),
)
response = azure_open_ai.chat(
messages=[ChatMessage(content="Why was 6 afraid of 7?")]
)
print(response)and this is the cached azure open ai object which uses AzureOpenAI
class CachedAzureOpenAI(AzureOpenAI):
"""Cached Azure OpenAI."""
llm_string: Optional[str] = Field(default=None)
kv_store_cache: Optional[KVStore] = Field(default=None, private=True)
def __init__(
self,
model: str,
engine: str,
api_version: str,
kv_store_cache: KVStore,
temperature: float = 0.1,
**kwargs,
):
parameters_dict = {
"model": model,
"engine": engine,
"api_version": api_version,
"temperature": temperature,
**kwargs,
}
super().__init__(**parameters_dict)
self.llm_string = str(sorted([(k, v) for k, v in parameters_dict.items()]))
self.kv_store_cache = kv_store_cache
@llm_chat_callback()
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
input_params_dict = {
"messages": repr(messages),
**kwargs,
}
input_params_str = str(sorted([(k, v) for k, v in input_params_dict.items()]))
cache_key = f"{self.llm_string}::{input_params_str}"
cached_value = self.kv_store_cache.get(cache_key)
if cached_value:
return ChatResponse.parse_raw(cached_value)
response: ChatResponse = super().chat(messages=messages, **kwargs)
# Raw field of the response contains objects that are not serializable
# so we set it to None
# We do not read the raw field from the response, so it is safe to set it to None
response.raw = None
self.kv_store_cache.set(cache_key, response.json())
return response
@llm_chat_callback()
async def achat(
self, messages: Sequence[ChatMessage], **kwargs: Any
) -> ChatResponse:
input_params_dict = {
"messages": repr(messages),
**kwargs,
}
input_params_str = str(sorted([(k, v) for k, v in input_params_dict.items()]))
cache_key = f"{self.llm_string}::{input_params_str}"
cached_value = self.kv_store_cache.get(cache_key)
if cached_value:
return ChatResponse.parse_raw(cached_value)
response = await super().achat(messages=messages, **kwargs)
# Raw field of the response contains objects that are not serializable
# so we set it to None
# We do not read the raw field from the response, so it is safe to set it to None
response.raw = None
self.kv_store_cache.set(cache_key, response.json())
return responseah interesting if i just do
out = AzureOpenAI(
model="gpt-4",
engine="gpt-4",
api_version="2024-02-15-preview",
temperature=0.23,
).chat(messages=[ChatMessage(content="Why was 6 afraid of 7?")])
print(out)instead of using the cached azure open ai, then I only see one span
you can try removing this decorator
@llm_chat_callback()
if you look at super().chat(...) it's also decorated by the same decorator
that's why two spans appear
each decorator generates a span
i guess technically you would only get two if the cache misses
you can also try to suppress the second span (when cache is missed) using the following technique. First import these
from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value
then wrap the super().chat(...) like the following
token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
response: ChatResponse = super().chat(messages=messages, **kwargs)
detach(token)