Hey John G. — thanks for your comment. Here is the dataset format I'm working with:
# Sample two-turn conversation; each message carries a role, its text, and an
# ISO-8601 UTC ("Z"-suffixed) creation timestamp.
_RAW_MESSAGES = [
    ("user", "What's the capital of France?", "2023-01-01T00:00:00Z"),
    ("assistant", "The capital of France is Paris.", "2023-01-01T00:00:01Z"),
    ("user", "What's the population?", "2023-01-01T00:00:02Z"),
    ("assistant", "About 2.1 million as of 2023.", "2023-01-01T00:00:03Z"),
]
chat_history = [
    {"role": role, "content": content, "created_at": created_at}
    for role, content, created_at in _RAW_MESSAGES
]
Here's a script I wrote to attempt the conversion:
def make_span(
    name, kind, start, end, span_id, trace_id, parent_id=None, attributes=None
):
    """Build a ``Span`` object from its component fields.

    Args:
        name: Human-readable span name.
        kind: Span kind (e.g. an LLM span kind).
        start: Span start time.
        end: Span end time.
        span_id: Unique identifier for this span.
        trace_id: Identifier of the trace this span belongs to.
        parent_id: Identifier of the parent span, or ``None`` for a root span.
        attributes: Optional mapping of span attributes; defaults to empty.

    Returns:
        A ``Span`` with an OK status and no events.
    """
    return Span(
        name=name,
        context=SpanContext(trace_id=trace_id, span_id=span_id),
        span_kind=kind,
        start_time=start,
        end_time=end,
        # Bug fix: this was hardcoded to None, silently discarding the
        # caller-supplied parent_id and making every span a root span.
        parent_id=parent_id,
        status_code=StatusCode.OK,
        status_message=None,
        attributes=attributes or {},
        events=[],  # optional structured log messages like function call, exception, etc.
        conversation=None,  # or: SpanConversationAttributes(conversation_id=some_uuid)
    )
def _parse_created_at(timestamp):
    """Parse an ISO-8601 'Z'-suffixed timestamp into a naive datetime.

    Returns ``None`` when the timestamp is missing/empty, letting the caller
    choose a fallback.
    """
    if not timestamp:
        return None
    return datetime.fromisoformat(timestamp.replace("Z", "+00:00")).replace(
        tzinfo=None
    )


def chat_to_trace(chat_history):
    """Convert a flat chat history into a list of LLM-span dicts.

    Messages are paired (user, assistant) in order; each pair becomes one
    span dict whose keys follow the flattened OpenInference column naming
    (``context.*``, ``attributes.*``).

    Args:
        chat_history: List of message dicts with ``role``, ``content`` and
            optionally ``created_at`` (ISO-8601, "Z"-suffixed) keys.

    Returns:
        A list of span dicts, one per user/assistant pair. A trailing user
        message with no assistant reply yields a span with an empty output.
    """
    spans = []
    for i in range(0, len(chat_history), 2):
        user_msg = chat_history[i]
        assistant_msg = (
            chat_history[i + 1] if i + 1 < len(chat_history) else {"content": ""}
        )
        span_id = str(uuid.uuid4())
        trace_id = str(uuid.uuid4())
        llm_model_name = "gpt-4"
        llm_provider = "openai"
        # Use created_at timestamps if available, otherwise generate them.
        # Bug fix: previously an odd-length history (or a message without
        # created_at) raised AttributeError — .replace() was called on None.
        start = _parse_created_at(user_msg.get("created_at")) or datetime.utcnow()
        # Fall back to the start time so end >= start always holds.
        end = _parse_created_at(assistant_msg.get("created_at")) or start
        span = {
            "name": "LLM Response",
            "span_kind": "LLM",
            "parent_id": None,
            "start_time": start.isoformat() + "Z",
            "end_time": end.isoformat() + "Z",
            "status_code": "OK",
            "status_message": "",
            "events": [],
            "context.span_id": span_id,
            "context.trace_id": trace_id,
            # Core attributes
            "attributes.input.value": user_msg["content"],
            "attributes.output.value": assistant_msg["content"],
            "attributes.input.mime_type": "text/plain",
            "attributes.output.mime_type": "text/plain",
            "attributes.openinference.span.kind": "llm",
            # Optional LLM-specific attributes
            "attributes.llm.model_name": llm_model_name,
            "attributes.llm.provider": llm_provider,
            "attributes.llm.input_messages": [user_msg],
            "attributes.llm.output_messages": [assistant_msg],
            "attributes.llm.token_count.prompt": 10,
            "attributes.llm.token_count.completion": 15,
            "attributes.llm.token_count.total": 25,
            # Mocked or empty values for now
            "attributes.llm.invocation_parameters": {},
            "attributes.llm.system": "",
            "attributes.llm.tools": [],
            "attributes.session.id": str(uuid.uuid4()),
            "attributes.user.id": "anonymous",
            "attributes.metadata": {},
            # Deep token details (placeholders)
            "attributes.llm.token_count.prompt_details.audio": 0,
            "attributes.llm.token_count.prompt_details.cache_read": False,
            "attributes.llm.token_count.completion_details.reasoning": 10,
            "attributes.llm.token_count.completion_details.audio": 0,
        }
        spans.append(span)
    return spans
# Flatten the conversation into span records, then tabulate them so each
# flattened key ("context.*", "attributes.*") becomes a DataFrame column.
spans = chat_to_trace(chat_history)
spans_df = pd.DataFrame(data=spans)
When I attempt to log this, I don't see any errors, but the traces don't appear in Phoenix either:
# Upload the span DataFrame to the Phoenix server under the given project.
# NOTE(review): Phoenix reports no error here even when spans are dropped.
# Likely culprits to check: start_time/end_time are ISO strings rather than
# datetime values, and context.trace_id/span_id are dashed uuid4 strings —
# confirm the formats TraceDataset expects before assuming the upload worked.
px.Client().log_traces(
    trace_dataset=px.TraceDataset(spans_df), project_name="my_project_name"
)
Could you share why that might be happening? If you already have a conversion script, that would be super helpful.