Querying Unassessed Spans in Evaluation Process with Python Code
Hi, I am using `from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents`. These functions return all spans for evaluation, even if evaluation has already been completed for some of the spans. Is it possible to get only the spans for which there is no evaluation data yet? I checked the code:
def get_qa_with_reference(obj: CanQuerySpans) -> pd.DataFrame:
    """Join the results of two span queries into one DataFrame.

    Both queries are sent to ``obj.query_spans`` in a single call:

    1. spans matching ``IS_ROOT``, selecting the columns given by ``IO``;
    2. spans matching ``IS_RETRIEVER``, selecting ``parent_id`` under the
       name ``span_id`` and applying ``.concat(RETRIEVAL_DOCUMENTS,
       reference=DOCUMENT_CONTENT)`` (presumably this builds a ``reference``
       column from the retrieved documents' content -- confirm against
       ``SpanQuery.concat``).

    The returned frames are concatenated column-wise (``axis=1``) with an
    inner join, so only index values present in both results are kept.

    NOTE(review): no evaluation-related condition is visible in either query
    here; ``IS_ROOT`` and ``IS_RETRIEVER`` are defined elsewhere, so confirm
    there if already-evaluated spans need to be excluded.

    Args:
        obj: Object exposing ``query_spans``.

    Returns:
        The column-wise inner join of the two query results.
    """
    return pd.concat(
        cast(
            List[pd.DataFrame],
            obj.query_spans(
                # Query 1: the columns described by IO, for IS_ROOT spans.
                SpanQuery().select(**IO).where(IS_ROOT),
                # Query 2: IS_RETRIEVER spans, exposing parent_id as span_id.
                SpanQuery()
                .where(IS_RETRIEVER)
                .select(span_id="parent_id")
                .concat(
                    RETRIEVAL_DOCUMENTS,
                    reference=DOCUMENT_CONTENT,
                ),
            ),
        ),
        axis=1,
        join="inner",
    )


# The code below logs the evaluations on the parent trace span.
# One evaluator per metric, all constructed from the same eval_model.
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# Evaluations over queries_df. The result is unpacked into two frames,
# named in the same order as the evaluators list (presumably one result
# per evaluator -- confirm against run_evals).
span_level_results = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
)
hallucination_eval_df, qa_correctness_eval_df = span_level_results

# Evaluation over retrieved_documents_df; only the first result is used.
document_level_results = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)
relevance_eval_df = document_level_results[0]

# Wrap each result frame under its eval name, then log all three in one call.
hallucination_evals = SpanEvaluations(
    eval_name="Hallucination",
    dataframe=hallucination_eval_df,
)
qa_correctness_evals = SpanEvaluations(
    eval_name="QA Correctness",
    dataframe=qa_correctness_eval_df,
)
relevance_evals = DocumentEvaluations(
    eval_name="Relevance",
    dataframe=relevance_eval_df,
)
px.log_evaluations(hallucination_evals, qa_correctness_evals, relevance_evals)