Troubleshooting Arize Experiment Data Upload Errors
Hi Team, I am trying to run experiments and log the results to the Arize platform using the code snippet below, but somehow I keep getting the error shown here:

Error running experiment: Failed to upload experiment data for hallucination-experiment-20250616_140845
Full error details: Failed to upload experiment data for hallucination-experiment-20250616_140845

Is there anything I am missing? I tried to enable debug logging, but it didn't produce much additional output:

logging.getLogger("arize").setLevel(logging.DEBUG)

Any help appreciated :)
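For anyone reproducing the debugging step: a single `setLevel` call only helps if a handler is attached to the root logger. A minimal sketch using only the standard-library `logging` module (no Arize-specific assumptions) that makes all loggers, including the `arize` one, emit DEBUG records to the console:

```python
import logging

# Attach a console handler at DEBUG; setLevel alone is not enough
# if no handler has been configured anywhere.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    force=True,  # replace any handlers configured earlier (Python 3.8+)
)
logging.getLogger("arize").setLevel(logging.DEBUG)
```

With this in place, any log records the SDK does emit around the upload should show up on the console.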
def generate_response(dataset_row):
    task = dataset_row["Prompt"]
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": task},
    ]
    try:
        chat_response = azure_client.chat.completions.create(
            model="ai-assistant-gpt-4o-mini-dev",
            messages=messages,
            temperature=0,
            max_tokens=300,
        )
        # Response text from the LLM
        return chat_response.choices[0].message.content
    except Exception as e:
        print(f"Error getting assistant response: {e}")
        # Fall back to a plain string: not every exception has a .body dict,
        # and returning a non-string can break the experiment upload.
        return str(e)
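One thing worth checking before the upload: whatever the task returns ends up in the experiment payload, so a non-string return (e.g. an exception object slipping out of the except branch) can make a row unserializable. A small self-contained check, assuming nothing about the Arize wire format beyond JSON-serializability:

```python
import json

def is_uploadable(value) -> bool:
    """Return True if the value can be JSON-serialized. This is a proxy
    check; the exact Arize payload format is an assumption here."""
    try:
        json.dumps(value)
        return True
    except TypeError:
        return False

# A plain string is fine; a raw exception object is not.
print(is_uploadable("some assistant text"))  # True
print(is_uploadable(ValueError("boom")))     # False
```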
## Define Experiment Evaluator
HALLUCINATION_PROMPT_BASE_TEMPLATE = """
In this task, you will be presented with a query, a reference text and an answer. The answer is
generated to the question based on the reference text. The answer may contain false information. You
must use the reference text to determine if the answer to the question contains false information,
if the answer is a hallucination of facts. Your objective is to determine whether the answer text
contains factual information and is not a hallucination. A 'hallucination' refers to
an answer that is not based on the reference text or assumes information that is not available in
the reference text. Your response should be a single word: either "factual" or "hallucinated", and
it should not include any other text or characters. "hallucinated" indicates that the answer
provides factually inaccurate information to the query based on the reference text. "factual"
indicates that the answer to the question is correct relative to the reference text, and does not
contain made up information. Please read the query and reference text carefully before determining
your response.

[BEGIN DATA]
************
[Query]: {input}
************
[Reference text]: {reference}
************
[Answer]: {output}
************
[END DATA]

Is the answer above factual or hallucinated based on the query and reference text?
"""

class MyEval(Evaluator):
    def evaluate(
        self, *, output: str, dataset_row: Dict[str, Any], **_: Any
    ) -> EvaluationResult:
        df_in = pd.DataFrame(
            {
                "input": [dataset_row["Prompt"]],
                "output": [output],
                "reference": [dataset_row["Expected Response"]],
            },
            index=[0],
        )
        eval_df = llm_classify(
            dataframe=df_in,
            template=HALLUCINATION_PROMPT_BASE_TEMPLATE,
            model=azure_openai_model,
            rails=["hallucinated", "factual"],
        )
        label = eval_df["label"].iloc[0]
        score = 1 if label == "hallucinated" else 0
        return EvaluationResult(
            score=score,
            label=label,
            explanation=eval_df["explanation"].iloc[0]
            if "explanation" in eval_df.columns
            else None,
        )
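For clarity on the evaluator above, here is the label-to-score mapping in isolation: score 1 flags a hallucination (matching the experiment's name), which is the opposite of the usual "higher is better" convention, so I want to confirm that's how I intend it:

```python
def label_to_score(label: str) -> int:
    # As written in MyEval: 1 flags a hallucination, 0 means factual.
    return 1 if label == "hallucinated" else 0

print(label_to_score("hallucinated"))  # 1
print(label_to_score("factual"))       # 0
```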
try:
    experiment_id = arize_client.run_experiment(
        space_id=space_id,
        dataset_id=dataset_id,
        task=generate_response,
        evaluators=[MyEval()],
        experiment_name=unique_experiment_name,
        concurrency=1,
        dataset_df=small_dataset,
        # dry_run=True,  # uncomment to run the experiment without uploading
    )
    print(f"Experiment ID: {experiment_id}")
except Exception as e:
    print(f"Error running experiment: {e}")
    print(f"Full error details: {e!r}")