From 657bb26755d952aa38d808fdd297a86254b15691 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Thu, 16 Apr 2026 15:32:01 -0700 Subject: [PATCH] chore: GenAI Client(evals) - Fix run_inference producing duplicate 'response' column PiperOrigin-RevId: 900941216 --- tests/unit/vertexai/genai/test_evals.py | 73 +++++++++++++++++++++++++ vertexai/_genai/_evals_common.py | 16 +++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 1123dec251..f1e7d9a49d 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -2241,6 +2241,79 @@ def mock_model_fn(contents): assert inference_result.candidate_name == "mock_model_fn" assert inference_result.gcs_source is None + @mock.patch.object(_evals_utils, "EvalDatasetLoader") + def test_inference_overwrites_existing_response_column_with_callable( + self, mock_eval_dataset_loader + ): + """Tests that run_inference overwrites an existing 'response' column.""" + mock_df = pd.DataFrame( + { + "prompt": ["test prompt"], + "response": ["old response"], + } + ) + mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict( + orient="records" + ) + + def mock_model_fn(contents): + return "new response" + + inference_result = self.client.evals.run_inference( + model=mock_model_fn, + src=mock_df, + ) + + result_df = inference_result.eval_dataset_df + # Assert there is exactly one 'response' column (no duplicates). + assert list(result_df.columns).count("response") == 1 + # Assert the 'response' column contains the new inference result.
+ assert result_df["response"][0] == "new response" + assert "prompt" in result_df.columns + + @mock.patch.object(_evals_common, "Models") + @mock.patch.object(_evals_utils, "EvalDatasetLoader") + def test_inference_overwrites_existing_response_column_with_gemini( + self, mock_eval_dataset_loader, mock_models + ): + """Tests that run_inference with Gemini overwrites an existing 'response' column.""" + mock_df = pd.DataFrame( + { + "prompt": ["test prompt"], + "response": ["old response"], + } + ) + mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict( + orient="records" + ) + + mock_generate_content_response = genai_types.GenerateContentResponse( + candidates=[ + genai_types.Candidate( + content=genai_types.Content( + parts=[genai_types.Part(text="new gemini response")] + ), + finish_reason=genai_types.FinishReason.STOP, + ) + ], + prompt_feedback=None, + ) + mock_models.return_value.generate_content.return_value = ( + mock_generate_content_response + ) + + inference_result = self.client.evals.run_inference( + model="gemini-pro", + src=mock_df, + ) + + result_df = inference_result.eval_dataset_df + # Assert there is exactly one 'response' column (no duplicates). + assert list(result_df.columns).count("response") == 1 + # Assert the 'response' column contains the new inference result.
+ assert result_df["response"][0] == "new gemini response" + assert "prompt" in result_df.columns + @mock.patch.object(_evals_common, "Models") @mock.patch.object(_evals_utils, "EvalDatasetLoader") def test_inference_with_prompt_template( diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index 201135b731..72c2fc6eda 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -949,11 +949,25 @@ def _run_inference_internal( results_df_responses_only = pd.DataFrame( { - "response": responses, + _evals_constant.RESPONSE: responses, } ) prompt_dataset_indexed = prompt_dataset.reset_index(drop=True) + + # Drop existing 'response' column to prevent duplicate column names when + # re-running inference on a dataset that already has responses. + if _evals_constant.RESPONSE in prompt_dataset_indexed.columns: + logger.warning( + "A column named '%s' already exists in the prompt dataset. " + "The existing column will be dropped and replaced with the new " + "inference results.", + _evals_constant.RESPONSE, + ) + prompt_dataset_indexed = prompt_dataset_indexed.drop( + columns=[_evals_constant.RESPONSE] + ) + results_df_responses_only_indexed = results_df_responses_only.reset_index(drop=True) results_df = pd.concat(