From 83e1531850e763feddb526f2749b5691d48bd015 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 20 Jan 2026 12:35:18 +0100
Subject: [PATCH 01/67] Use the correct path to the cache directory for the
task
---
openml/tasks/functions.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 3df2861c0..2bf1a40f4 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -415,9 +415,10 @@ def get_task(
if not isinstance(task_id, int):
raise TypeError(f"Task id should be integer, is {type(task_id)}")
- cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
- tid_cache_dir = cache_key_dir / str(task_id)
- tid_cache_dir_existed = tid_cache_dir.exists()
+ task_cache_directory = openml.utils._create_cache_directory_for_id(
+ TASKS_CACHE_DIR_NAME, task_id
+ )
+ task_cache_directory_existed = task_cache_directory.exists()
try:
task = _get_task_description(task_id)
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -431,8 +432,8 @@ def get_task(
if download_splits and isinstance(task, OpenMLSupervisedTask):
task.download_split()
except Exception as e:
- if not tid_cache_dir_existed:
- openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+ if not task_cache_directory_existed:
+ openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
raise e
return task
From f90036debbf81fc3fd6452263d9b80e786ac2806 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 21 Jan 2026 16:50:09 +0100
Subject: [PATCH 02/67] Push configuration of test server URL exclusively to
config.py
---
openml/cli.py | 2 +-
openml/config.py | 4 +++-
openml/testing.py | 2 +-
tests/conftest.py | 2 +-
tests/test_datasets/test_dataset_functions.py | 14 +++++---------
tests/test_flows/test_flow_functions.py | 15 +++++----------
tests/test_openml/test_config.py | 2 +-
tests/test_runs/test_run_functions.py | 9 +++------
tests/test_tasks/test_task_functions.py | 12 ++++--------
9 files changed, 24 insertions(+), 38 deletions(-)
diff --git a/openml/cli.py b/openml/cli.py
index 0afb089c2..18192a7db 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -109,7 +109,7 @@ def check_server(server: str) -> str:
def replace_shorthand(server: str) -> str:
if server == "test":
- return "https://test.openml.org/api/v1/xml"
+ return f"{config.TEST_SERVER_URL}/api/v1/xml"
if server == "production":
return "https://www.openml.org/api/v1/xml"
return server
diff --git a/openml/config.py b/openml/config.py
index e6104fd7f..5b2d69067 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -27,6 +27,8 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
+TEST_SERVER_URL = "https://test.openml.org"
+
class _Config(TypedDict):
apikey: str
@@ -213,7 +215,7 @@ class ConfigurationForExamples:
_last_used_server = None
_last_used_key = None
_start_last_called = False
- _test_server = "https://test.openml.org/api/v1/xml"
+ _test_server = f"{TEST_SERVER_URL}/api/v1/xml"
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY
@classmethod
diff --git a/openml/testing.py b/openml/testing.py
index 8d3bbbd5b..9ee555a91 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
"user": [],
}
flow_name_tracker: ClassVar[list[str]] = []
- test_server = "https://test.openml.org/api/v1/xml"
+ test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
admin_key = "abc"
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
diff --git a/tests/conftest.py b/tests/conftest.py
index bd974f3f3..29366ce37 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -277,7 +277,7 @@ def with_server(request):
openml.config.apikey = None
yield
return
- openml.config.server = "https://test.openml.org/api/v1/xml"
+ openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
openml.config.apikey = TestBase.user_key
yield
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index c41664ba7..74faa73ea 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1730,7 +1730,6 @@ def test_delete_dataset(self):
@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
)
@@ -1745,14 +1744,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
):
openml.datasets.delete_dataset(40_000)
- dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+ dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
)
@@ -1767,14 +1765,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
):
openml.datasets.delete_dataset(40_000)
- dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+ dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
)
@@ -1786,14 +1783,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
success = openml.datasets.delete_dataset(40000)
assert success
- dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+ dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
)
@@ -1808,7 +1804,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
):
openml.datasets.delete_dataset(9_999_999)
- dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
+ dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@@ -2010,7 +2006,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
)
# While the mocked example is from production, unit tests by default connect to the test server.
- requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
+ requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
dataset = openml.datasets.get_dataset(61, download_data=True)
assert dataset._parquet_url is not None
assert dataset.parquet_file is not None
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index 2339b27c8..790686d94 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -453,7 +453,6 @@ def test_delete_flow(self):
@mock.patch.object(requests.Session, "delete")
def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+ flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+ flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+ flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -523,7 +519,7 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
success = openml.flows.delete_flow(33364)
assert success
- flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
+ flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@@ -531,7 +527,6 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
@mock.patch.object(requests.Session, "delete")
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(9_999_999)
- flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
+ flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
index 7ef223504..3ff7bd55e 100644
--- a/tests/test_openml/test_config.py
+++ b/tests/test_openml/test_config.py
@@ -78,7 +78,7 @@ def test_get_config_as_dict(self):
config = openml.config.get_config_as_dict()
_config = {}
_config["apikey"] = TestBase.user_key
- _config["server"] = "https://test.openml.org/api/v1/xml"
+ _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
_config["connection_n_retries"] = 20
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 8f2c505b7..b8bd6abd7 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1813,7 +1813,6 @@ def test_initialize_model_from_run_nonstrict(self):
@mock.patch.object(requests.Session, "delete")
def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -1826,14 +1825,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(40_000)
- run_url = "https://test.openml.org/api/v1/xml/run/40000"
+ run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -1843,14 +1841,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
success = openml.runs.delete_run(10591880)
assert success
- run_url = "https://test.openml.org/api/v1/xml/run/10591880"
+ run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -1863,7 +1860,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(9_999_999)
- run_url = "https://test.openml.org/api/v1/xml/run/9999999"
+ run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index d44717177..af143a26b 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -244,7 +244,6 @@ def test_deletion_of_cache_dir(self):
@mock.patch.object(requests.Session, "delete")
def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -257,14 +256,13 @@ def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(1)
- task_url = "https://test.openml.org/api/v1/xml/task/1"
+ task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/1"
assert task_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -277,14 +275,13 @@ def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(3496)
- task_url = "https://test.openml.org/api/v1/xml/task/3496"
+ task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/3496"
assert task_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_success(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -294,14 +291,13 @@ def test_delete_success(mock_delete, test_files_directory, test_api_key):
success = openml.tasks.delete_task(361323)
assert success
- task_url = "https://test.openml.org/api/v1/xml/task/361323"
+ task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/361323"
assert task_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key):
- openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -314,6 +310,6 @@ def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(9_999_999)
- task_url = "https://test.openml.org/api/v1/xml/task/9999999"
+ task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/9999999"
assert task_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
From 3a257abea627f9a37d00feb7766cf1a49b82dbd5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 28 Jan 2026 12:12:02 +0100
Subject: [PATCH 03/67] Update the test to use a dataset which does not have a
parquet file
Locally, MinIO already has more parquet files than on the test server.
---
tests/test_datasets/test_dataset_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 74faa73ea..fe5939d7a 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -886,7 +886,7 @@ def test_create_invalid_dataset(self):
@pytest.mark.uses_test_server()
def test_get_online_dataset_arff(self):
- dataset_id = 100 # Australian
+ dataset_id = 128 # iris -- one of the few datasets without a parquet file
# lazy loading not used as arff file is checked.
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
decoder = arff.ArffDecoder()
From 3b79017a48da3ba9f002de813160ff60cb2159db Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 28 Jan 2026 12:32:30 +0100
Subject: [PATCH 04/67] Replace hard-coded cache directory by configured one
---
tests/test_datasets/test_dataset_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index fe5939d7a..9df7e3879 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -527,7 +527,7 @@ def test_deletion_of_cache_dir(self):
def test_deletion_of_cache_dir_faulty_download(self, patch):
patch.side_effect = Exception("Boom!")
self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
- datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
+ datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
assert len(os.listdir(datasets_cache_dir)) == 0
@pytest.mark.uses_test_server()
From f524d756964ecb03e77f9e932022a446bd1c5a35 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 28 Jan 2026 16:04:26 +0100
Subject: [PATCH 05/67] Update test to use dataset file that is already in
cache
Note that the previous strategy no longer worked if the server
returned a parquet file, which is the case for the new local setup.
---
tests/test_datasets/test_dataset_functions.py | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 9df7e3879..27d3075fd 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -532,14 +532,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch):
@pytest.mark.uses_test_server()
def test_publish_dataset(self):
- # lazy loading not possible as we need the arff-file.
- openml.datasets.get_dataset(3, download_data=True)
- file_path = os.path.join(
- openml.config.get_cache_directory(),
- "datasets",
- "3",
- "dataset.arff",
- )
+ arff_file_path = Path(__file__).parent.parent / "files" / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
dataset = OpenMLDataset(
"anneal",
"test",
@@ -547,7 +540,7 @@ def test_publish_dataset(self):
version=1,
licence="public",
default_target_attribute="class",
- data_file=file_path,
+ data_file=arff_file_path,
)
dataset.publish()
TestBase._mark_entity_for_removal("data", dataset.dataset_id)
From 7ef12c25b8c83ff102fac9b2606e7386dbd57a11 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 29 Jan 2026 11:02:54 +0530
Subject: [PATCH 06/67] Windows test
---
.github/workflows/test.yml | 10 ++++++-
docker-compose.yml | 53 ++++++++++++++++++++++++++++++++++++++
docker/update.sh | 31 ++++++++++++++++++++++
pytest.ini | 4 +++
tests/conftest.py | 42 ++++++++++++++++++++++++++++++
5 files changed, 139 insertions(+), 1 deletion(-)
create mode 100644 docker-compose.yml
create mode 100644 docker/update.sh
create mode 100644 pytest.ini
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d65cc3796..c52486d0a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -74,7 +74,15 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- - name: Install test dependencies and scikit-learn
+ - name: Checkout server-api and patch Docker path
+ if: runner.os == 'Linux'
+ shell: bash
+ run: |
+ git clone --depth 1 https://github.com/openml/server-api.git server-api
+ sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+
+ - name: Install test dependencies, scikit-learn, and optional pandas
+ shell: bash
run: |
python -m pip install --upgrade pip
pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..20fcef863
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,53 @@
+services:
+ database:
+ image: "openml/test-database:20240105"
+ container_name: "openml-test-db-ci"
+ environment:
+ MYSQL_ROOT_PASSWORD: ok
+ ports:
+ - "33060:3306"
+ healthcheck:
+ test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
+ start_period: 30s
+ interval: 5s
+ retries: 10
+
+ # SETUP WORKER
+ database-setup:
+ image: mysql
+ container_name: "openml-test-setup-ci"
+ volumes:
+ # Mounts docker/update.sh into the container as the setup script run below
+ - ./docker/update.sh:/database-update.sh
+ command: /bin/sh -c "/database-update.sh"
+ depends_on:
+ database:
+ condition: service_healthy
+
+ php-api:
+ image: "openml/php-rest-api:v1.2.2"
+ container_name: "openml-php-api-ci"
+ ports:
+ - "9002:80"
+ depends_on:
+ database:
+ condition: service_started
+ environment:
+ - DB_HOST_OPENML=database:3306
+ - DB_HOST_EXPDB=database:3306
+ - BASE_URL=http://localhost:9002/
+ - INDEX_ES_DURING_STARTUP=false
+
+ # V2 API (PYTHON)
+ python-api:
+ container_name: "openml-python-api-ci"
+ build:
+ # TODO: replace with image when available
+ context: ../server-api
+ dockerfile: docker/python/Dockerfile
+ ports:
+ - "9001:8000"
+ depends_on:
+ - database
+ environment:
+ - DATABASE_URL=mysql://root:ok@database:3306/openml
\ No newline at end of file
diff --git a/docker/update.sh b/docker/update.sh
new file mode 100644
index 000000000..7e9864742
--- /dev/null
+++ b/docker/update.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Change the filepath of openml.file
+# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
+# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
+mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'
+
+# Update openml.expdb.dataset with the same url
+mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'
+
+
+
+
+
+# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
+ `did` int unsigned NOT NULL,
+ `index` int unsigned NOT NULL,
+ `uploader` mediumint unsigned NOT NULL,
+ `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `description_type` enum("plain", "ontology") NOT NULL,
+ `value` varchar(256) NOT NULL,
+ KEY `did` (`did`,`index`),
+ CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
+)'
+
+# SET dataset 1 to active (used in unittests java)
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'
+
+# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing.
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'
\ No newline at end of file
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..69fbd903f
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+env =
+ OPENML_SERVER = http://localhost:9001/api/v2
+ OPENML_API_KEY = AD000000000000000000000000000000
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index bd974f3f3..890978558 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,6 +24,7 @@
from __future__ import annotations
import multiprocessing
+import sys
multiprocessing.set_start_method("spawn", force=True)
@@ -35,6 +36,9 @@
import pytest
import openml_sklearn
+import time
+import subprocess
+import requests
import openml
from openml.testing import TestBase
@@ -296,6 +300,44 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
+# This starts the entire stack once for the whole test run
+@pytest.fixture(scope="session", autouse=True)
+def openml_docker_stack():
+ # if sys.platform == "win32":
+ # yield
+ # return
+ # 1. Start the containers defined in your final docker-compose.yml
+ subprocess.run(["docker", "compose", "up", "-d"], check=True)
+
+ # 2. Wait for the database setup worker to finish its tasks
+ # This ensures update.sh has finished before we hit the APIs
+ subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
+
+ # 3. Quick health check: Wait for the Python API to respond on port 9001
+ timeout = 30
+ start = time.time()
+ while time.time() - start < timeout:
+ try:
+ if requests.get("http://localhost:9001/api/v2/").status_code == 200:
+ break
+ except requests.exceptions.ConnectionError:
+ time.sleep(1)
+
+ yield # Tests run here
+
+ # 4. Tear everything down after tests finish to keep the machine clean
+ subprocess.run(["docker", "compose", "down", "-v"], check=True)
+
+# This resets the database state before every single test to prevent race conditions
+@pytest.fixture(scope="function", autouse=True)
+def reset_db_state():
+ # if sys.platform == "win32":
+ # yield
+ # return
+ # Fast restart of the database container to return to the 'baked-in' state
+ subprocess.run(["docker", "compose", "restart", "database"], check=True)
+ # Re-run the setup worker to ensure paths are still correct
+ subprocess.run(["docker", "compose", "up", "database-setup"], check=True)
@pytest.fixture
def static_cache_dir():
From a5601e3dc849ac4c8759c14292960d624d774ff0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 29 Jan 2026 10:05:28 +0100
Subject: [PATCH 07/67] relax assumptions on local file structure
---
tests/test_datasets/test_dataset_functions.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 27d3075fd..49b13e4b8 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1457,8 +1457,9 @@ def test_data_edit_critical_field(self):
raise e
time.sleep(10)
# Delete the cache dir to get the newer version of the dataset
+
shutil.rmtree(
- os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
+ os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
)
@pytest.mark.uses_test_server()
@@ -1892,9 +1893,8 @@ def _dataset_features_is_downloaded(did: int):
def _dataset_data_file_is_downloaded(did: int):
- parquet_present = _dataset_file_is_downloaded(did, "dataset.pq")
- arff_present = _dataset_file_is_downloaded(did, "dataset.arff")
- return parquet_present or arff_present
+ cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
+ return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
def _assert_datasets_retrieved_successfully(
From d862be2de5ddc4d551efad22dff1fdefb7db3854 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 29 Jan 2026 10:47:51 +0100
Subject: [PATCH 08/67] Do not use static cache directory
---
tests/test_tasks/test_task_functions.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index af143a26b..08811add5 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -156,13 +156,13 @@ def test_get_task(self):
task = openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation
assert isinstance(task, OpenMLTask)
assert os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "1", "task.xml")
)
assert not os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff")
)
assert os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff")
+ os.path.join(openml.config.get_cache_directory(), "datasets", "1", "dataset_1.pq")
)
@pytest.mark.uses_test_server()
@@ -170,21 +170,21 @@ def test_get_task_lazy(self):
task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation
assert isinstance(task, OpenMLTask)
assert os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "task.xml")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "2", "task.xml")
)
assert task.class_labels == ["1", "2", "3", "4", "5", "U"]
assert not os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff")
)
# Since the download_data=False is propagated to get_dataset
assert not os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "datasets", "2", "dataset.arff")
+ os.path.join(openml.config.get_cache_directory(), "datasets", "2", "dataset.arff")
)
task.download_split()
assert os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff")
)
@mock.patch("openml.tasks.functions.get_dataset")
@@ -228,7 +228,7 @@ def test_download_split(self):
split = task.download_split()
assert type(split) == OpenMLSplit
assert os.path.exists(
- os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff")
+ os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff")
)
def test_deletion_of_cache_dir(self):
From 16699e6871f6b242fbd4fae1e2893dc78930bf1e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 29 Jan 2026 11:18:40 +0100
Subject: [PATCH 09/67] Update expected number to match initial server state
This means it is not reliant on the evaluation engine processing the
dataset. Interestingly, the database state purposely seems to keep
the last task's dataset in preparation explicitly (by having
processing marked as done but having no dataset_status entry).
---
tests/test_tasks/test_task_functions.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index 08811add5..6951bf36f 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -96,7 +96,9 @@ def test_list_tasks_empty(self):
@pytest.mark.uses_test_server()
def test_list_tasks_by_tag(self):
- num_basic_tasks = 100 # number is flexible, check server if fails
+ # Server starts with 99 active tasks with the tag, and one 'in_preparation',
+ # so depending on the processing of the last dataset, there may be 99 or 100 matches.
+ num_basic_tasks = 99
tasks = openml.tasks.list_tasks(tag="OpenML100")
assert len(tasks) >= num_basic_tasks
for task in tasks.to_dict(orient="index").values():
From 7c14c684d35eb409562b590fd225a315f7108ce0 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 29 Jan 2026 16:35:22 +0530
Subject: [PATCH 10/67] bug fixing
---
.github/workflows/test.yml | 2 +-
pyproject.toml | 15 ---------------
pytest.ini | 14 ++++++++++++++
tests/conftest.py | 16 ++++------------
tests/test_1.py | 14 ++++++++++++++
5 files changed, 33 insertions(+), 28 deletions(-)
create mode 100644 tests/test_1.py
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c52486d0a..c2b05a6be 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -75,7 +75,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Checkout server-api and patch Docker path
- if: runner.os == 'Linux'
+ # if: matrix.os == 'Linux'
shell: bash
run: |
git clone --depth 1 https://github.com/openml/server-api.git server-api
diff --git a/pyproject.toml b/pyproject.toml
index 93a6ffbfa..0627d0901 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,21 +124,6 @@ openml = ["*.txt", "*.md", "py.typed"]
[tool.setuptools.dynamic]
version = {attr = "openml.__version__.__version__"}
-# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref
-[tool.pytest.ini_options]
-testpaths = ["tests"]
-minversion = "7.0"
-xfail_strict = true
-filterwarnings=[
- "ignore:the matrix subclass:PendingDeprecationWarning"
-]
-markers = [
- "server: anything that connects to a server",
- "upload: anything that uploads to a server",
- "production: any interaction with the production server",
- "cache: anything that interacts with the (test) cache",
-]
-
# https://github.com/charliermarsh/ruff
[tool.ruff]
target-version = "py310"
diff --git a/pytest.ini b/pytest.ini
index 69fbd903f..12d9fe136 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,4 +1,18 @@
[pytest]
+minversion = 7.0
+testpaths = tests
+xfail_strict = true
+
+filterwarnings =
+ ignore:the matrix subclass:PendingDeprecationWarning
+
+markers =
+ server: anything that connects to a server
+ upload: anything that uploads to a server
+ production: any interaction with the production server
+ cache: anything that interacts with the (test) cache
+ uses_test_server: tests that use the local docker stack
+
env =
OPENML_SERVER = http://localhost:9001/api/v2
OPENML_API_KEY = AD000000000000000000000000000000
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 890978558..7ea9257f6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -306,37 +306,29 @@ def openml_docker_stack():
# if sys.platform == "win32":
# yield
# return
- # 1. Start the containers defined in your final docker-compose.yml
subprocess.run(["docker", "compose", "up", "-d"], check=True)
-
- # 2. Wait for the database setup worker to finish its tasks
- # This ensures update.sh has finished before we hit the APIs
subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
- # 3. Quick health check: Wait for the Python API to respond on port 9001
- timeout = 30
+ timeout = 10
start = time.time()
while time.time() - start < timeout:
try:
- if requests.get("http://localhost:9001/api/v2/").status_code == 200:
+ response = requests.get("http://localhost:9001/api/v2/")
+ if response.status_code in [200, 404, 405]:
break
except requests.exceptions.ConnectionError:
time.sleep(1)
- yield # Tests run here
+ yield
- # 4. Tear everything down after tests finish to keep the machine clean
subprocess.run(["docker", "compose", "down", "-v"], check=True)
-# This resets the database state before every single test to prevent race conditions
@pytest.fixture(scope="function", autouse=True)
def reset_db_state():
# if sys.platform == "win32":
# yield
# return
- # Fast restart of the database container to return to the 'baked-in' state
subprocess.run(["docker", "compose", "restart", "database"], check=True)
- # Re-run the setup worker to ensure paths are still correct
subprocess.run(["docker", "compose", "up", "database-setup"], check=True)
@pytest.fixture
diff --git a/tests/test_1.py b/tests/test_1.py
new file mode 100644
index 000000000..169ebbd03
--- /dev/null
+++ b/tests/test_1.py
@@ -0,0 +1,14 @@
+import pytest
+import requests
+
+# Requesting the 'openml_docker_stack' fixture forces it to run!
+def test_can_connect_to_local_docker(openml_docker_stack):
+ print("\nš³ Docker Stack is UP! Checking connection...")
+
+ # Try to talk to the V2 API we just built
+ response = requests.get("http://localhost:9001/api/v2")
+
+ # If we get a 200 OK or 404 (Not Found), the server is alive.
+ # If it fails, this line will crash the test.
+ assert response.status_code in [200, 404]
+ print("ā
Successfully connected to Local V2 API on port 9001")
\ No newline at end of file
From 16ceeaab9f2cb65eb9a9025704c4e31204a6fb57 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 30 Jan 2026 02:06:38 +0530
Subject: [PATCH 11/67] remove db refresh every test
---
.github/workflows/test.yml | 1 -
tests/conftest.py | 8 --------
tests/test_1.py | 6 ++----
3 files changed, 2 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 65ebcbe4a..228500278 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -83,7 +83,6 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Checkout server-api and patch Docker path
- # if: matrix.os == 'Linux'
shell: bash
run: |
git clone --depth 1 https://github.com/openml/server-api.git server-api
diff --git a/tests/conftest.py b/tests/conftest.py
index 7ea9257f6..e9bb08013 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -323,14 +323,6 @@ def openml_docker_stack():
subprocess.run(["docker", "compose", "down", "-v"], check=True)
-@pytest.fixture(scope="function", autouse=True)
-def reset_db_state():
- # if sys.platform == "win32":
- # yield
- # return
- subprocess.run(["docker", "compose", "restart", "database"], check=True)
- subprocess.run(["docker", "compose", "up", "database-setup"], check=True)
-
@pytest.fixture
def static_cache_dir():
return Path(__file__).parent / "files"
diff --git a/tests/test_1.py b/tests/test_1.py
index 169ebbd03..318fa83c1 100644
--- a/tests/test_1.py
+++ b/tests/test_1.py
@@ -3,12 +3,10 @@
# Requesting the 'openml_docker_stack' fixture forces it to run!
def test_can_connect_to_local_docker(openml_docker_stack):
- print("\nš³ Docker Stack is UP! Checking connection...")
# Try to talk to the V2 API we just built
- response = requests.get("http://localhost:9001/api/v2")
+ response = requests.get("http://localhost:9001/docs")
# If we get a 200 OK or 404 (Not Found), the server is alive.
# If it fails, this line will crash the test.
- assert response.status_code in [200, 404]
- print("ā
Successfully connected to Local V2 API on port 9001")
\ No newline at end of file
+ assert response.status_code in [200]
From 015acf46330c5604824b30d9c28a0538a54dd120 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 30 Jan 2026 02:18:32 +0530
Subject: [PATCH 12/67] bug fixing
---
.github/workflows/test.yml | 8 ++++----
pyproject.toml | 19 +++++++++++++++++++
pytest.ini | 18 ------------------
tests/conftest.py | 9 ++-------
4 files changed, 25 insertions(+), 29 deletions(-)
delete mode 100644 pytest.ini
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 228500278..686440234 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -83,10 +83,10 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Checkout server-api and patch Docker path
- shell: bash
- run: |
- git clone --depth 1 https://github.com/openml/server-api.git server-api
- sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+ shell: bash
+ run: |
+ git clone --depth 1 https://github.com/openml/server-api.git server-api
+ sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
- name: Install test dependencies, scikit-learn, and optional pandas
shell: bash
diff --git a/pyproject.toml b/pyproject.toml
index 0627d0901..6165f9497 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,6 +124,25 @@ openml = ["*.txt", "*.md", "py.typed"]
[tool.setuptools.dynamic]
version = {attr = "openml.__version__.__version__"}
+# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+minversion = "7.0"
+xfail_strict = true
+filterwarnings=[
+ "ignore:the matrix subclass:PendingDeprecationWarning"
+]
+markers = [
+ "server: anything that connects to a server",
+ "upload: anything that uploads to a server",
+ "production: any interaction with the production server",
+ "cache: anything that interacts with the (test) cache",
+]
+env = [
+ "OPENML_SERVER=http://localhost:9001/api/v2",
+ "OPENML_API_KEY=AD000000000000000000000000000000",
+]
+
# https://github.com/charliermarsh/ruff
[tool.ruff]
target-version = "py310"
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 12d9fe136..000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[pytest]
-minversion = 7.0
-testpaths = tests
-xfail_strict = true
-
-filterwarnings =
- ignore:the matrix subclass:PendingDeprecationWarning
-
-markers =
- server: anything that connects to a server
- upload: anything that uploads to a server
- production: any interaction with the production server
- cache: anything that interacts with the (test) cache
- uses_test_server: tests that use the local docker stack
-
-env =
- OPENML_SERVER = http://localhost:9001/api/v2
- OPENML_API_KEY = AD000000000000000000000000000000
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index e9bb08013..a2c29a6ad 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -300,12 +300,8 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-# This starts the entire stack once for the whole test run
@pytest.fixture(scope="session", autouse=True)
def openml_docker_stack():
- # if sys.platform == "win32":
- # yield
- # return
subprocess.run(["docker", "compose", "up", "-d"], check=True)
subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
@@ -313,9 +309,8 @@ def openml_docker_stack():
start = time.time()
while time.time() - start < timeout:
try:
- response = requests.get("http://localhost:9001/api/v2/")
- if response.status_code in [200, 404, 405]:
- break
+ requests.get("http://localhost:9001/api/v2/")
+ break
except requests.exceptions.ConnectionError:
time.sleep(1)
From 937fc770adf8a618851e7cc602b2a87e23f504fe Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 30 Jan 2026 02:50:32 +0530
Subject: [PATCH 13/67] bug fixing
---
.github/workflows/test.yml | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 686440234..107494bf0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -82,12 +82,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- - name: Checkout server-api and patch Docker path
- shell: bash
- run: |
- git clone --depth 1 https://github.com/openml/server-api.git server-api
- sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
-
- name: Install test dependencies, scikit-learn, and optional pandas
shell: bash
run: |
@@ -107,6 +101,12 @@ jobs:
echo "BEFORE=$git_status" >> $GITHUB_ENV
echo "Repository status before tests: $git_status"
+ - name: Checkout server-api and patch Docker path
+ shell: bash
+ run: |
+ git clone --depth 1 https://github.com/openml/server-api.git server-api
+ sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+
- name: Show installed dependencies
run: python -m pip list
@@ -145,6 +145,13 @@ jobs:
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ - name: Cleanup Docker setup
+ if: always()
+ shell: bash
+ run: |
+ rm -rf server-api
+ git checkout docker-compose.yml
+
- name: Check for files left behind by test
if: matrix.os != 'windows-latest' && always()
run: |
From 30972f8d7c7249f64fc605a17ca006351a1d6149 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 30 Jan 2026 02:53:36 +0530
Subject: [PATCH 14/67] bug fixing
---
.github/workflows/test.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 107494bf0..f3d16aeeb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -146,10 +146,10 @@ jobs:
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
- if: always()
- shell: bash
- run: |
- rm -rf server-api
+ if: always()
+ shell: bash
+ run: |
+ rm -rf server-api
git checkout docker-compose.yml
- name: Check for files left behind by test
From 775dcf722f95aa0f78b4dbef16fe8177cec2a6f0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 30 Jan 2026 10:30:18 +0100
Subject: [PATCH 15/67] Add symlink to regular test cache directory
---
tests/files/localhost:8080 | 1 +
1 file changed, 1 insertion(+)
create mode 120000 tests/files/localhost:8080
diff --git a/tests/files/localhost:8080 b/tests/files/localhost:8080
new file mode 120000
index 000000000..5a469fa32
--- /dev/null
+++ b/tests/files/localhost:8080
@@ -0,0 +1 @@
+/Users/pietergijsbers/repositories/openml-python/tests/files/org/openml/test
\ No newline at end of file
From 319cb355c7b4488f83e223e3a9b0d9d20e080771 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 30 Jan 2026 10:47:29 +0100
Subject: [PATCH 16/67] Skip test for 1.8 since expected results differ too
much
---
tests/test_runs/test_run_functions.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index b8bd6abd7..dda940e4d 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1870,6 +1870,10 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
Version(sklearn.__version__) < Version("0.21"),
reason="couldn't perform local tests successfully w/o bloating RAM",
)
+@unittest.skipIf(
+ Version(sklearn.__version__) >= Version("1.8"),
+ reason="predictions differ significantly",
+ )
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
@pytest.mark.uses_test_server()
def test__run_task_get_arffcontent_2(parallel_mock):
From a680ebe1648ec2bd549259eab164c62e66bb7151 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 30 Jan 2026 11:08:50 +0100
Subject: [PATCH 17/67] Simplify path to static cache directory
---
tests/test_datasets/test_dataset_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 49b13e4b8..2654721bd 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -532,7 +532,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch):
@pytest.mark.uses_test_server()
def test_publish_dataset(self):
- arff_file_path = Path(__file__).parent.parent / "files" / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
+ arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
dataset = OpenMLDataset(
"anneal",
"test",
From b161b3b8ce5d92d31f4564ae60cb836ae5793d57 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 30 Jan 2026 11:26:24 +0100
Subject: [PATCH 18/67] Update symbolic link to be relative
---
tests/files/localhost:8080 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/files/localhost:8080 b/tests/files/localhost:8080
index 5a469fa32..334c709ef 120000
--- a/tests/files/localhost:8080
+++ b/tests/files/localhost:8080
@@ -1 +1 @@
-/Users/pietergijsbers/repositories/openml-python/tests/files/org/openml/test
\ No newline at end of file
+org/openml/test
\ No newline at end of file
From 0b989d151e45899c0cba0f7981938b293668ad82 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 30 Jan 2026 11:27:52 +0100
Subject: [PATCH 19/67] Fix typo
---
tests/test_datasets/test_dataset_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 2654721bd..d8a9d80b9 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -879,7 +879,7 @@ def test_create_invalid_dataset(self):
@pytest.mark.uses_test_server()
def test_get_online_dataset_arff(self):
- dataset_id = 128 # iris -- one of the few datasets with parquet file
+ dataset_id = 128 # iris -- one of the few datasets without parquet file
# lazy loading not used as arff file is checked.
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
decoder = arff.ArffDecoder()
From 892ea6c85ce7eecd5ae0541ad46b2a0f459786b5 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sat, 31 Jan 2026 20:41:45 +0530
Subject: [PATCH 20/67] Trying to fix multiple threads issue
---
tests/conftest.py | 45 +++++++++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 14 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index a2c29a6ad..262ba2ccb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,6 +26,8 @@
import multiprocessing
import sys
+import fasteners
+
multiprocessing.set_start_method("spawn", force=True)
from collections.abc import Iterator
@@ -300,23 +302,38 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-@pytest.fixture(scope="session", autouse=True)
-def openml_docker_stack():
- subprocess.run(["docker", "compose", "up", "-d"], check=True)
+def _is_server_responding():
+ """Check if the Docker API is already listening."""
+ try:
+ requests.get("http://localhost:9001/api/v2/", timeout=1)
+ return True
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+ return False
+
+def _start_docker():
+ """Logic to spin up the containers and wait for initialization."""
+ subprocess.run(["docker", "compose", "up", "-d"], check=True, capture_output=True, text=True)
subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
+
+@pytest.fixture(scope="session", autouse=True)
+def openml_docker_stack(tmp_path_factory, worker_id):
+ # For local development, single-process runs
+ if worker_id == "master":
+ _start_docker()
+ yield
+ subprocess.run(["docker", "compose", "down", "-v"], check=True)
+ return
+
+ # Case 2: Running in CI with multiple workers (xdist)
+ root_tmp_dir = tmp_path_factory.getbasetemp().parent
+ lock_file = root_tmp_dir / "docker_setup.lock"
- timeout = 10
- start = time.time()
- while time.time() - start < timeout:
- try:
- requests.get("http://localhost:9001/api/v2/")
- break
- except requests.exceptions.ConnectionError:
- time.sleep(1)
-
+ lock = fasteners.InterProcessLock(str(lock_file))
+ with lock:
+ if not _is_server_responding():
+ _start_docker()
+
yield
-
- subprocess.run(["docker", "compose", "down", "-v"], check=True)
@pytest.fixture
def static_cache_dir():
From ae3befb71a66ec5db5ffda3473ef08e53ff62a81 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sat, 31 Jan 2026 20:42:35 +0530
Subject: [PATCH 21/67] removed test file
---
tests/test_1.py | 12 ------------
1 file changed, 12 deletions(-)
delete mode 100644 tests/test_1.py
diff --git a/tests/test_1.py b/tests/test_1.py
deleted file mode 100644
index 318fa83c1..000000000
--- a/tests/test_1.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import pytest
-import requests
-
-# Requesting the 'openml_docker_stack' fixture forces it to run!
-def test_can_connect_to_local_docker(openml_docker_stack):
-
- # Try to talk to the V2 API we just built
- response = requests.get("http://localhost:9001/docs")
-
- # If we get a 200 OK or 404 (Not Found), the server is alive.
- # If it fails, this line will crash the test.
- assert response.status_code in [200]
From 5f396a020e1c40a5e1814b2dd02f48f21200f969 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sat, 31 Jan 2026 21:20:51 +0530
Subject: [PATCH 22/67] Remove unnecessary pytest env configuration from pyproject.toml
---
pyproject.toml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 6165f9497..93a6ffbfa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -138,10 +138,6 @@ markers = [
"production: any interaction with the production server",
"cache: anything that interacts with the (test) cache",
]
-env = [
- "OPENML_SERVER=http://localhost:9001/api/v2",
- "OPENML_API_KEY=AD000000000000000000000000000000",
-]
# https://github.com/charliermarsh/ruff
[tool.ruff]
From 8a319cd6c057ad27084ab90099ac526913fa3b05 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sat, 31 Jan 2026 21:26:54 +0530
Subject: [PATCH 23/67] Trigger Build
From 4ba4239242d40b916843a10aa298a9fa1c97c55b Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:18:00 +0530
Subject: [PATCH 24/67] Clean up code
---
docker-compose.yml | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 20fcef863..2db258741 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,18 +12,17 @@ services:
interval: 5s
retries: 10
- # SETUP WORKER
database-setup:
image: mysql
container_name: "openml-test-setup-ci"
volumes:
- # You MUST save the update.sh content you shared earlier to this path
- ./docker/update.sh:/database-update.sh
command: /bin/sh -c "/database-update.sh"
depends_on:
database:
condition: service_healthy
+# V1 API (PHP)
php-api:
image: "openml/php-rest-api:v1.2.2"
container_name: "openml-php-api-ci"
From 02924041dbbe65dbf1068189e109c0839539e531 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:30:22 +0530
Subject: [PATCH 25/67] comment fixing
---
tests/conftest.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index 262ba2ccb..25adf5d53 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -317,14 +317,14 @@ def _start_docker():
@pytest.fixture(scope="session", autouse=True)
def openml_docker_stack(tmp_path_factory, worker_id):
- # For local development, single-process runs
+ # For local development with single worker
if worker_id == "master":
_start_docker()
yield
subprocess.run(["docker", "compose", "down", "-v"], check=True)
return
- # Case 2: Running in CI with multiple workers (xdist)
+ # For CI with multiple workers (xdist)
root_tmp_dir = tmp_path_factory.getbasetemp().parent
lock_file = root_tmp_dir / "docker_setup.lock"
From a7b5d767714da63f87e652d824dc8cecf0df49f0 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sun, 1 Feb 2026 21:26:02 +0530
Subject: [PATCH 26/67] attempted bug fixing
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f3d16aeeb..d27f861e7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -143,7 +143,7 @@ jobs:
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest -n auto --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
if: always()
From 9b0f3d71f4d87921f666ef48e4b404d874cd0b02 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sun, 1 Feb 2026 21:40:50 +0530
Subject: [PATCH 27/67] attempted bug fixing
---
.github/workflows/test.yml | 2 +-
tests/conftest.py | 12 ++++++++----
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d27f861e7..f3d16aeeb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -143,7 +143,7 @@ jobs:
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n auto --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
if: always()
diff --git a/tests/conftest.py b/tests/conftest.py
index 25adf5d53..e203cbd1e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -330,10 +330,14 @@ def openml_docker_stack(tmp_path_factory, worker_id):
lock = fasteners.InterProcessLock(str(lock_file))
with lock:
- if not _is_server_responding():
- _start_docker()
-
- yield
+ import socket
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ result = sock.connect_ex(('localhost', 33060))
+ is_port_open = (result == 0)
+ sock.close()
+
+ if not is_port_open:
+ _start_docker()
@pytest.fixture
def static_cache_dir():
From 630f240f36477932a647c261e6d2854b35876671 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Sun, 1 Feb 2026 21:50:19 +0530
Subject: [PATCH 28/67] attempted bug fixing
---
tests/conftest.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/conftest.py b/tests/conftest.py
index e203cbd1e..a1f542a07 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -338,6 +338,7 @@ def openml_docker_stack(tmp_path_factory, worker_id):
if not is_port_open:
_start_docker()
+ yield
@pytest.fixture
def static_cache_dir():
From c61d4109cadc4e4fd19d61df347e57cb25f501c1 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Mon, 2 Feb 2026 00:06:11 +0530
Subject: [PATCH 29/67] attempted bug fixing reverts
---
tests/conftest.py | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index a1f542a07..25adf5d53 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -330,14 +330,9 @@ def openml_docker_stack(tmp_path_factory, worker_id):
lock = fasteners.InterProcessLock(str(lock_file))
with lock:
- import socket
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- result = sock.connect_ex(('localhost', 33060))
- is_port_open = (result == 0)
- sock.close()
-
- if not is_port_open:
- _start_docker()
+ if not _is_server_responding():
+ _start_docker()
+
yield
@pytest.fixture
From 1ab42b7f6ce6b43fa0e6af3ff9d133ad4e495e80 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Mon, 2 Feb 2026 00:36:35 +0530
Subject: [PATCH 30/67] disabling parallel runs
---
.github/workflows/test.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f3d16aeeb..8177e53db 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
marks="not production and not uses_test_server"
fi
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
@@ -138,12 +138,12 @@ jobs:
marks="production and not uses_test_server"
fi
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
if: always()
From 06405c8e8b4b7170b793ea64014b0e3f504dbded Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Mon, 2 Feb 2026 13:37:17 +0530
Subject: [PATCH 31/67] disabling parallel runs
---
.github/workflows/test.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8177e53db..4b34e74f4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
marks="not production and not uses_test_server"
fi
- pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
@@ -138,12 +138,12 @@ jobs:
marks="production and not uses_test_server"
fi
- pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest -n 0 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
if: always()
From e22b7ca82bbc1443dc011cde714eda0de3ae3467 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Mon, 2 Feb 2026 17:17:18 +0530
Subject: [PATCH 32/67] Skip Docker setup on Windows CI (images are Linux-only)
---
.github/workflows/test.yml | 6 +++---
tests/conftest.py | 8 ++++++++
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4b34e74f4..f3d16aeeb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
marks="not production and not uses_test_server"
fi
- pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
@@ -138,12 +138,12 @@ jobs:
marks="production and not uses_test_server"
fi
- pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n 0 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Cleanup Docker setup
if: always()
diff --git a/tests/conftest.py b/tests/conftest.py
index 25adf5d53..c1420527d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -317,6 +317,14 @@ def _start_docker():
@pytest.fixture(scope="session", autouse=True)
def openml_docker_stack(tmp_path_factory, worker_id):
+ # Skip Docker setup in CI on Windows given docker images are for Linux
+ is_ci = os.environ.get("CI") == "true"
+ is_windows = sys.platform == "win32" or os.name == "nt"
+
+ if is_ci and is_windows:
+ yield
+ return
+
# For local development with single worker
if worker_id == "master":
_start_docker()
From 1b00a7fb35ca57b4ff14a865865983aa336b790e Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 6 Feb 2026 14:30:58 +0530
Subject: [PATCH 33/67] Remove Docker startup from the pytest defaults
---
.github/workflows/test.yml | 9 ++++++
docker-compose.yml | 5 ++++
tests/conftest.py | 56 ++++++++------------------------------
3 files changed, 25 insertions(+), 45 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f3d16aeeb..a62562b52 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -107,6 +107,15 @@ jobs:
git clone --depth 1 https://github.com/openml/server-api.git server-api
sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+ - name: Start Docker Test Environment
+ if: matrix.os == 'ubuntu-latest'
+ shell: bash
+ run: |
+ sed -i 's/\r$//' docker/update.sh
+ docker compose up -d
+ docker wait openml-test-setup-ci
+ echo "OPENML_TEST_SERVER=local" >> $GITHUB_ENV
+
- name: Show installed dependencies
run: python -m pip list
diff --git a/docker-compose.yml b/docker-compose.yml
index 2db258741..4122f0e18 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,6 +11,11 @@ services:
start_period: 30s
interval: 5s
retries: 10
+ networks:
+ default:
+ aliases:
+ - openml-test-database
+ - elasticsearch
database-setup:
image: mysql
diff --git a/tests/conftest.py b/tests/conftest.py
index c1420527d..a64e6d2d0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,9 +24,6 @@
from __future__ import annotations
import multiprocessing
-import sys
-
-import fasteners
multiprocessing.set_start_method("spawn", force=True)
@@ -38,9 +35,6 @@
import pytest
import openml_sklearn
-import time
-import subprocess
-import requests
import openml
from openml.testing import TestBase
@@ -302,46 +296,18 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-def _is_server_responding():
- """Check if the Docker API is already listening."""
- try:
- requests.get("http://localhost:9001/api/v2/", timeout=1)
- return True
- except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
- return False
-
-def _start_docker():
- """Logic to spin up the containers and wait for initialization."""
- subprocess.run(["docker", "compose", "up", "-d"], check=True, capture_output=True, text=True)
- subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
-
-@pytest.fixture(scope="session", autouse=True)
-def openml_docker_stack(tmp_path_factory, worker_id):
- # Skip Docker setup in CI on Windows given docker images are for Linux
- is_ci = os.environ.get("CI") == "true"
- is_windows = sys.platform == "win32" or os.name == "nt"
-
- if is_ci and is_windows:
- yield
- return
-
- # For local development with single worker
- if worker_id == "master":
- _start_docker()
- yield
- subprocess.run(["docker", "compose", "down", "-v"], check=True)
- return
-
- # For CI with multiple workers (xdist)
- root_tmp_dir = tmp_path_factory.getbasetemp().parent
- lock_file = root_tmp_dir / "docker_setup.lock"
+@pytest.fixture(scope="session")
+def openml_test_config():
+ """
+ Returns the URL for the test server.
+ """
+ if os.environ.get("OPENML_TEST_SERVER") == "local":
+ return {
+ "v1": "http://localhost:9002/api/v1/",
+ "v2": "http://localhost:9001/"
+ }
- lock = fasteners.InterProcessLock(str(lock_file))
- with lock:
- if not _is_server_responding():
- _start_docker()
-
- yield
+ raise ValueError("Use the environment variable OPENML_TEST_SERVER=local before running docker to run tests against a local OpenML server.")
@pytest.fixture
def static_cache_dir():
From cc6e673852c06fd4e00afee0198046a9bfb58c89 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 6 Feb 2026 16:00:31 +0530
Subject: [PATCH 34/67] change mysql port
---
docker-compose.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 4122f0e18..a47a10106 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,7 +5,7 @@ services:
environment:
MYSQL_ROOT_PASSWORD: ok
ports:
- - "33060:3306"
+ - "33069:3306"
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
start_period: 30s
From c1bf5589a92358d78eed01dfcb8568e534875636 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 6 Feb 2026 16:40:09 +0530
Subject: [PATCH 35/67] Change order of CI flow
---
.github/workflows/test.yml | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a62562b52..2a1f4e9ae 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -154,6 +154,15 @@ jobs:
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ - name: Upload coverage
+ if: matrix.code-cov && always()
+ uses: codecov/codecov-action@v4
+ with:
+ files: coverage.xml
+ token: ${{ secrets.CODECOV_TOKEN }}
+ fail_ci_if_error: true
+ verbose: true
+
- name: Cleanup Docker setup
if: always()
shell: bash
@@ -173,15 +182,6 @@ jobs:
exit 1
fi
- - name: Upload coverage
- if: matrix.code-cov && always()
- uses: codecov/codecov-action@v4
- with:
- files: coverage.xml
- token: ${{ secrets.CODECOV_TOKEN }}
- fail_ci_if_error: true
- verbose: true
-
dummy_windows_py_sk024:
name: (windows-latest, Py, sk0.24.*, sk-only:false)
runs-on: ubuntu-latest
From 1a794feb545caec924be3bee062a9d123cafa02a Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:26:16 +0530
Subject: [PATCH 36/67] CI testing
---
.github/workflows/test.yml | 17 ++++--------
docker-compose.yml | 57 --------------------------------------
docker/update.sh | 31 ---------------------
tests/conftest.py | 13 ---------
4 files changed, 6 insertions(+), 112 deletions(-)
delete mode 100644 docker-compose.yml
delete mode 100644 docker/update.sh
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2a1f4e9ae..30b36a0bf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,20 +101,15 @@ jobs:
echo "BEFORE=$git_status" >> $GITHUB_ENV
echo "Repository status before tests: $git_status"
- - name: Checkout server-api and patch Docker path
- shell: bash
- run: |
- git clone --depth 1 https://github.com/openml/server-api.git server-api
- sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+ - name: Clone Services
+ run: git clone --depth 1 https://github.com/openml/services.git
- - name: Start Docker Test Environment
- if: matrix.os == 'ubuntu-latest'
- shell: bash
+ - name: Start Docker Services
+ working-directory: ./services
run: |
- sed -i 's/\r$//' docker/update.sh
- docker compose up -d
+ sudo systemctl stop mysql.service
+ docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
docker wait openml-test-setup-ci
- echo "OPENML_TEST_SERVER=local" >> $GITHUB_ENV
- name: Show installed dependencies
run: python -m pip list
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index a47a10106..000000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,57 +0,0 @@
-services:
- database:
- image: "openml/test-database:20240105"
- container_name: "openml-test-db-ci"
- environment:
- MYSQL_ROOT_PASSWORD: ok
- ports:
- - "33069:3306"
- healthcheck:
- test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
- start_period: 30s
- interval: 5s
- retries: 10
- networks:
- default:
- aliases:
- - openml-test-database
- - elasticsearch
-
- database-setup:
- image: mysql
- container_name: "openml-test-setup-ci"
- volumes:
- - ./docker/update.sh:/database-update.sh
- command: /bin/sh -c "/database-update.sh"
- depends_on:
- database:
- condition: service_healthy
-
-# V1 API (PHP)
- php-api:
- image: "openml/php-rest-api:v1.2.2"
- container_name: "openml-php-api-ci"
- ports:
- - "9002:80"
- depends_on:
- database:
- condition: service_started
- environment:
- - DB_HOST_OPENML=database:3306
- - DB_HOST_EXPDB=database:3306
- - BASE_URL=http://localhost:9002/
- - INDEX_ES_DURING_STARTUP=false
-
- # V2 API (PYTHON)
- python-api:
- container_name: "openml-python-api-ci"
- build:
- # TODO: replace with image when available
- context: ../server-api
- dockerfile: docker/python/Dockerfile
- ports:
- - "9001:8000"
- depends_on:
- - database
- environment:
- - DATABASE_URL=mysql://root:ok@database:3306/openml
\ No newline at end of file
diff --git a/docker/update.sh b/docker/update.sh
deleted file mode 100644
index 7e9864742..000000000
--- a/docker/update.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#/bin/bash
-# Change the filepath of openml.file
-# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
-# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
-mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'
-
-# Update openml.expdb.dataset with the same url
-mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'
-
-
-
-
-
-# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
-mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
- `did` int unsigned NOT NULL,
- `index` int unsigned NOT NULL,
- `uploader` mediumint unsigned NOT NULL,
- `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
- `description_type` enum("plain", "ontology") NOT NULL,
- `value` varchar(256) NOT NULL,
- KEY `did` (`did`,`index`),
- CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
-)'
-
-# SET dataset 1 to active (used in unittests java)
-mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
-mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'
-
-# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing.
-mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index a64e6d2d0..08db800df 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -296,19 +296,6 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-@pytest.fixture(scope="session")
-def openml_test_config():
- """
- Returns the URL for the test server.
- """
- if os.environ.get("OPENML_TEST_SERVER") == "local":
- return {
- "v1": "http://localhost:9002/api/v1/",
- "v2": "http://localhost:9001/"
- }
-
- raise ValueError("Use the environment variable OPENML_TEST_SERVER=local before running docker to run tests against a local OpenML server.")
-
@pytest.fixture
def static_cache_dir():
return Path(__file__).parent / "files"
From dbe77827401b802fc47887dc07c9c9b486e2aa57 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:27:50 +0530
Subject: [PATCH 37/67] CI testing
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 30b36a0bf..8b857a435 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -162,7 +162,7 @@ jobs:
if: always()
shell: bash
run: |
- rm -rf server-api
+ rm -rf services
git checkout docker-compose.yml
- name: Check for files left behind by test
From d8be5f12a47e520fa1f2697b299a0d6c5e1e0856 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:28:13 +0530
Subject: [PATCH 38/67] CI testing
---
tests/conftest.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index 08db800df..0fa4b959a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -295,7 +295,8 @@ def with_test_cache(test_files_directory, request):
openml.config.set_root_cache_directory(_root_cache_directory)
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-
+
+
@pytest.fixture
def static_cache_dir():
return Path(__file__).parent / "files"
From b20484521e24eced3e456e17c3424ee76f98e11e Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:44:47 +0530
Subject: [PATCH 39/67] CI testing
---
.github/workflows/test.yml | 14 ++++++++------
tests/conftest.py | 2 +-
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8b857a435..ea8a22c26 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,16 +1,13 @@
----
name: Tests
on:
workflow_dispatch:
-
push:
branches:
- main
- develop
tags:
- "v*.*.*"
-
pull_request:
branches:
- main
@@ -102,20 +99,24 @@ jobs:
echo "Repository status before tests: $git_status"
- name: Clone Services
+ if: matrix.os == 'ubuntu-latest'
run: git clone --depth 1 https://github.com/openml/services.git
- name: Start Docker Services
+ if: matrix.os == 'ubuntu-latest'
working-directory: ./services
run: |
sudo systemctl stop mysql.service
docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
- docker wait openml-test-setup-ci
+ docker wait openml-test-database-setup
- name: Show installed dependencies
run: python -m pip list
- name: Run tests on Ubuntu Test
if: matrix.os == 'ubuntu-latest'
+ env:
+ TEST_SERVER_URL: "http://localhost:8000"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
@@ -131,6 +132,8 @@ jobs:
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
+ env:
+ TEST_SERVER_URL: "http://localhost:8000"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
@@ -162,8 +165,7 @@ jobs:
if: always()
shell: bash
run: |
- rm -rf services
- git checkout docker-compose.yml
+ sudo rm -rf services
- name: Check for files left behind by test
if: matrix.os != 'windows-latest' && always()
diff --git a/tests/conftest.py b/tests/conftest.py
index 0fa4b959a..ae67f2f43 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -296,7 +296,7 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
-
+
@pytest.fixture
def static_cache_dir():
return Path(__file__).parent / "files"
From 54725fa2d0b95855e1b329d34b5921f28253a9e8 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:52:20 +0530
Subject: [PATCH 40/67] Windows CI bugfixing
---
.github/workflows/test.yml | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ea8a22c26..a21992474 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -162,8 +162,7 @@ jobs:
verbose: true
- name: Cleanup Docker setup
- if: always()
- shell: bash
+ if: matrix.os == 'ubuntu-latest'
run: |
sudo rm -rf services
From abc44a5493e2a8f0210cd29da27e45e7b369eccc Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 20:52:45 +0530
Subject: [PATCH 41/67] merging 2 branches
---
.github/workflows/test.yml | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a21992474..8778dc33c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -100,7 +100,13 @@ jobs:
- name: Clone Services
if: matrix.os == 'ubuntu-latest'
- run: git clone --depth 1 https://github.com/openml/services.git
+ run: |
+ git clone --depth 1 https://github.com/openml/services.git
+ git fetch origin setup-test-locally:setup-test-locally
+ git fetch origin add/python-rest-api:add/python-rest-api
+
+ git merge setup-test-locally
+ git merge add/python-rest-api
- name: Start Docker Services
if: matrix.os == 'ubuntu-latest'
From b034687ff0ba29195fd49001eec53bd2462e0361 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 20:55:05 +0530
Subject: [PATCH 42/67] merging 2 branches
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8778dc33c..748798856 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,8 +102,8 @@ jobs:
if: matrix.os == 'ubuntu-latest'
run: |
git clone --depth 1 https://github.com/openml/services.git
- git fetch origin setup-test-locally:setup-test-locally
- git fetch origin add/python-rest-api:add/python-rest-api
+ git fetch origin setup-test-locally
+ git fetch origin add/python-rest-api
git merge setup-test-locally
git merge add/python-rest-api
From b8826f5f5fd18b89593dbbfe20bd3a9b8dec8134 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 21:01:12 +0530
Subject: [PATCH 43/67] merging 2 branches
---
.github/workflows/test.yml | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 748798856..7c0136d5b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,11 +102,17 @@ jobs:
if: matrix.os == 'ubuntu-latest'
run: |
git clone --depth 1 https://github.com/openml/services.git
- git fetch origin setup-test-locally
- git fetch origin add/python-rest-api
+ git clone --depth 1 https://github.com/openml/services.git
+ cd services
- git merge setup-test-locally
- git merge add/python-rest-api
+ git config user.email "ci@openml.org"
+ git config user.name "CI"
+
+ git fetch origin pull/13/head:pr-13
+ git merge pr-13 --no-edit
+
+ git fetch origin pull/15/head:pr-15
+ git merge pr-15 --no-edit
- name: Start Docker Services
if: matrix.os == 'ubuntu-latest'
From 445cbe807a9859421f38d4d8642694d2a5bcce87 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 21:04:30 +0530
Subject: [PATCH 44/67] merging 2 branches
---
.github/workflows/test.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7c0136d5b..43264c913 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,7 +101,6 @@ jobs:
- name: Clone Services
if: matrix.os == 'ubuntu-latest'
run: |
- git clone --depth 1 https://github.com/openml/services.git
git clone --depth 1 https://github.com/openml/services.git
cd services
From 295ef9339f4e09627be1e6c1a4fbbe4afc7f05b8 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 21:20:57 +0530
Subject: [PATCH 45/67] curl to verify server is running
---
.github/workflows/test.yml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 43264c913..ad08a477a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,6 +121,12 @@ jobs:
docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
docker wait openml-test-database-setup
+ - name: Verify API is Reachable
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done'
+ curl -I http://localhost:8000/api/v1/xml/data/1
+
- name: Show installed dependencies
run: python -m pip list
From 488f40934267cfea6d44e954568922f7cd4ba68a Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 21:42:04 +0530
Subject: [PATCH 46/67] path fix
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ad08a477a..b229cb6a9 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -133,7 +133,7 @@ jobs:
- name: Run tests on Ubuntu Test
if: matrix.os == 'ubuntu-latest'
env:
- TEST_SERVER_URL: "http://localhost:8000"
+ TEST_SERVER_URL: "http://localhost:8000/"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
@@ -150,7 +150,7 @@ jobs:
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
env:
- TEST_SERVER_URL: "http://localhost:8000"
+ TEST_SERVER_URL: "http://localhost:8000/"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
From 45e72578d6c1cb4faee5aa940430bd4db82fc5f5 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Wed, 11 Feb 2026 23:52:12 +0530
Subject: [PATCH 47/67] run all test server tests
---
.github/workflows/test.yml | 15 +++++++++------
tests/files/localhost:8080 | 1 -
2 files changed, 9 insertions(+), 7 deletions(-)
delete mode 120000 tests/files/localhost:8080
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b229cb6a9..5b608d501 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,13 +1,16 @@
+---
name: Tests
on:
workflow_dispatch:
+
push:
branches:
- main
- develop
tags:
- "v*.*.*"
+
pull_request:
branches:
- main
@@ -125,7 +128,7 @@ jobs:
if: matrix.os == 'ubuntu-latest'
run: |
timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done'
- curl -I http://localhost:8000/api/v1/xml/data/1
+ curl -I http://localhost:8000/api/v1/task/1
- name: Show installed dependencies
run: python -m pip list
@@ -140,9 +143,9 @@ jobs:
fi
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
- marks="sklearn and not production and not uses_test_server"
+ marks="sklearn and not production"
else
- marks="not production and not uses_test_server"
+ marks="not production"
fi
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -157,9 +160,9 @@ jobs:
fi
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
- marks="sklearn and production and not uses_test_server"
+ marks="sklearn and production"
else
- marks="production and not uses_test_server"
+ marks="production"
fi
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -167,7 +170,7 @@ jobs:
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
+ pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
- name: Upload coverage
if: matrix.code-cov && always()
diff --git a/tests/files/localhost:8080 b/tests/files/localhost:8080
deleted file mode 120000
index 334c709ef..000000000
--- a/tests/files/localhost:8080
+++ /dev/null
@@ -1 +0,0 @@
-org/openml/test
\ No newline at end of file
From 7fcf039fb215c840faa4bc6d0607eb30d133cf67 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 00:29:40 +0530
Subject: [PATCH 48/67] fix 'Cleanup Docker setup'
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5b608d501..78db57bdc 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -182,7 +182,7 @@ jobs:
verbose: true
- name: Cleanup Docker setup
- if: matrix.os == 'ubuntu-latest'
+ if: matrix.os == 'ubuntu-latest' && always()
run: |
sudo rm -rf services
From 37cfb2eea805f42181f61c7a6246ba8f598cdca4 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 00:33:28 +0530
Subject: [PATCH 49/67] Skip test-server tests on Windows given the Docker binaries do not match
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 78db57bdc..fabad7757 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -170,7 +170,7 @@ jobs:
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
+ pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
- name: Upload coverage
if: matrix.code-cov && always()
From 9290010e8ad897c25cccf4e39330d9b1a1b339a0 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 14:47:26 +0530
Subject: [PATCH 50/67] testing out locally
---
.github/workflows/test.yml | 4 ----
openml/config.py | 2 +-
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fabad7757..219f01e70 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -135,8 +135,6 @@ jobs:
- name: Run tests on Ubuntu Test
if: matrix.os == 'ubuntu-latest'
- env:
- TEST_SERVER_URL: "http://localhost:8000/"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
@@ -152,8 +150,6 @@ jobs:
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
- env:
- TEST_SERVER_URL: "http://localhost:8000/"
run: |
if [ "${{ matrix.code-cov }}" = "true" ]; then
codecov="--cov=openml --long --cov-report=xml"
diff --git a/openml/config.py b/openml/config.py
index 5b2d69067..3f46c7480 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -27,7 +27,7 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
-TEST_SERVER_URL = "https://test.openml.org"
+TEST_SERVER_URL = "http://localhost:8000"
class _Config(TypedDict):
From bbfa193afaaa90ca77f7adddf77f9b4b58edbe2a Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 17:07:02 +0530
Subject: [PATCH 51/67] replacing with 8080
---
openml/config.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openml/config.py b/openml/config.py
index 3f46c7480..0e8d21618 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -27,7 +27,7 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
-TEST_SERVER_URL = "http://localhost:8000"
+TEST_SERVER_URL = "http://localhost:8080"
class _Config(TypedDict):
From 4531cbc4afb14c5a9e01e1c2c062c17756b18da0 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 21:34:44 +0530
Subject: [PATCH 52/67] test
---
.github/workflows/test.yml | 15 ++++++++++++++-
openml/config.py | 2 +-
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 219f01e70..328045554 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,8 +121,21 @@ jobs:
working-directory: ./services
run: |
sudo systemctl stop mysql.service
- docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
+ docker compose --profile rest-api --profile minio up -d
+
+ echo "1. Waiting for Database population..."
docker wait openml-test-database-setup
+
+ echo "2. Waiting for Elasticsearch (this is the slow part)..."
+ # Wait up to 5 minutes for ES to go green
+ timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-elasticsearch)" == "healthy" ]; do sleep 5; done'
+
+ echo "3. Waiting for PHP API..."
+ # Wait up to 5 minutes for PHP to accept connections
+ timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'
+
+ echo "4. Docker Stack is Healthy!"
+ docker ps
- name: Verify API is Reachable
if: matrix.os == 'ubuntu-latest'
diff --git a/openml/config.py b/openml/config.py
index 0e8d21618..3f46c7480 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -27,7 +27,7 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
-TEST_SERVER_URL = "http://localhost:8080"
+TEST_SERVER_URL = "http://localhost:8000"
class _Config(TypedDict):
From d90615a30c53ada2b1b84caaea975f87ac21634c Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 21:46:48 +0530
Subject: [PATCH 53/67] test
---
.github/workflows/test.yml | 34 ++++++++++++++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 328045554..7f832d982 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -140,8 +140,38 @@ jobs:
- name: Verify API is Reachable
if: matrix.os == 'ubuntu-latest'
run: |
- timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done'
- curl -I http://localhost:8000/api/v1/task/1
+ echo "Waiting for API to be ready (Handling 412 Sync Errors)..."
+
+ # Helper function to check status
+ check_api() {
+ # Fetch HTTP code
+ code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)
+ if [ "$code" == "200" ]; then
+ return 0
+ else
+ return 1
+ fi
+ }
+
+ # Loop for up to 60 seconds
+ count=0
+ while [ $count -lt 12 ]; do
+ if check_api; then
+ echo "API is Ready (200 OK)!"
+ exit 0
+ fi
+ echo "API responded with status $(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1). Retrying in 5s..."
+ sleep 5
+ count=$((count+1))
+ done
+
+ echo "API failed to initialize. Printing last response body for debugging:"
+ curl -v http://localhost:8000/api/v1/xml/data/1
+
+ # Also print PHP logs to see the specific OpenML Exception
+ echo "=== PHP API LOGS ==="
+ docker logs openml-php-rest-api
+ exit 1
- name: Show installed dependencies
run: python -m pip list
From 9b12d6fb1376eea87d7e27e890b39ed1c116483c Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 23:59:08 +0530
Subject: [PATCH 54/67] test
---
.github/workflows/test.yml | 43 ++++++++++----------------------------
1 file changed, 11 insertions(+), 32 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7f832d982..deb4620f2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -137,41 +137,20 @@ jobs:
echo "4. Docker Stack is Healthy!"
docker ps
- - name: Verify API is Reachable
+ - name: Verify API and Splits
if: matrix.os == 'ubuntu-latest'
run: |
- echo "Waiting for API to be ready (Handling 412 Sync Errors)..."
+ echo "Checking Data API..."
+ timeout 60s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)" == "200" ]; do sleep 5; done'
+
+ echo "Checking Task Splits (The 412 Killer)..."
+ # If this fails, the evaluation engine is broken
+ timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
+ echo "Splits not ready yet. Waiting..."
+ sleep 5
+ done'
- # Helper function to check status
- check_api() {
- # Fetch HTTP code
- code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)
- if [ "$code" == "200" ]; then
- return 0
- else
- return 1
- fi
- }
-
- # Loop for up to 60 seconds
- count=0
- while [ $count -lt 12 ]; do
- if check_api; then
- echo "API is Ready (200 OK)!"
- exit 0
- fi
- echo "API responded with status $(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1). Retrying in 5s..."
- sleep 5
- count=$((count+1))
- done
-
- echo "API failed to initialize. Printing last response body for debugging:"
- curl -v http://localhost:8000/api/v1/xml/data/1
-
- # Also print PHP logs to see the specific OpenML Exception
- echo "=== PHP API LOGS ==="
- docker logs openml-php-rest-api
- exit 1
+ echo "System is fully operational."
- name: Show installed dependencies
run: python -m pip list
From 45d34234015dd999f1de178b69f1fde55549c9ba Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Thu, 12 Feb 2026 23:59:34 +0530
Subject: [PATCH 55/67] test
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index deb4620f2..8207a9b78 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -168,7 +168,7 @@ jobs:
marks="not production"
fi
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
@@ -183,7 +183,7 @@ jobs:
marks="production"
fi
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
From 16f22b12d1e6a46802b6140c3a0bfbdfd67a8c71 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 00:18:58 +0530
Subject: [PATCH 56/67] test
---
.github/workflows/test.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8207a9b78..4a8a983c4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
working-directory: ./services
run: |
sudo systemctl stop mysql.service
- docker compose --profile rest-api --profile minio up -d
+ docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
echo "1. Waiting for Database population..."
docker wait openml-test-database-setup
@@ -145,7 +145,7 @@ jobs:
echo "Checking Task Splits (The 412 Killer)..."
# If this fails, the evaluation engine is broken
- timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
+ timeout 180s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
echo "Splits not ready yet. Waiting..."
sleep 5
done'
@@ -168,7 +168,7 @@ jobs:
marks="not production"
fi
- pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
@@ -183,7 +183,7 @@ jobs:
marks="production"
fi
- pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
From dd2ce686e1e6d8cdb9d07a705d034e89ca010e93 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 00:52:09 +0530
Subject: [PATCH 57/67] test
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4a8a983c4..d80fb14db 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
working-directory: ./services
run: |
sudo systemctl stop mysql.service
- docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
+ docker compose --profile rest-api --profile minio up -d --build
echo "1. Waiting for Database population..."
docker wait openml-test-database-setup
@@ -145,7 +145,7 @@ jobs:
echo "Checking Task Splits (The 412 Killer)..."
# If this fails, the evaluation engine is broken
- timeout 180s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
+ timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
echo "Splits not ready yet. Waiting..."
sleep 5
done'
From ebecceaf8a6c9f7bff7cb63024eaea3581250328 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 00:59:36 +0530
Subject: [PATCH 58/67] test
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d80fb14db..33f96a592 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
working-directory: ./services
run: |
sudo systemctl stop mysql.service
- docker compose --profile rest-api --profile minio up -d --build
+ docker compose --profile rest-api --profile minio --profile evaluation-engine up -d --build
echo "1. Waiting for Database population..."
docker wait openml-test-database-setup
From a0ac6b99126ff48b84cce26d3b476c13b68b8ffe Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 01:06:49 +0530
Subject: [PATCH 59/67] test
---
.github/workflows/test.yml | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 33f96a592..da689aecf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -136,7 +136,41 @@ jobs:
echo "4. Docker Stack is Healthy!"
docker ps
+ - name: Error
+ working-directory: ./services
+ run: |
+ echo "---------------------------------------------------"
+ echo "1. PROBING: Can we reach the API at all?"
+ timeout 60s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 5; done' || echo "WARNING: Main API is slow/down"
+
+ echo "---------------------------------------------------"
+ echo "2. PROBING: Waiting for Task 119 Splits (The Failure Point)..."
+ # We wait 60s. If it works, great. If not, we want the logs.
+ timeout 60s bash -c 'until curl -sSf http://localhost:8000/api_splits/get/119/Task_119_splits.arff > /dev/null; do
+ echo " ... file not ready yet"
+ sleep 5
+ done' || echo "FAILURE: Task 119 splits were NOT generated."
+
+ echo "---------------------------------------------------"
+ echo "3.DUMPING EVALUATION ENGINE LOGS (STDOUT)"
+ docker logs openml-evaluation-engine
+
+ echo "---------------------------------------------------"
+ echo "4.DUMPING INTERNAL CRON LOGS (The Hidden Logs)"
+ # The engine runs via cron, so the real errors are often in this file, NOT in docker logs
+ docker exec openml-evaluation-engine cat /cron.log || echo "Could not read /cron.log"
+
+ echo "---------------------------------------------------"
+ echo "5.DUMPING PHP API LOGS (Why did it throw 412?)"
+ docker logs openml-php-rest-api | grep "412" -B 5 -A 5 || echo "No 412 errors found in logs?"
+
+ echo "---------------------------------------------------"
+ echo "6.CHECKING NETWORK (Can the container see Nginx?)"
+ # This checks if the container can actually resolve 'localhost' to the host machine
+ docker exec openml-evaluation-engine curl -v http://localhost:8000/api/v1/xml/data/1 || echo "Container cannot connect to localhost:8000"
+ # Force fail so you see the red X and check logs
+ exit 1
- name: Verify API and Splits
if: matrix.os == 'ubuntu-latest'
run: |
From 439e683d9ba19f7820d660e879fe6a5b2c0d89db Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 01:14:50 +0530
Subject: [PATCH 60/67] test
---
.github/workflows/test.yml | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index da689aecf..2e592a8a7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -115,6 +115,10 @@ jobs:
git fetch origin pull/15/head:pr-15
git merge pr-15 --no-edit
+ sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh
+
+ # Verify the change
+ grep "172.28.0.2" config/database/update.sh || echo "Patch failed!"
- name: Start Docker Services
if: matrix.os == 'ubuntu-latest'
@@ -136,6 +140,7 @@ jobs:
echo "4. Docker Stack is Healthy!"
docker ps
+
- name: Error
working-directory: ./services
run: |
From f87051bdec7513698e0a7c114027b8c06c718a53 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 01:22:59 +0530
Subject: [PATCH 61/67] test
---
.github/workflows/test.yml | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2e592a8a7..d2926a790 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -117,8 +117,13 @@ jobs:
git merge pr-15 --no-edit
sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh
- # Verify the change
- grep "172.28.0.2" config/database/update.sh || echo "Patch failed!"
+ # === PATCH 2: Fix MinIO Path Mismatch ===
+ # The PR uses '/minio/' but Nginx usually expects '/data/' for MinIO
+ # We replace '/minio/' with '/data/' in the URL rewrite script
+ sed -i 's|/minio/|/data/|g' config/database/update.sh
+
+ echo "=== Patched Update Script ==="
+ cat config/database/update.sh | grep "172.28.0.2"
- name: Start Docker Services
if: matrix.os == 'ubuntu-latest'
From 4077a5628aff3192abbe0181e4a8ad010e2100d0 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 01:31:04 +0530
Subject: [PATCH 62/67] test
---
.github/workflows/test.yml | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d2926a790..9cda74f35 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,29 +101,32 @@ jobs:
echo "BEFORE=$git_status" >> $GITHUB_ENV
echo "Repository status before tests: $git_status"
- - name: Clone Services
+ - name: Configure Host Network (The "Magic" Step)
+ run: |
+ # Map 'nginx' to localhost so the Host machine can resolve the URLs in the database
+ echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts
+ ping -c 1 nginx
+
+ - name: Clone Services & Apply Universal Patch
if: matrix.os == 'ubuntu-latest'
run: |
git clone --depth 1 https://github.com/openml/services.git
cd services
-
git config user.email "ci@openml.org"
git config user.name "CI"
-
- git fetch origin pull/13/head:pr-13
- git merge pr-13 --no-edit
-
- git fetch origin pull/15/head:pr-15
- git merge pr-15 --no-edit
- sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh
+ git fetch origin pull/13/head:pr-13 && git merge pr-13 --no-edit
+ git fetch origin pull/15/head:pr-15 && git merge pr-15 --no-edit
+
+ # === PATCH 1: Use 'nginx' hostname ===
+ # This works inside Docker (DNS) and on Host (via /etc/hosts hack above)
+ sed -i 's/localhost:8000/nginx:8000/g' config/database/update.sh
- # === PATCH 2: Fix MinIO Path Mismatch ===
- # The PR uses '/minio/' but Nginx usually expects '/data/' for MinIO
- # We replace '/minio/' with '/data/' in the URL rewrite script
+ # === PATCH 2: Fix Path Mismatch ===
+ # Ensure we use /data/ which Nginx recognizes
sed -i 's|/minio/|/data/|g' config/database/update.sh
echo "=== Patched Update Script ==="
- cat config/database/update.sh | grep "172.28.0.2"
+ cat config/database/update.sh | grep "nginx"
- name: Start Docker Services
if: matrix.os == 'ubuntu-latest'
From fad1ee7dbe052f824706dafbcdc974ff49d6cd5e Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 02:07:20 +0530
Subject: [PATCH 63/67] test
---
.github/workflows/test.yml | 30 +++++++++++++++++++++++++++---
1 file changed, 27 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9cda74f35..4c4fac0e7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,7 +105,6 @@ jobs:
run: |
# Map 'nginx' to localhost so the Host machine can resolve the URLs in the database
echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts
- ping -c 1 nginx
- name: Clone Services & Apply Universal Patch
if: matrix.os == 'ubuntu-latest'
@@ -145,9 +144,34 @@ jobs:
echo "3. Waiting for PHP API..."
# Wait up to 5 minutes for PHP to accept connections
timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'
+
+ - name: Finalize Setup & Verify Splits
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ echo "1. Forcing Elasticsearch Indexing Sync..."
+ # This helps clear the 412 errors the Engine is hitting
+ curl -s http://nginx:8000/api/v1/xml/admin/index/sync || echo "Sync endpoint not found, skipping..."
+
+ echo "2. Waiting for Evaluation Engine to process Task 119..."
+ echo "Targeting Task 119 (The primary failure point)."
+
+ # Give it 5 minutes (300s). Java + indexing + splits generation is heavy.
+ count=0
+ while [ $count -lt 30 ]; do
+ code=$(curl -s -o /dev/null -w "%{http_code}" http://nginx:8000/api_splits/get/119/Task_119_splits.arff)
+ if [ "$code" == "200" ]; then
+          echo "✅ SUCCESS: Task 119 splits are ready!"
+ exit 0
+ fi
+ echo " ... waiting for split generation (Current Status: $code)"
+ sleep 10
+ count=$((count+1))
+ done
- echo "4. Docker Stack is Healthy!"
- docker ps
+      echo "❌ ERROR: Evaluation Engine timed out."
+ # Dump logs only if this step fails
+ docker exec openml-evaluation-engine cat /logs/evaluation.log || echo "Could not read log file"
+ exit 1
- name: Error
working-directory: ./services
From 4086730d5c206c416cea8ff2ec6cd9cf4850a481 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 02:18:12 +0530
Subject: [PATCH 64/67] test
---
.github/workflows/test.yml | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4c4fac0e7..991caf076 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -148,14 +148,14 @@ jobs:
- name: Finalize Setup & Verify Splits
if: matrix.os == 'ubuntu-latest'
run: |
- echo "1. Forcing Elasticsearch Indexing Sync..."
- # This helps clear the 412 errors the Engine is hitting
- curl -s http://nginx:8000/api/v1/xml/admin/index/sync || echo "Sync endpoint not found, skipping..."
+ echo "1. Forcing Elasticsearch Indexing Sync (With Auth)..."
+        # We append the default test API Key (AD0...0) to authorize the admin action
+ curl -s "http://nginx:8000/api/v1/xml/admin/index/sync?api_key=AD000000000000000000000000000000" || echo "Sync request failed"
echo "2. Waiting for Evaluation Engine to process Task 119..."
echo "Targeting Task 119 (The primary failure point)."
- # Give it 5 minutes (300s). Java + indexing + splits generation is heavy.
+ # We give it 5 minutes (300s) to handle the queue.
count=0
while [ $count -lt 30 ]; do
code=$(curl -s -o /dev/null -w "%{http_code}" http://nginx:8000/api_splits/get/119/Task_119_splits.arff)
@@ -169,8 +169,17 @@ jobs:
done
echo "❌ ERROR: Evaluation Engine timed out."
- # Dump logs only if this step fails
- docker exec openml-evaluation-engine cat /logs/evaluation.log || echo "Could not read log file"
+
+ echo "=== DEBUG: LISTING LOG DIR ==="
+ docker exec openml-evaluation-engine ls -R /logs/ || echo "Dir empty"
+
+ echo "=== DEBUG: DUMPING ALL LOGS ==="
+ # Use wildcard to catch whatever the filename actually is
+ docker exec openml-evaluation-engine sh -c "cat /logs/*.log" || echo "Could not read logs"
+
+ # Also check the cron log again, just in case
+ docker exec openml-evaluation-engine cat /cron.log || echo "Cron log empty"
+
exit 1
- name: Error
From fecebbccd71bebe8a9d4e7538a5c27d67237ee91 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 02:37:54 +0530
Subject: [PATCH 65/67] windows fix?
---
openml/config.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/openml/config.py b/openml/config.py
index 3f46c7480..233fbcf24 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -9,6 +9,7 @@
import os
import platform
import shutil
+import sys
import warnings
from collections.abc import Iterator
from contextlib import contextmanager
@@ -27,7 +28,10 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
-TEST_SERVER_URL = "http://localhost:8000"
+if sys.platform.startswith("win"):
+ TEST_SERVER_URL = "http://localhost"
+else:
+ TEST_SERVER_URL = "http://localhost:8000"
class _Config(TypedDict):
From 4845a1ed259a48caf9291a2d8eeafa33048ec5e4 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 02:42:15 +0530
Subject: [PATCH 66/67] windows fix?
---
.github/workflows/test.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 991caf076..bb666cbdc 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,7 +101,8 @@ jobs:
echo "BEFORE=$git_status" >> $GITHUB_ENV
echo "Repository status before tests: $git_status"
- - name: Configure Host Network (The "Magic" Step)
+ - name: Configure Host Network
+ if: matrix.os == 'ubuntu-latest'
run: |
# Map 'nginx' to localhost so the Host machine can resolve the URLs in the database
echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts
From a2470507a570ef4582017dadf392f72f172e6200 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 13 Feb 2026 02:46:28 +0530
Subject: [PATCH 67/67] windows fix?
---
.github/workflows/test.yml | 50 --------------------------------------
1 file changed, 50 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bb666cbdc..f8319300d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -183,56 +183,6 @@ jobs:
exit 1
- - name: Error
- working-directory: ./services
- run: |
- echo "---------------------------------------------------"
- echo "1. PROBING: Can we reach the API at all?"
- timeout 60s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 5; done' || echo "WARNING: Main API is slow/down"
-
- echo "---------------------------------------------------"
- echo "2. PROBING: Waiting for Task 119 Splits (The Failure Point)..."
- # We wait 60s. If it works, great. If not, we want the logs.
- timeout 60s bash -c 'until curl -sSf http://localhost:8000/api_splits/get/119/Task_119_splits.arff > /dev/null; do
- echo " ... file not ready yet"
- sleep 5
- done' || echo "FAILURE: Task 119 splits were NOT generated."
-
- echo "---------------------------------------------------"
- echo "3.DUMPING EVALUATION ENGINE LOGS (STDOUT)"
- docker logs openml-evaluation-engine
-
- echo "---------------------------------------------------"
- echo "4.DUMPING INTERNAL CRON LOGS (The Hidden Logs)"
- # The engine runs via cron, so the real errors are often in this file, NOT in docker logs
- docker exec openml-evaluation-engine cat /cron.log || echo "Could not read /cron.log"
-
- echo "---------------------------------------------------"
- echo "5.DUMPING PHP API LOGS (Why did it throw 412?)"
- docker logs openml-php-rest-api | grep "412" -B 5 -A 5 || echo "No 412 errors found in logs?"
-
- echo "---------------------------------------------------"
- echo "6.CHECKING NETWORK (Can the container see Nginx?)"
- # This checks if the container can actually resolve 'localhost' to the host machine
- docker exec openml-evaluation-engine curl -v http://localhost:8000/api/v1/xml/data/1 || echo "Container cannot connect to localhost:8000"
-
- # Force fail so you see the red X and check logs
- exit 1
- - name: Verify API and Splits
- if: matrix.os == 'ubuntu-latest'
- run: |
- echo "Checking Data API..."
- timeout 60s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)" == "200" ]; do sleep 5; done'
-
- echo "Checking Task Splits (The 412 Killer)..."
- # If this fails, the evaluation engine is broken
- timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
- echo "Splits not ready yet. Waiting..."
- sleep 5
- done'
-
- echo "System is fully operational."
-
- name: Show installed dependencies
run: python -m pip list