From 0380d0f774a7e14e2884711669922c446f5a9534 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Thu, 5 Feb 2026 12:46:35 +0530
Subject: [PATCH 1/4] Register uses_test_server pytest marker

---
 pytest.ini | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..05d163208
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+    uses_test_server: tests that require the OpenML test server
+

From d1b765f484569b5fe0376ba7a941ef8939fe3157 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Fri, 6 Feb 2026 22:46:02 +0530
Subject: [PATCH 2/4] Register uses_test_server pytest marker in pyproject.toml

---
 pytest.ini | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 05d163208..000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-markers =
-    uses_test_server: tests that require the OpenML test server
-

From 18cc404de31eb39079621fe505e1a1b150d4e044 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Mon, 16 Feb 2026 21:07:52 +0530
Subject: [PATCH 3/4] Register uses_test_server pytest marker in pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 93a6ffbfa..9e1aa20e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,6 +137,7 @@ markers = [
     "upload: anything that uploads to a server",
     "production: any interaction with the production server",
     "cache: anything that interacts with the (test) cache",
+    "uses_test_server: tests that require the OpenML test server",
 ]
 
 # https://github.com/charliermarsh/ruff

From 1ff94fe1af1cf220937270581c6c0a52e574807f Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Mon, 16 Feb 2026 23:10:28 +0530
Subject: [PATCH 4/4] Rename uses_test_server to test_server and remove unused server marker

---
 pyproject.toml                                |   3 +-
 tests/test_datasets/test_dataset.py           |  16 +--
 tests/test_datasets/test_dataset_functions.py | 116 +++++++++---------
 .../test_evaluation_functions.py              |   4 +-
 tests/test_flows/test_flow.py                 |  16 +--
 tests/test_flows/test_flow_functions.py       |  10 +-
 tests/test_openml/test_api_calls.py           |   6 +-
 tests/test_runs/test_run.py                   |  12 +-
 tests/test_runs/test_run_functions.py         |  70 +++++------
 tests/test_setups/test_setup_functions.py     |  14 +--
 tests/test_study/test_study_functions.py      |  10 +-
 tests/test_tasks/test_classification_task.py  |   6 +-
 tests/test_tasks/test_clustering_task.py      |   4 +-
 tests/test_tasks/test_learning_curve_task.py  |   6 +-
 tests/test_tasks/test_regression_task.py      |   4 +-
 tests/test_tasks/test_supervised_task.py      |   2 +-
 tests/test_tasks/test_task.py                 |   4 +-
 tests/test_tasks/test_task_functions.py       |  32 ++---
 tests/test_tasks/test_task_methods.py         |   4 +-
 tests/test_utils/test_utils.py                |  20 +--
 20 files changed, 179 insertions(+), 180 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9e1aa20e5..880cc4f11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -133,11 +133,10 @@ filterwarnings=[
     "ignore:the matrix subclass:PendingDeprecationWarning"
 ]
 markers = [
-    "server: anything that connects to a server",
     "upload: anything that uploads to a server",
     "production: any interaction with the production server",
     "cache: anything that interacts with the (test) cache",
-    "uses_test_server: tests that require the OpenML test server",
+    "test_server: tests that require the OpenML test server",
 ]
 
 # https://github.com/charliermarsh/ruff

diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index b13bac30b..bc2270fd6 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -281,7 +281,7 @@ def test_equality_comparison(self):
         self.assertNotEqual(self.titanic, "Wrong_object")
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_tagging():
     dataset = openml.datasets.get_dataset(125, download_data=False)
 
@@ -298,7 +298,7 @@ def test_tagging():
     datasets = openml.datasets.list_datasets(tag=tag)
     assert datasets.empty
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_get_feature_with_ontology_data_id_11():
     # test on car dataset, which has built-in ontology references
     dataset = openml.datasets.get_dataset(11)
@@ -307,7 +307,7 @@ def test_get_feature_with_ontology_data_id_11():
     assert len(dataset.features[2].ontologies) >= 1
     assert len(dataset.features[3].ontologies) >= 1
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_remove_ontology_to_dataset():
     did = 1
     feature_index = 1
@@ -315,7 +315,7 @@ def test_add_remove_ontology_to_dataset():
     openml.datasets.functions.data_feature_add_ontology(did, feature_index, ontology)
     openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology)
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_same_ontology_multiple_features():
     did = 1
     ontology = "https://www.openml.org/unittest/" + str(time())
@@ -324,7 +324,7 @@ def test_add_same_ontology_multiple_features():
         openml.datasets.functions.data_feature_add_ontology(did, i, ontology)
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_illegal_long_ontology():
     did = 1
     ontology = "http://www.google.com/" + ("a" * 257)
@@ -336,7 +336,7 @@ def test_add_illegal_long_ontology():
 
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_illegal_url_ontology():
     did = 1
     ontology = "not_a_url" + str(time())
@@ -408,7 +408,7 @@ def test_get_sparse_categorical_data_id_395(self):
         assert len(feature.nominal_values) == 25
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test__read_features(mocker, workdir, static_cache_dir):
     """Test we read the features from the xml if no cache pickle is available.
     This test also does some simple checks to verify that the features are read correctly
@@ -440,7 +440,7 @@ def test__read_features(mocker, workdir, static_cache_dir):
     assert pickle_mock.dump.call_count == 1
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test__read_qualities(static_cache_dir, workdir, mocker):
     """Test we read the qualities from the xml if no cache pickle is available.
     This test also does some minor checks to ensure that the qualities are read correctly.
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c41664ba7..f67379a2d 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -107,7 +107,7 @@ def _check_datasets(self, datasets): for did in datasets: self._check_dataset(datasets[did]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tag_untag_dataset(self): tag = "test_tag_%d" % random.randint(1, 1000000) all_tags = _tag_entity("data", 1, tag) @@ -115,12 +115,12 @@ def test_tag_untag_dataset(self): all_tags = _tag_entity("data", 1, tag, untag=True) assert tag not in all_tags - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_length(self): datasets = openml.datasets.list_datasets() assert len(datasets) >= 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_paginate(self): size = 10 max = 100 @@ -135,7 +135,7 @@ def test_list_datasets_paginate(self): categories=["in_preparation", "active", "deactivated"], ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_empty(self): datasets = openml.datasets.list_datasets(tag="NoOneWouldUseThisTagAnyway") assert datasets.empty @@ -159,7 +159,7 @@ def test_check_datasets_active(self): ) openml.config.server = self.test_server - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) tag = "illegal_tag&" @@ -169,7 +169,7 @@ def test_illegal_character_tag(self): except openml.exceptions.OpenMLServerException as e: assert e.code == 477 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_illegal_length_tag(self): dataset = openml.datasets.get_dataset(1) tag = "a" * 65 @@ -211,7 +211,7 @@ def test__name_to_id_with_multiple_active_error(self): error_if_multiple=True, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__name_to_id_name_does_not_exist(self): """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( @@ -221,7 +221,7 @@ def test__name_to_id_name_does_not_exist(self): dataset_name="does_not_exist", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__name_to_id_version_does_not_exist(self): """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( @@ -232,7 +232,7 @@ def test__name_to_id_version_does_not_exist(self): version=100000, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets_by_name(self): # did 1 and 2 on the test server: dids = ["anneal", "kr-vs-kp"] @@ -240,7 +240,7 @@ def test_get_datasets_by_name(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets_by_mixed(self): # did 1 and 2 on the test server: dids = ["anneal", 2] @@ -248,14 +248,14 @@ def test_get_datasets_by_mixed(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets(self): dids = [1, 2] datasets = openml.datasets.get_datasets(dids) assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_by_name(self): dataset = openml.datasets.get_dataset("anneal") assert type(dataset) == OpenMLDataset @@ -274,7 +274,7 @@ 
def test_get_dataset_download_all_files(self): # test_get_dataset_lazy raise NotImplementedError - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) assert type(dataset) == OpenMLDataset @@ -293,7 +293,7 @@ def test_dataset_by_name_cannot_access_private_data(self): self.use_production_server() self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_lazy_all_functions(self): """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1) @@ -323,28 +323,28 @@ def ensure_absence_of_real_data(): assert classes == ["1", "2", "3", "4", "5", "U"] ensure_absence_of_real_data() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_sparse(self): dataset = openml.datasets.get_dataset(102) X, *_ = dataset.get_data() assert isinstance(X, pd.DataFrame) assert all(isinstance(col, pd.SparseDtype) for col in X.dtypes) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_rowid(self): # Smoke test which checks that the dataset has the row-id set correctly did = 44 dataset = openml.datasets.get_dataset(did) assert dataset.row_id_attribute == "Counter" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_description(self): description = _get_dataset_description(self.workdir, 2) assert isinstance(description, dict) description_xml_path = os.path.join(self.workdir, "description.xml") assert os.path.exists(description_xml_path) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__getarff_path_dataset_arff(self): openml.config.set_root_cache_directory(self.static_cache_dir) description = _get_dataset_description(self.workdir, 2) @@ -408,7 +408,7 @@ def test__download_minio_file_works_with_bucket_subdirectory(self): @mock.patch("openml._api_calls._download_minio_file") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_parquet_is_cached(self, patch): openml.config.set_root_cache_directory(self.static_cache_dir) patch.side_effect = RuntimeError( @@ -449,21 +449,21 @@ def test__getarff_md5_issue(self): openml.config.connection_n_retries = n - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) assert isinstance(features_file, Path) features_xml_path = self.workdir / "features.xml" assert features_xml_path.exists() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_qualities(self): qualities = _get_dataset_qualities_file(self.workdir, 2) assert isinstance(qualities, Path) qualities_xml_path = self.workdir / "qualities.xml" assert qualities_xml_path.exists() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_force_refresh_cache(self): did_cache_dir = _create_cache_directory_for_id( DATASETS_CACHE_DIR_NAME, @@ -486,7 +486,7 @@ def test_get_dataset_force_refresh_cache(self): did_cache_dir, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_force_refresh_cache_clean_start(self): did_cache_dir = _create_cache_directory_for_id( DATASETS_CACHE_DIR_NAME, @@ -523,14 +523,14 @@ def test_deletion_of_cache_dir(self): # get_dataset_description is the only data guaranteed to be downloaded 
@mock.patch("openml.datasets.functions._get_dataset_description") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_deletion_of_cache_dir_faulty_download(self, patch): patch.side_effect = Exception("Boom!") self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. openml.datasets.get_dataset(3, download_data=True) @@ -556,7 +556,7 @@ def test_publish_dataset(self): ) assert isinstance(dataset.dataset_id, int) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__retrieve_class_labels(self): openml.config.set_root_cache_directory(self.static_cache_dir) labels = openml.datasets.get_dataset(2).retrieve_class_labels() @@ -573,7 +573,7 @@ def test__retrieve_class_labels(self): labels = custom_ds.retrieve_class_labels(target_name=custom_ds.features[31].name) assert labels == ["COIL", "SHEET"] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_dataset_with_url(self): dataset = OpenMLDataset( f"{self._get_sentinel()}-UploadTestWithURL", @@ -600,7 +600,7 @@ def _assert_status_of_dataset(self, *, did: int, status: str): assert result[did]["status"] == status @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_status(self): dataset = OpenMLDataset( f"{self._get_sentinel()}-UploadTestWithURL", @@ -692,7 +692,7 @@ def test_attributes_arff_from_df_unknown_dtype(self): with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_numpy(self): data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T @@ -726,7 +726,7 @@ def test_create_dataset_numpy(self): ), "Uploaded arff does not match original one" assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_list(self): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -781,7 +781,7 @@ def test_create_dataset_list(self): ), "Uploaded ARFF does not match original one" assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_sparse(self): # test the scipy.sparse.coo_matrix sparse_data = scipy.sparse.coo_matrix( @@ -884,7 +884,7 @@ def test_create_invalid_dataset(self): param["data"] = data[0] self.assertRaises(ValueError, create_dataset, **param) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_online_dataset_arff(self): dataset_id = 100 # Australian # lazy loading not used as arff file is checked. 
@@ -900,7 +900,7 @@ def test_get_online_dataset_arff(self): return_type=arff.DENSE if d_format == "arff" else arff.COO, ), "ARFF files are not equal" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_topic_api_error(self): # Check server exception when non-admin accessses apis self.assertRaisesRegex( @@ -919,7 +919,7 @@ def test_topic_api_error(self): topic="business", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_online_dataset_format(self): # Phoneme dataset dataset_id = 77 @@ -929,7 +929,7 @@ def test_get_online_dataset_format(self): dataset_id ), "The format of the ARFF files is different" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_pandas(self): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -1154,7 +1154,7 @@ def test_ignore_attributes_dataset(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_fetch_ignore_attribute(self): """Test to upload and retrieve dataset and check ignore_attributes""" data = [ @@ -1273,7 +1273,7 @@ def test_create_dataset_row_id_attribute_error(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_row_id_attribute_inference(self): # meta-information name = f"{self._get_sentinel()}-pandas_testing_dataset" @@ -1364,13 +1364,13 @@ def test_create_dataset_attributes_auto_without_df(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_qualities(self): qualities = openml.datasets.list_qualities() assert isinstance(qualities, list) is True assert all(isinstance(q, str) for q in qualities) is True - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) dataset.get_data() @@ -1386,7 +1386,7 @@ def test_get_dataset_cache_format_pickle(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_cache_format_feather(self): # This test crashed due to using the parquet file by default, which is downloaded # from minio. However, there is a mismatch between OpenML test server and minio IDs. 
@@ -1419,7 +1419,7 @@ def test_get_dataset_cache_format_feather(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_non_critical_field(self): # Case 1 # All users can edit non-critical fields of datasets @@ -1441,7 +1441,7 @@ def test_data_edit_non_critical_field(self): edited_dataset = openml.datasets.get_dataset(did) assert edited_dataset.description == desc - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_critical_field(self): # Case 2 # only owners (or admin) can edit all critical fields of datasets @@ -1468,7 +1468,7 @@ def test_data_edit_critical_field(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)), ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_requires_field(self): # Check server exception when no field to edit is provided self.assertRaisesRegex( @@ -1481,7 +1481,7 @@ def test_data_edit_requires_field(self): data_id=64, # blood-transfusion-service-center ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_requires_valid_dataset(self): # Check server exception when unknown dataset is provided self.assertRaisesRegex( @@ -1492,7 +1492,7 @@ def test_data_edit_requires_valid_dataset(self): description="xor operation dataset", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): # Need to own a dataset to be able to edit meta-data # Will be creating a forked version of an existing dataset to allow the unit test user @@ -1519,7 +1519,7 @@ def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): default_target_attribute="y", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_edit_data_user_cannot_edit_critical_field_of_other_users_dataset(self): # Check server exception when a non-owner or non-admin tries to edit critical fields self.assertRaisesRegex( @@ -1531,7 +1531,7 @@ def test_edit_data_user_cannot_edit_critical_field_of_other_users_dataset(self): default_target_attribute="y", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_fork(self): did = 1 result = fork_dataset(did) @@ -1823,7 +1823,7 @@ def all_datasets(): return openml.datasets.list_datasets() -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets(all_datasets: pd.DataFrame): # We can only perform a smoke test here because we test on dynamic # data from the internet... 
@@ -1832,49 +1832,49 @@ def test_list_datasets(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(all_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_tag(all_datasets: pd.DataFrame): tag_datasets = openml.datasets.list_datasets(tag="study_14") assert 0 < len(tag_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(tag_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_size(): datasets = openml.datasets.list_datasets(size=5) assert len(datasets) == 5 _assert_datasets_have_id_and_valid_status(datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_instances(all_datasets: pd.DataFrame): small_datasets = openml.datasets.list_datasets(number_instances="5..100") assert 0 < len(small_datasets) <= len(all_datasets) _assert_datasets_have_id_and_valid_status(small_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_features(all_datasets: pd.DataFrame): wide_datasets = openml.datasets.list_datasets(number_features="50..100") assert 8 <= len(wide_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(wide_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_classes(all_datasets: pd.DataFrame): five_class_datasets = openml.datasets.list_datasets(number_classes="5") assert 3 <= len(five_class_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(five_class_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_missing_values(all_datasets: pd.DataFrame): na_datasets = openml.datasets.list_datasets(number_missing_values="5..100") assert 5 <= len(na_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(na_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_combined_filters(all_datasets: pd.DataFrame): combined_filter_datasets = openml.datasets.list_datasets( tag="study_14", @@ -1947,7 +1947,7 @@ def isolate_for_test(): ("with_data", "with_qualities", "with_features"), itertools.product([True, False], repeat=3), ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_lazy_behavior( isolate_for_test, with_data: bool, with_qualities: bool, with_features: bool ): @@ -1974,7 +1974,7 @@ def test_get_dataset_lazy_behavior( ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_with_invalid_id() -> None: INVALID_ID = 123819023109238 # Well, at some point this will probably be valid... with pytest.raises(OpenMLServerNoResult, match="Unknown dataset") as e: @@ -2002,7 +2002,7 @@ def test_read_features_from_xml_with_whitespace() -> None: assert dict[1].nominal_values == [" - 50000.", " 50000+."] -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_parquet(requests_mock, test_files_directory): # Parquet functionality is disabled on the test server # There is no parquet-copy of the test server yet. 
diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index ee7c306a1..630aa7b4e 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -155,7 +155,7 @@ def test_evaluation_list_limit(self): ) assert len(evaluations) == 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_evaluations_empty(self): evaluations = openml.evaluations.list_evaluations("unexisting_measure") if len(evaluations) > 0: @@ -233,7 +233,7 @@ def test_evaluation_list_sort(self): test_output = sorted(unsorted_output, reverse=True) assert test_output[:size] == sorted_output - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_evaluation_measures(self): measures = openml.evaluations.list_evaluation_measures() assert isinstance(measures, list) is True diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 527ad1f8c..06bbbe4d5 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -103,7 +103,7 @@ def test_get_structure(self): subflow = flow.get_subflow(structure) assert subflow.flow_id == sub_flow_id - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): flows = openml.flows.list_flows(size=1) flow_id = flows["id"].iloc[0] @@ -121,7 +121,7 @@ def test_tagging(self): flows = openml.flows.list_flows(tag=tag) assert len(flows) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_from_xml_to_xml(self): # Get the raw xml thing # TODO maybe get this via get_flow(), which would have to be refactored @@ -181,7 +181,7 @@ def test_to_xml_from_xml(self): assert new_flow is not flow @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", @@ -223,7 +223,7 @@ def test_publish_existing_flow(self, flow_exists_mock): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_flow_with_similar_components(self): clf = sklearn.ensemble.VotingClassifier( [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))], @@ -274,7 +274,7 @@ def test_publish_flow_with_similar_components(self): TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_semi_legal_flow(self): # TODO: Test if parameters are set correctly! # should not throw error as it contains two differentiable forms of @@ -366,7 +366,7 @@ def test_illegal_flow(self): ) self.assertRaises(ValueError, self.extension.model_to_flow, illegal) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_nonexisting_flow_exists(self): def get_sentinel(): # Create a unique prefix for the flow. 
Necessary because the flow @@ -384,7 +384,7 @@ def get_sentinel(): assert not flow_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() @@ -425,7 +425,7 @@ def test_existing_flow_exists(self): assert downloaded_flow_id == flow.flow_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_sklearn_to_upload_to_flow(self): iris = sklearn.datasets.load_iris() X = iris.data diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 5aa99cd62..300b1e4e3 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -280,7 +280,7 @@ def test_are_flows_equal_ignore_if_older(self): reason="OrdinalEncoder introduced in 0.20. " "No known models with list of lists parameters in older versions.", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() @pytest.mark.xfail(reason="failures_issue_1544", strict=False) def test_sklearn_to_flow_list_of_lists(self): from sklearn.preprocessing import OrdinalEncoder @@ -310,7 +310,7 @@ def test_get_flow1(self): assert flow.external_version is None @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_reinstantiate_model(self): model = ensemble.RandomForestClassifier(n_estimators=33) extension = openml.extensions.get_extension_by_model(model) @@ -322,7 +322,7 @@ def test_get_flow_reinstantiate_model(self): downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_reinstantiate_model_no_extension(self): # Flow 10 is a WEKA flow self.assertRaisesRegex( @@ -393,7 +393,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): assert "sklearn==0.19.1" not in flow.dependencies @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_id(self): if self.long_version: list_all = openml.utils._list_all @@ -428,7 +428,7 @@ def test_get_flow_id(self): pytest.skip(reason="Not sure why there should only be one version of this flow.") assert flow_ids_exact_version_True == flow_ids_exact_version_False - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_delete_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index a295259ef..c8d5be25b 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -15,14 +15,14 @@ class TestConfig(openml.testing.TestBase): - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_too_long_uri(self): with pytest.raises(openml.exceptions.OpenMLServerError, match="URI too long!"): openml.datasets.list_datasets(data_id=list(range(10000))) @unittest.mock.patch("time.sleep") @unittest.mock.patch("requests.Session") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_retry_on_database_error(self, Session_class_mock, _): response_mock = unittest.mock.Mock() response_mock.text = ( @@ -117,7 +117,7 @@ def test_download_minio_failure(mock_minio, tmp_path: Path) -> None: ("task/42", "delete"), # 460 ], ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def 
test_authentication_endpoints_requiring_api_key_show_relevant_help_link( endpoint: str, method: str, diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 1a66b76c0..17349fca8 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -25,7 +25,7 @@ class TestRun(TestBase): # Splitting not helpful, these test's don't rely on the server and take # less than 1 seconds - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): runs = openml.runs.list_runs(size=1) assert not runs.empty, "Test server state is incorrect" @@ -119,7 +119,7 @@ def _check_array(array, type_): assert run_prime_trace_content is None @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_vanilla(self): model = Pipeline( [ @@ -155,7 +155,7 @@ def test_to_from_filesystem_vanilla(self): @pytest.mark.sklearn() @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_search(self): model = Pipeline( [ @@ -190,7 +190,7 @@ def test_to_from_filesystem_search(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_no_model(self): model = Pipeline( [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())], @@ -296,7 +296,7 @@ def assert_run_prediction_data(task, run, model): assert_method(y_test, saved_y_test) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_with_local_loaded_flow(self): """ Publish a run tied to a local flow after it has first been saved to @@ -340,7 +340,7 @@ def test_publish_with_local_loaded_flow(self): openml.runs.get_run(loaded_run.run_id) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_offline_and_online_run_identical(self): extension = SklearnExtension() diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8f2c505b7..b094bf948 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -398,7 +398,7 @@ def _check_sample_evaluations( assert evaluation < max_time_allowed @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_regression_on_classif_task(self): task_id = 259 # collins; crossvalidation; has numeric targets @@ -415,7 +415,7 @@ def test_run_regression_on_classif_task(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_check_erronous_sklearn_flow_fails(self): task_id = 115 # diabetes; crossvalidation task = openml.tasks.get_task(task_id) @@ -628,7 +628,7 @@ def _run_and_upload_regression( ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_logistic_regression(self): lr = LogisticRegression(solver="lbfgs", max_iter=1000) task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] @@ -637,7 +637,7 @@ def test_run_and_upload_logistic_regression(self): self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_linear_regression(self): lr = LinearRegression() task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"] @@ -668,7 +668,7 @@ def test_run_and_upload_linear_regression(self): self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") 
@pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_pipeline_dummy_pipeline(self): pipeline1 = Pipeline( steps=[ @@ -686,7 +686,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_column_transformer_pipeline(self): import sklearn.compose import sklearn.impute @@ -799,7 +799,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): assert call_count == 3 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_gridsearch(self): estimator_name = ( "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" @@ -822,7 +822,7 @@ def test_run_and_upload_gridsearch(self): assert len(run.trace.trace_iterations) == 9 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), @@ -855,7 +855,7 @@ def test_run_and_upload_randomsearch(self): assert len(trace.trace_iterations) == 5 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: # 1) it verifies the correct handling of masked arrays (not all @@ -883,7 +883,7 @@ def test_run_and_upload_maskedarrays(self): ########################################################################## @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_learning_curve_task_1(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -908,7 +908,7 @@ def test_learning_curve_task_1(self): self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_learning_curve_task_2(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -949,7 +949,7 @@ def test_learning_curve_task_2(self): Version(sklearn.__version__) < Version("0.21"), reason="Pipelines don't support indexing (used for the assert check)", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_initialize_cv_from_run(self): randomsearch = Pipeline( [ @@ -1024,7 +1024,7 @@ def _test_local_evaluations(self, run): assert alt_scores[idx] <= 1 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_swapped_parameter_order_model(self): clf = DecisionTreeClassifier() australian_task = 595 # Australian; crossvalidation @@ -1044,7 +1044,7 @@ def test_local_run_swapped_parameter_order_model(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_swapped_parameter_order_flow(self): # construct sci-kit learn classifier clf = Pipeline( @@ -1073,7 +1073,7 @@ def test_local_run_swapped_parameter_order_flow(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_metric_score(self): # construct sci-kit learn classifier clf = Pipeline( @@ -1111,7 
+1111,7 @@ def test_online_run_metric_score(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_initialize_model_from_run(self): clf = sklearn.pipeline.Pipeline( steps=[ @@ -1173,7 +1173,7 @@ def test_initialize_model_from_run(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__run_exists(self): # would be better to not sentinel these clfs, # so we do not have to perform the actual runs @@ -1229,7 +1229,7 @@ def test__run_exists(self): assert run_ids, (run_ids, clf) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id(self): # check the case where the user adds an illegal flow id to a # non-existing flo @@ -1249,7 +1249,7 @@ def test_run_with_illegal_flow_id(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_after_load(self): # Same as `test_run_with_illegal_flow_id`, but test this error is also # caught if the run is stored to and loaded from disk first. @@ -1281,7 +1281,7 @@ def test_run_with_illegal_flow_id_after_load(self): TestBase.logger.info(f"collected from test_run_functions: {loaded_run.run_id}") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_1(self): # Check the case where the user adds an illegal flow id to an existing # flow. Comes to a different value error than the previous test @@ -1307,7 +1307,7 @@ def test_run_with_illegal_flow_id_1(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_1_after_load(self): # Same as `test_run_with_illegal_flow_id_1`, but test this error is # also caught if the run is stored to and loaded from disk first. 
@@ -1350,7 +1350,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): Version(sklearn.__version__) < Version("0.20"), reason="OneHotEncoder cannot handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__run_task_get_arffcontent(self): task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation num_instances = 3196 @@ -1451,7 +1451,7 @@ def test_get_runs_list(self): for run in runs.to_dict(orient="index").values(): self._check_run(run) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_runs_empty(self): runs = openml.runs.list_runs(task=[0]) assert runs.empty @@ -1580,7 +1580,7 @@ def test_get_runs_list_by_tag(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_on_dataset_with_missing_labels_dataframe(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the @@ -1617,7 +1617,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_on_dataset_with_missing_labels_array(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the @@ -1656,7 +1656,7 @@ def test_run_on_dataset_with_missing_labels_array(self): # repeat, fold, row_id, 6 confidences, prediction and correct label assert len(row) == 12 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_cached_run(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.runs.functions._get_cached_run(1) @@ -1667,7 +1667,7 @@ def test_get_uncached_run(self): openml.runs.functions._get_cached_run(10) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_flow_on_task_downloaded_flow(self): model = sklearn.ensemble.RandomForestClassifier(n_estimators=33) flow = self.extension.model_to_flow(model) @@ -1698,7 +1698,7 @@ def test_format_prediction_non_supervised(self): ): format_prediction(clustering, *ignored_input) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_classification_no_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1708,7 +1708,7 @@ def test_format_prediction_classification_no_probabilities(self): with pytest.raises(ValueError, match="`proba` is required for classification task"): format_prediction(classification, *ignored_input, proba=None) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_classification_incomplete_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1719,7 +1719,7 @@ def test_format_prediction_classification_incomplete_probabilities(self): with pytest.raises(ValueError, match="Each class should have a predicted probability"): format_prediction(classification, *ignored_input, proba=incomplete_probabilities) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_without_classlabels_set(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1730,7 +1730,7 @@ def 
test_format_prediction_task_without_classlabels_set(self): with pytest.raises(ValueError, match="The classification task must have class labels set"): format_prediction(classification, *ignored_input, proba={}) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_learning_curve_sample_not_set(self): learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} @@ -1738,7 +1738,7 @@ def test_format_prediction_task_learning_curve_sample_not_set(self): with pytest.raises(ValueError, match="`sample` can not be none for LearningCurveTask"): format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_regression(self): task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"] _task_id = check_task_existence(**task_meta_data) @@ -1773,7 +1773,7 @@ def test_format_prediction_task_regression(self): reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_delete_run(self): rs = np.random.randint(1, 2**31 - 1) clf = sklearn.pipeline.Pipeline( @@ -1874,7 +1874,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): reason="couldn't perform local tests successfully w/o bloating RAM", ) @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test__run_task_get_arffcontent_2(parallel_mock): """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp @@ -1965,7 +1965,7 @@ def test__run_task_get_arffcontent_2(parallel_mock): (-1, "threading", 10), # the threading backend does preserve mocks even with parallelizing ] ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_joblib_backends(parallel_mock, n_jobs, backend, call_count): """Tests evaluation of a run using various joblib backends and n_jobs.""" if backend is None: diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index a0469f9a5..0795d392c 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -35,7 +35,7 @@ def setUp(self): super().setUp() @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_nonexisting_setup_exists(self): # first publish a non-existing flow sentinel = get_sentinel() @@ -83,7 +83,7 @@ def _existing_setup_exists(self, classif): assert setup_id == run.setup_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_setup_exists_1(self): def side_effect(self): self.var_smoothing = 1e-9 @@ -99,13 +99,13 @@ def side_effect(self): self._existing_setup_exists(nb) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_exisiting_setup_exists_2(self): # Check a flow with one hyperparameter self._existing_setup_exists(sklearn.naive_bayes.GaussianNB()) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_setup_exists_3(self): # Check a flow with many hyperparameters self._existing_setup_exists( @@ -147,7 +147,7 @@ def test_setup_list_filter_flow(self): for setup_id in setups: assert 
setups[setup_id].flow_id == flow_id - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_setups_empty(self): setups = openml.setups.list_setups(setup=[0]) if len(setups) > 0: @@ -168,7 +168,7 @@ def test_list_setups_output_format(self): assert isinstance(setups, pd.DataFrame) assert len(setups) == 10 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_setuplist_offset(self): size = 10 setups = openml.setups.list_setups(offset=0, size=size) @@ -180,7 +180,7 @@ def test_setuplist_offset(self): assert len(all) == size * 2 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_cached_setup(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.setups.functions._get_cached_setup(1) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 4b662524b..b907ee740 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -74,7 +74,7 @@ def test_get_suite_error(self): ): openml.study.get_suite(123) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_benchmark_suite(self): fixture_alias = None fixture_name = "unit tested benchmark suite" @@ -143,16 +143,16 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool): assert study_downloaded.main_entity_type == "run" assert study_downloaded.runs is None - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_empty_study_explicit(self): self._test_publish_empty_study_is_allowed(explicit=True) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_empty_study_implicit(self): self._test_publish_empty_study_is_allowed(explicit=False) @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_study(self): # get some random runs to attach run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) @@ -222,7 +222,7 @@ def test_publish_study(self): res = openml.study.delete_study(study.id) assert res - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_study_attach_illegal(self): run_list = openml.runs.list_runs(size=10) assert len(run_list) == 10 diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index fed0c0a00..65dcebc1d 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -18,7 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 5 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id @@ -26,13 +26,13 @@ def test_download_task(self): assert task.dataset_id == 20 assert task.estimation_procedure_id == self.estimation_procedure - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_X_and_Y(): task = get_task(119) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index 2bbb015c6..b1ce569e2 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -28,7 +28,7 @@ def test_get_dataset(self): task.get_dataset() 
@pytest.mark.production() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): # no clustering tasks on test server self.use_production_server() @@ -37,7 +37,7 @@ def test_download_task(self): assert task.task_type_id == TaskType.CLUSTERING assert task.dataset_id == 36 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_task(self): compatible_datasets = self._get_compatible_rand_dataset() for i in range(100): diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index fbcbfe9bf..465d9c0be 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -18,7 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (768, 8) @@ -27,14 +27,14 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_categorical_dtype(Y) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id assert task.task_type_id == TaskType.LEARNING_CURVE assert task.dataset_id == 20 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index a834cdf0f..26d7dc94b 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -49,7 +49,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_REGRESSION - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (194, 32) @@ -58,7 +58,7 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_numeric_dtype(Y) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index 3f7b06ee4..99df3cace 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -28,7 +28,7 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self) -> tuple[pd.DataFrame, pd.Series]: task = get_task(self.task_id) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index b77782847..1d0df1210 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -32,11 +32,11 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): return get_task(self.task_id) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_task(self): # We don't know if the task in question already exists, so we try a few times. 
Checking # beforehand would not be an option because a concurrent unit test could potentially diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index d44717177..7b75262e4 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -26,7 +26,7 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_cached_tasks(self): openml.config.set_root_cache_directory(self.static_cache_dir) tasks = openml.tasks.functions._get_cached_tasks() @@ -34,7 +34,7 @@ def test__get_cached_tasks(self): assert len(tasks) == 3 assert isinstance(next(iter(tasks.values())), OpenMLTask) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_cached_task(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.functions._get_cached_task(1) @@ -49,7 +49,7 @@ def test__get_cached_task_not_cached(self): 2, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_estimation_procedure_list(self): estimation_procedures = openml.tasks.functions._get_estimation_procedure_list() assert isinstance(estimation_procedures, list) @@ -73,7 +73,7 @@ def _check_task(self, task): assert isinstance(task["status"], str) assert task["status"] in ["in_preparation", "active", "deactivated"] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_by_type(self): num_curves_tasks = 198 # number is flexible, check server if fails ttid = TaskType.LEARNING_CURVE @@ -83,18 +83,18 @@ def test_list_tasks_by_type(self): assert ttid == task["ttid"] self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_length(self): ttid = TaskType.LEARNING_CURVE tasks = openml.tasks.list_tasks(task_type=ttid) assert len(tasks) > 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_empty(self): tasks = openml.tasks.list_tasks(tag="NoOneWillEverUseThisTag") assert tasks.empty - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_by_tag(self): num_basic_tasks = 100 # number is flexible, check server if fails tasks = openml.tasks.list_tasks(tag="OpenML100") @@ -102,14 +102,14 @@ def test_list_tasks_by_tag(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks(self): tasks = openml.tasks.list_tasks() assert len(tasks) >= 900 for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_paginate(self): size = 10 max = 100 @@ -119,7 +119,7 @@ def test_list_tasks_paginate(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_per_type_paginate(self): size = 40 max = 100 @@ -136,7 +136,7 @@ def test_list_tasks_per_type_paginate(self): assert j == task["ttid"] self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_task(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.tasks.get_task(1882) @@ -151,7 +151,7 @@ def test__get_task_live(self): # https://github.com/openml/openml-python/issues/378 openml.tasks.get_task(34536) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task(self): task = 
openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -165,7 +165,7 @@ def test_get_task(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -188,7 +188,7 @@ def test_get_task_lazy(self): ) @mock.patch("openml.tasks.functions.get_dataset") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_removal_upon_download_failure(self, get_dataset): class WeirdException(Exception): pass @@ -206,7 +206,7 @@ def assert_and_raise(*args, **kwargs): # Now the file should no longer exist assert not os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task_with_cache(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1) @@ -222,7 +222,7 @@ def test_get_task_different_types(self): # Issue 538, get_task failing with clustering task. openml.tasks.functions.get_task(126033) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_split(self): task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 6b8804b9f..9316d0876 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -16,7 +16,7 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # tags can be at most 64 alphanumeric (+ underscore) chars @@ -32,7 +32,7 @@ def test_tagging(self): tasks = openml.tasks.list_tasks(tag=tag) assert len(tasks) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_train_and_test_split_indices(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1882) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index a1cdb55ea..4be6cd58a 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -48,18 +48,18 @@ def _mocked_perform_api_call(call, request_method): return openml._api_calls._download_text_file(url) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all(): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_tasks(min_number_tasks_on_test_server): tasks = openml.tasks.list_tasks(size=min_number_tasks_on_test_server) assert min_number_tasks_on_test_server == len(tasks) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): # By setting the batch size one lower than the minimum we guarantee at least two # batches and at the same time do as few batches (roundtrips) as possible. 
@@ -72,7 +72,7 @@ def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): assert min_number_tasks_on_test_server <= sum(len(batch) for batch in batches) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_datasets(min_number_datasets_on_test_server): datasets = openml.datasets.list_datasets( size=min_number_datasets_on_test_server, @@ -83,14 +83,14 @@ def test_list_all_for_datasets(min_number_datasets_on_test_server): _check_dataset(dataset) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_flows(min_number_flows_on_test_server): flows = openml.flows.list_flows(size=min_number_flows_on_test_server) assert min_number_flows_on_test_server == len(flows) @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_setups(min_number_setups_on_test_server): # TODO apparently list_setups function does not support kwargs setups = openml.setups.list_setups(size=min_number_setups_on_test_server) @@ -98,14 +98,14 @@ def test_list_all_for_setups(min_number_setups_on_test_server): @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_runs(min_number_runs_on_test_server): runs = openml.runs.list_runs(size=min_number_runs_on_test_server) assert min_number_runs_on_test_server == len(runs) @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_evaluations(min_number_evaluations_on_test_server): # TODO apparently list_evaluations function does not support kwargs evaluations = openml.evaluations.list_evaluations( @@ -116,7 +116,7 @@ def test_list_all_for_evaluations(min_number_evaluations_on_test_server): @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_few_results_available(_perform_api_call): datasets = openml.datasets.list_datasets(size=1000, data_name="iris", data_version=1) assert len(datasets) == 1, "only one iris dataset version 1 should be present" @@ -141,7 +141,7 @@ def test__create_cache_directory(config_mock, tmp_path): openml.utils._create_cache_directory("ghi") -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_correct_test_server_download_state(): """This test verifies that the test server downloads the data from the correct source.
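
For context on how the renamed marker is consumed, the short sketch below shows a test decorated with the test_server marker registered in pyproject.toml above, together with the pytest commands that select or deselect such tests. This is an illustrative sketch only, not part of the patch series: the module and test name are hypothetical, while the decorator form and the openml.datasets.list_datasets(size=5) call mirror usage that appears in the diffs above.

    # Hypothetical example (not part of this patch series): applying the
    # registered marker to a test that needs the OpenML test server.
    import pytest

    import openml


    @pytest.mark.test_server()
    def test_list_datasets_smoke():
        # Talks to the OpenML test server, so it can be deselected offline.
        datasets = openml.datasets.list_datasets(size=5)
        assert len(datasets) == 5

    # Run everything except server-dependent tests:
    #   pytest -m "not test_server"
    # Run only the server-dependent tests:
    #   pytest -m "test_server"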