From 0380d0f774a7e14e2884711669922c446f5a9534 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Thu, 5 Feb 2026 12:46:35 +0530
Subject: [PATCH 1/4] Register uses_test_server pytest marker

---
 pytest.ini | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..05d163208
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+    uses_test_server: tests that require the OpenML test server
+

From d1b765f484569b5fe0376ba7a941ef8939fe3157 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Fri, 6 Feb 2026 22:46:02 +0530
Subject: [PATCH 2/4] Register uses_test_server pytest marker in pyproject.toml

---
 pytest.ini | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 05d163208..000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-markers =
-    uses_test_server: tests that require the OpenML test server
-

From 18cc404de31eb39079621fe505e1a1b150d4e044 Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Mon, 16 Feb 2026 21:07:52 +0530
Subject: [PATCH 3/4] Register uses_test_server pytest marker in pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 93a6ffbfa..9e1aa20e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,6 +137,7 @@ markers = [
     "upload: anything that uploads to a server",
     "production: any interaction with the production server",
     "cache: anything that interacts with the (test) cache",
+    "uses_test_server: tests that require the OpenML test server",
 ]
 
 # https://github.com/charliermarsh/ruff

From 1ff94fe1af1cf220937270581c6c0a52e574807f Mon Sep 17 00:00:00 2001
From: AniruthKarthik
Date: Mon, 16 Feb 2026 23:10:28 +0530
Subject: [PATCH 4/4] Rename uses_test_server to test_server and remove unused server marker

---
 pyproject.toml                                |   3 +-
 tests/test_datasets/test_dataset.py           |  16 +--
 tests/test_datasets/test_dataset_functions.py | 116 +++++++++---------
 .../test_evaluation_functions.py              |   4 +-
 tests/test_flows/test_flow.py                 |  16 +--
 tests/test_flows/test_flow_functions.py       |  10 +-
 tests/test_openml/test_api_calls.py           |   6 +-
 tests/test_runs/test_run.py                   |  12 +-
 tests/test_runs/test_run_functions.py         |  70 +++++------
 tests/test_setups/test_setup_functions.py     |  14 +--
 tests/test_study/test_study_functions.py      |  10 +-
 tests/test_tasks/test_classification_task.py  |   6 +-
 tests/test_tasks/test_clustering_task.py      |   4 +-
 tests/test_tasks/test_learning_curve_task.py  |   6 +-
 tests/test_tasks/test_regression_task.py      |   4 +-
 tests/test_tasks/test_supervised_task.py      |   2 +-
 tests/test_tasks/test_task.py                 |   4 +-
 tests/test_tasks/test_task_functions.py       |  32 ++---
 tests/test_tasks/test_task_methods.py         |   4 +-
 tests/test_utils/test_utils.py                |  20 +--
 20 files changed, 179 insertions(+), 180 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9e1aa20e5..880cc4f11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -133,11 +133,10 @@ filterwarnings=[
     "ignore:the matrix subclass:PendingDeprecationWarning"
 ]
 markers = [
-    "server: anything that connects to a server",
     "upload: anything that uploads to a server",
     "production: any interaction with the production server",
     "cache: anything that interacts with the (test) cache",
-    "uses_test_server: tests that require the OpenML test server",
+    "test_server: tests that require the OpenML test server",
 ]
 
 # https://github.com/charliermarsh/ruff

diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index b13bac30b..bc2270fd6 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -281,7 +281,7 @@ def test_equality_comparison(self):
         self.assertNotEqual(self.titanic, "Wrong_object")
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_tagging():
     dataset = openml.datasets.get_dataset(125, download_data=False)
 
@@ -298,7 +298,7 @@ def test_tagging():
     datasets = openml.datasets.list_datasets(tag=tag)
     assert datasets.empty
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_get_feature_with_ontology_data_id_11():
     # test on car dataset, which has built-in ontology references
     dataset = openml.datasets.get_dataset(11)
@@ -307,7 +307,7 @@ def test_get_feature_with_ontology_data_id_11():
     assert len(dataset.features[2].ontologies) >= 1
     assert len(dataset.features[3].ontologies) >= 1
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_remove_ontology_to_dataset():
     did = 1
     feature_index = 1
@@ -315,7 +315,7 @@ def test_add_remove_ontology_to_dataset():
     openml.datasets.functions.data_feature_add_ontology(did, feature_index, ontology)
     openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology)
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_same_ontology_multiple_features():
     did = 1
     ontology = "https://www.openml.org/unittest/" + str(time())
@@ -324,7 +324,7 @@ def test_add_same_ontology_multiple_features():
         openml.datasets.functions.data_feature_add_ontology(did, i, ontology)
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_illegal_long_ontology():
     did = 1
     ontology = "http://www.google.com/" + ("a" * 257)
@@ -336,7 +336,7 @@ def test_add_illegal_long_ontology():
 
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test_add_illegal_url_ontology():
     did = 1
     ontology = "not_a_url" + str(time())
@@ -408,7 +408,7 @@ def test_get_sparse_categorical_data_id_395(self):
         assert len(feature.nominal_values) == 25
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test__read_features(mocker, workdir, static_cache_dir):
     """Test we read the features from the xml if no cache pickle is available.
     This test also does some simple checks to verify that the features are read correctly
@@ -440,7 +440,7 @@ def test__read_features(mocker, workdir, static_cache_dir):
     assert pickle_mock.dump.call_count == 1
 
 
-@pytest.mark.uses_test_server()
+@pytest.mark.test_server()
 def test__read_qualities(static_cache_dir, workdir, mocker):
     """Test we read the qualities from the xml if no cache pickle is available.
     This test also does some minor checks to ensure that the qualities are read correctly.
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c41664ba7..f67379a2d 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -107,7 +107,7 @@ def _check_datasets(self, datasets): for did in datasets: self._check_dataset(datasets[did]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tag_untag_dataset(self): tag = "test_tag_%d" % random.randint(1, 1000000) all_tags = _tag_entity("data", 1, tag) @@ -115,12 +115,12 @@ def test_tag_untag_dataset(self): all_tags = _tag_entity("data", 1, tag, untag=True) assert tag not in all_tags - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_length(self): datasets = openml.datasets.list_datasets() assert len(datasets) >= 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_paginate(self): size = 10 max = 100 @@ -135,7 +135,7 @@ def test_list_datasets_paginate(self): categories=["in_preparation", "active", "deactivated"], ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_datasets_empty(self): datasets = openml.datasets.list_datasets(tag="NoOneWouldUseThisTagAnyway") assert datasets.empty @@ -159,7 +159,7 @@ def test_check_datasets_active(self): ) openml.config.server = self.test_server - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) tag = "illegal_tag&" @@ -169,7 +169,7 @@ def test_illegal_character_tag(self): except openml.exceptions.OpenMLServerException as e: assert e.code == 477 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_illegal_length_tag(self): dataset = openml.datasets.get_dataset(1) tag = "a" * 65 @@ -211,7 +211,7 @@ def test__name_to_id_with_multiple_active_error(self): error_if_multiple=True, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__name_to_id_name_does_not_exist(self): """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( @@ -221,7 +221,7 @@ def test__name_to_id_name_does_not_exist(self): dataset_name="does_not_exist", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__name_to_id_version_does_not_exist(self): """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( @@ -232,7 +232,7 @@ def test__name_to_id_version_does_not_exist(self): version=100000, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets_by_name(self): # did 1 and 2 on the test server: dids = ["anneal", "kr-vs-kp"] @@ -240,7 +240,7 @@ def test_get_datasets_by_name(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets_by_mixed(self): # did 1 and 2 on the test server: dids = ["anneal", 2] @@ -248,14 +248,14 @@ def test_get_datasets_by_mixed(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_datasets(self): dids = [1, 2] datasets = openml.datasets.get_datasets(dids) assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_by_name(self): dataset = openml.datasets.get_dataset("anneal") assert type(dataset) == OpenMLDataset @@ -274,7 +274,7 @@ 
def test_get_dataset_download_all_files(self): # test_get_dataset_lazy raise NotImplementedError - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) assert type(dataset) == OpenMLDataset @@ -293,7 +293,7 @@ def test_dataset_by_name_cannot_access_private_data(self): self.use_production_server() self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_lazy_all_functions(self): """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1) @@ -323,28 +323,28 @@ def ensure_absence_of_real_data(): assert classes == ["1", "2", "3", "4", "5", "U"] ensure_absence_of_real_data() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_sparse(self): dataset = openml.datasets.get_dataset(102) X, *_ = dataset.get_data() assert isinstance(X, pd.DataFrame) assert all(isinstance(col, pd.SparseDtype) for col in X.dtypes) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_rowid(self): # Smoke test which checks that the dataset has the row-id set correctly did = 44 dataset = openml.datasets.get_dataset(did) assert dataset.row_id_attribute == "Counter" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_description(self): description = _get_dataset_description(self.workdir, 2) assert isinstance(description, dict) description_xml_path = os.path.join(self.workdir, "description.xml") assert os.path.exists(description_xml_path) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__getarff_path_dataset_arff(self): openml.config.set_root_cache_directory(self.static_cache_dir) description = _get_dataset_description(self.workdir, 2) @@ -408,7 +408,7 @@ def test__download_minio_file_works_with_bucket_subdirectory(self): @mock.patch("openml._api_calls._download_minio_file") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_parquet_is_cached(self, patch): openml.config.set_root_cache_directory(self.static_cache_dir) patch.side_effect = RuntimeError( @@ -449,21 +449,21 @@ def test__getarff_md5_issue(self): openml.config.connection_n_retries = n - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) assert isinstance(features_file, Path) features_xml_path = self.workdir / "features.xml" assert features_xml_path.exists() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_dataset_qualities(self): qualities = _get_dataset_qualities_file(self.workdir, 2) assert isinstance(qualities, Path) qualities_xml_path = self.workdir / "qualities.xml" assert qualities_xml_path.exists() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_force_refresh_cache(self): did_cache_dir = _create_cache_directory_for_id( DATASETS_CACHE_DIR_NAME, @@ -486,7 +486,7 @@ def test_get_dataset_force_refresh_cache(self): did_cache_dir, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_force_refresh_cache_clean_start(self): did_cache_dir = _create_cache_directory_for_id( DATASETS_CACHE_DIR_NAME, @@ -523,14 +523,14 @@ def test_deletion_of_cache_dir(self): # get_dataset_description is the only data guaranteed to be downloaded 
@mock.patch("openml.datasets.functions._get_dataset_description") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_deletion_of_cache_dir_faulty_download(self, patch): patch.side_effect = Exception("Boom!") self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. openml.datasets.get_dataset(3, download_data=True) @@ -556,7 +556,7 @@ def test_publish_dataset(self): ) assert isinstance(dataset.dataset_id, int) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__retrieve_class_labels(self): openml.config.set_root_cache_directory(self.static_cache_dir) labels = openml.datasets.get_dataset(2).retrieve_class_labels() @@ -573,7 +573,7 @@ def test__retrieve_class_labels(self): labels = custom_ds.retrieve_class_labels(target_name=custom_ds.features[31].name) assert labels == ["COIL", "SHEET"] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_dataset_with_url(self): dataset = OpenMLDataset( f"{self._get_sentinel()}-UploadTestWithURL", @@ -600,7 +600,7 @@ def _assert_status_of_dataset(self, *, did: int, status: str): assert result[did]["status"] == status @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_status(self): dataset = OpenMLDataset( f"{self._get_sentinel()}-UploadTestWithURL", @@ -692,7 +692,7 @@ def test_attributes_arff_from_df_unknown_dtype(self): with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_numpy(self): data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T @@ -726,7 +726,7 @@ def test_create_dataset_numpy(self): ), "Uploaded arff does not match original one" assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_list(self): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -781,7 +781,7 @@ def test_create_dataset_list(self): ), "Uploaded ARFF does not match original one" assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_sparse(self): # test the scipy.sparse.coo_matrix sparse_data = scipy.sparse.coo_matrix( @@ -884,7 +884,7 @@ def test_create_invalid_dataset(self): param["data"] = data[0] self.assertRaises(ValueError, create_dataset, **param) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_online_dataset_arff(self): dataset_id = 100 # Australian # lazy loading not used as arff file is checked. 
@@ -900,7 +900,7 @@ def test_get_online_dataset_arff(self): return_type=arff.DENSE if d_format == "arff" else arff.COO, ), "ARFF files are not equal" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_topic_api_error(self): # Check server exception when non-admin accessses apis self.assertRaisesRegex( @@ -919,7 +919,7 @@ def test_topic_api_error(self): topic="business", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_online_dataset_format(self): # Phoneme dataset dataset_id = 77 @@ -929,7 +929,7 @@ def test_get_online_dataset_format(self): dataset_id ), "The format of the ARFF files is different" - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_pandas(self): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -1154,7 +1154,7 @@ def test_ignore_attributes_dataset(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_fetch_ignore_attribute(self): """Test to upload and retrieve dataset and check ignore_attributes""" data = [ @@ -1273,7 +1273,7 @@ def test_create_dataset_row_id_attribute_error(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_create_dataset_row_id_attribute_inference(self): # meta-information name = f"{self._get_sentinel()}-pandas_testing_dataset" @@ -1364,13 +1364,13 @@ def test_create_dataset_attributes_auto_without_df(self): paper_url=paper_url, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_qualities(self): qualities = openml.datasets.list_qualities() assert isinstance(qualities, list) is True assert all(isinstance(q, str) for q in qualities) is True - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) dataset.get_data() @@ -1386,7 +1386,7 @@ def test_get_dataset_cache_format_pickle(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_dataset_cache_format_feather(self): # This test crashed due to using the parquet file by default, which is downloaded # from minio. However, there is a mismatch between OpenML test server and minio IDs. 
@@ -1419,7 +1419,7 @@ def test_get_dataset_cache_format_feather(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_non_critical_field(self): # Case 1 # All users can edit non-critical fields of datasets @@ -1441,7 +1441,7 @@ def test_data_edit_non_critical_field(self): edited_dataset = openml.datasets.get_dataset(did) assert edited_dataset.description == desc - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_critical_field(self): # Case 2 # only owners (or admin) can edit all critical fields of datasets @@ -1468,7 +1468,7 @@ def test_data_edit_critical_field(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)), ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_requires_field(self): # Check server exception when no field to edit is provided self.assertRaisesRegex( @@ -1481,7 +1481,7 @@ def test_data_edit_requires_field(self): data_id=64, # blood-transfusion-service-center ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_requires_valid_dataset(self): # Check server exception when unknown dataset is provided self.assertRaisesRegex( @@ -1492,7 +1492,7 @@ def test_data_edit_requires_valid_dataset(self): description="xor operation dataset", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): # Need to own a dataset to be able to edit meta-data # Will be creating a forked version of an existing dataset to allow the unit test user @@ -1519,7 +1519,7 @@ def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): default_target_attribute="y", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_edit_data_user_cannot_edit_critical_field_of_other_users_dataset(self): # Check server exception when a non-owner or non-admin tries to edit critical fields self.assertRaisesRegex( @@ -1531,7 +1531,7 @@ def test_edit_data_user_cannot_edit_critical_field_of_other_users_dataset(self): default_target_attribute="y", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_data_fork(self): did = 1 result = fork_dataset(did) @@ -1823,7 +1823,7 @@ def all_datasets(): return openml.datasets.list_datasets() -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets(all_datasets: pd.DataFrame): # We can only perform a smoke test here because we test on dynamic # data from the internet... 
@@ -1832,49 +1832,49 @@ def test_list_datasets(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(all_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_tag(all_datasets: pd.DataFrame): tag_datasets = openml.datasets.list_datasets(tag="study_14") assert 0 < len(tag_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(tag_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_size(): datasets = openml.datasets.list_datasets(size=5) assert len(datasets) == 5 _assert_datasets_have_id_and_valid_status(datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_instances(all_datasets: pd.DataFrame): small_datasets = openml.datasets.list_datasets(number_instances="5..100") assert 0 < len(small_datasets) <= len(all_datasets) _assert_datasets_have_id_and_valid_status(small_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_features(all_datasets: pd.DataFrame): wide_datasets = openml.datasets.list_datasets(number_features="50..100") assert 8 <= len(wide_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(wide_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_classes(all_datasets: pd.DataFrame): five_class_datasets = openml.datasets.list_datasets(number_classes="5") assert 3 <= len(five_class_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(five_class_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_by_number_missing_values(all_datasets: pd.DataFrame): na_datasets = openml.datasets.list_datasets(number_missing_values="5..100") assert 5 <= len(na_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(na_datasets) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_datasets_combined_filters(all_datasets: pd.DataFrame): combined_filter_datasets = openml.datasets.list_datasets( tag="study_14", @@ -1947,7 +1947,7 @@ def isolate_for_test(): ("with_data", "with_qualities", "with_features"), itertools.product([True, False], repeat=3), ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_lazy_behavior( isolate_for_test, with_data: bool, with_qualities: bool, with_features: bool ): @@ -1974,7 +1974,7 @@ def test_get_dataset_lazy_behavior( ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_with_invalid_id() -> None: INVALID_ID = 123819023109238 # Well, at some point this will probably be valid... with pytest.raises(OpenMLServerNoResult, match="Unknown dataset") as e: @@ -2002,7 +2002,7 @@ def test_read_features_from_xml_with_whitespace() -> None: assert dict[1].nominal_values == [" - 50000.", " 50000+."] -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_dataset_parquet(requests_mock, test_files_directory): # Parquet functionality is disabled on the test server # There is no parquet-copy of the test server yet. 
diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index ee7c306a1..630aa7b4e 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -155,7 +155,7 @@ def test_evaluation_list_limit(self): ) assert len(evaluations) == 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_evaluations_empty(self): evaluations = openml.evaluations.list_evaluations("unexisting_measure") if len(evaluations) > 0: @@ -233,7 +233,7 @@ def test_evaluation_list_sort(self): test_output = sorted(unsorted_output, reverse=True) assert test_output[:size] == sorted_output - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_evaluation_measures(self): measures = openml.evaluations.list_evaluation_measures() assert isinstance(measures, list) is True diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 527ad1f8c..06bbbe4d5 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -103,7 +103,7 @@ def test_get_structure(self): subflow = flow.get_subflow(structure) assert subflow.flow_id == sub_flow_id - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): flows = openml.flows.list_flows(size=1) flow_id = flows["id"].iloc[0] @@ -121,7 +121,7 @@ def test_tagging(self): flows = openml.flows.list_flows(tag=tag) assert len(flows) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_from_xml_to_xml(self): # Get the raw xml thing # TODO maybe get this via get_flow(), which would have to be refactored @@ -181,7 +181,7 @@ def test_to_xml_from_xml(self): assert new_flow is not flow @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", @@ -223,7 +223,7 @@ def test_publish_existing_flow(self, flow_exists_mock): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_flow_with_similar_components(self): clf = sklearn.ensemble.VotingClassifier( [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))], @@ -274,7 +274,7 @@ def test_publish_flow_with_similar_components(self): TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_semi_legal_flow(self): # TODO: Test if parameters are set correctly! # should not throw error as it contains two differentiable forms of @@ -366,7 +366,7 @@ def test_illegal_flow(self): ) self.assertRaises(ValueError, self.extension.model_to_flow, illegal) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_nonexisting_flow_exists(self): def get_sentinel(): # Create a unique prefix for the flow. 
Necessary because the flow @@ -384,7 +384,7 @@ def get_sentinel(): assert not flow_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() @@ -425,7 +425,7 @@ def test_existing_flow_exists(self): assert downloaded_flow_id == flow.flow_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_sklearn_to_upload_to_flow(self): iris = sklearn.datasets.load_iris() X = iris.data diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 5aa99cd62..300b1e4e3 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -280,7 +280,7 @@ def test_are_flows_equal_ignore_if_older(self): reason="OrdinalEncoder introduced in 0.20. " "No known models with list of lists parameters in older versions.", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() @pytest.mark.xfail(reason="failures_issue_1544", strict=False) def test_sklearn_to_flow_list_of_lists(self): from sklearn.preprocessing import OrdinalEncoder @@ -310,7 +310,7 @@ def test_get_flow1(self): assert flow.external_version is None @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_reinstantiate_model(self): model = ensemble.RandomForestClassifier(n_estimators=33) extension = openml.extensions.get_extension_by_model(model) @@ -322,7 +322,7 @@ def test_get_flow_reinstantiate_model(self): downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_reinstantiate_model_no_extension(self): # Flow 10 is a WEKA flow self.assertRaisesRegex( @@ -393,7 +393,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): assert "sklearn==0.19.1" not in flow.dependencies @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_flow_id(self): if self.long_version: list_all = openml.utils._list_all @@ -428,7 +428,7 @@ def test_get_flow_id(self): pytest.skip(reason="Not sure why there should only be one version of this flow.") assert flow_ids_exact_version_True == flow_ids_exact_version_False - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_delete_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index a295259ef..c8d5be25b 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -15,14 +15,14 @@ class TestConfig(openml.testing.TestBase): - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_too_long_uri(self): with pytest.raises(openml.exceptions.OpenMLServerError, match="URI too long!"): openml.datasets.list_datasets(data_id=list(range(10000))) @unittest.mock.patch("time.sleep") @unittest.mock.patch("requests.Session") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_retry_on_database_error(self, Session_class_mock, _): response_mock = unittest.mock.Mock() response_mock.text = ( @@ -117,7 +117,7 @@ def test_download_minio_failure(mock_minio, tmp_path: Path) -> None: ("task/42", "delete"), # 460 ], ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def 
test_authentication_endpoints_requiring_api_key_show_relevant_help_link( endpoint: str, method: str, diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 1a66b76c0..17349fca8 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -25,7 +25,7 @@ class TestRun(TestBase): # Splitting not helpful, these test's don't rely on the server and take # less than 1 seconds - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): runs = openml.runs.list_runs(size=1) assert not runs.empty, "Test server state is incorrect" @@ -119,7 +119,7 @@ def _check_array(array, type_): assert run_prime_trace_content is None @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_vanilla(self): model = Pipeline( [ @@ -155,7 +155,7 @@ def test_to_from_filesystem_vanilla(self): @pytest.mark.sklearn() @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_search(self): model = Pipeline( [ @@ -190,7 +190,7 @@ def test_to_from_filesystem_search(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_to_from_filesystem_no_model(self): model = Pipeline( [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())], @@ -296,7 +296,7 @@ def assert_run_prediction_data(task, run, model): assert_method(y_test, saved_y_test) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_with_local_loaded_flow(self): """ Publish a run tied to a local flow after it has first been saved to @@ -340,7 +340,7 @@ def test_publish_with_local_loaded_flow(self): openml.runs.get_run(loaded_run.run_id) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_offline_and_online_run_identical(self): extension = SklearnExtension() diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8f2c505b7..b094bf948 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -398,7 +398,7 @@ def _check_sample_evaluations( assert evaluation < max_time_allowed @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_regression_on_classif_task(self): task_id = 259 # collins; crossvalidation; has numeric targets @@ -415,7 +415,7 @@ def test_run_regression_on_classif_task(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_check_erronous_sklearn_flow_fails(self): task_id = 115 # diabetes; crossvalidation task = openml.tasks.get_task(task_id) @@ -628,7 +628,7 @@ def _run_and_upload_regression( ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_logistic_regression(self): lr = LogisticRegression(solver="lbfgs", max_iter=1000) task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] @@ -637,7 +637,7 @@ def test_run_and_upload_logistic_regression(self): self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_linear_regression(self): lr = LinearRegression() task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"] @@ -668,7 +668,7 @@ def test_run_and_upload_linear_regression(self): self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") 
@pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_pipeline_dummy_pipeline(self): pipeline1 = Pipeline( steps=[ @@ -686,7 +686,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_column_transformer_pipeline(self): import sklearn.compose import sklearn.impute @@ -799,7 +799,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): assert call_count == 3 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_gridsearch(self): estimator_name = ( "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" @@ -822,7 +822,7 @@ def test_run_and_upload_gridsearch(self): assert len(run.trace.trace_iterations) == 9 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), @@ -855,7 +855,7 @@ def test_run_and_upload_randomsearch(self): assert len(trace.trace_iterations) == 5 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: # 1) it verifies the correct handling of masked arrays (not all @@ -883,7 +883,7 @@ def test_run_and_upload_maskedarrays(self): ########################################################################## @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_learning_curve_task_1(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -908,7 +908,7 @@ def test_learning_curve_task_1(self): self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_learning_curve_task_2(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -949,7 +949,7 @@ def test_learning_curve_task_2(self): Version(sklearn.__version__) < Version("0.21"), reason="Pipelines don't support indexing (used for the assert check)", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_initialize_cv_from_run(self): randomsearch = Pipeline( [ @@ -1024,7 +1024,7 @@ def _test_local_evaluations(self, run): assert alt_scores[idx] <= 1 @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_swapped_parameter_order_model(self): clf = DecisionTreeClassifier() australian_task = 595 # Australian; crossvalidation @@ -1044,7 +1044,7 @@ def test_local_run_swapped_parameter_order_model(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_swapped_parameter_order_flow(self): # construct sci-kit learn classifier clf = Pipeline( @@ -1073,7 +1073,7 @@ def test_local_run_swapped_parameter_order_flow(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_local_run_metric_score(self): # construct sci-kit learn classifier clf = Pipeline( @@ -1111,7 
+1111,7 @@ def test_online_run_metric_score(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_initialize_model_from_run(self): clf = sklearn.pipeline.Pipeline( steps=[ @@ -1173,7 +1173,7 @@ def test_initialize_model_from_run(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__run_exists(self): # would be better to not sentinel these clfs, # so we do not have to perform the actual runs @@ -1229,7 +1229,7 @@ def test__run_exists(self): assert run_ids, (run_ids, clf) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id(self): # check the case where the user adds an illegal flow id to a # non-existing flo @@ -1249,7 +1249,7 @@ def test_run_with_illegal_flow_id(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_after_load(self): # Same as `test_run_with_illegal_flow_id`, but test this error is also # caught if the run is stored to and loaded from disk first. @@ -1281,7 +1281,7 @@ def test_run_with_illegal_flow_id_after_load(self): TestBase.logger.info(f"collected from test_run_functions: {loaded_run.run_id}") @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_1(self): # Check the case where the user adds an illegal flow id to an existing # flow. Comes to a different value error than the previous test @@ -1307,7 +1307,7 @@ def test_run_with_illegal_flow_id_1(self): ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_with_illegal_flow_id_1_after_load(self): # Same as `test_run_with_illegal_flow_id_1`, but test this error is # also caught if the run is stored to and loaded from disk first. 
@@ -1350,7 +1350,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): Version(sklearn.__version__) < Version("0.20"), reason="OneHotEncoder cannot handle mixed type DataFrame as input", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__run_task_get_arffcontent(self): task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation num_instances = 3196 @@ -1451,7 +1451,7 @@ def test_get_runs_list(self): for run in runs.to_dict(orient="index").values(): self._check_run(run) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_runs_empty(self): runs = openml.runs.list_runs(task=[0]) assert runs.empty @@ -1580,7 +1580,7 @@ def test_get_runs_list_by_tag(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_on_dataset_with_missing_labels_dataframe(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the @@ -1617,7 +1617,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): Version(sklearn.__version__) < Version("0.20"), reason="columntransformer introduction in 0.20.0", ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_on_dataset_with_missing_labels_array(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the @@ -1656,7 +1656,7 @@ def test_run_on_dataset_with_missing_labels_array(self): # repeat, fold, row_id, 6 confidences, prediction and correct label assert len(row) == 12 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_cached_run(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.runs.functions._get_cached_run(1) @@ -1667,7 +1667,7 @@ def test_get_uncached_run(self): openml.runs.functions._get_cached_run(10) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_run_flow_on_task_downloaded_flow(self): model = sklearn.ensemble.RandomForestClassifier(n_estimators=33) flow = self.extension.model_to_flow(model) @@ -1698,7 +1698,7 @@ def test_format_prediction_non_supervised(self): ): format_prediction(clustering, *ignored_input) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_classification_no_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1708,7 +1708,7 @@ def test_format_prediction_classification_no_probabilities(self): with pytest.raises(ValueError, match="`proba` is required for classification task"): format_prediction(classification, *ignored_input, proba=None) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_classification_incomplete_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1719,7 +1719,7 @@ def test_format_prediction_classification_incomplete_probabilities(self): with pytest.raises(ValueError, match="Each class should have a predicted probability"): format_prediction(classification, *ignored_input, proba=incomplete_probabilities) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_without_classlabels_set(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1730,7 +1730,7 @@ def 
test_format_prediction_task_without_classlabels_set(self): with pytest.raises(ValueError, match="The classification task must have class labels set"): format_prediction(classification, *ignored_input, proba={}) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_learning_curve_sample_not_set(self): learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} @@ -1738,7 +1738,7 @@ def test_format_prediction_task_learning_curve_sample_not_set(self): with pytest.raises(ValueError, match="`sample` can not be none for LearningCurveTask"): format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_format_prediction_task_regression(self): task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"] _task_id = check_task_existence(**task_meta_data) @@ -1773,7 +1773,7 @@ def test_format_prediction_task_regression(self): reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_delete_run(self): rs = np.random.randint(1, 2**31 - 1) clf = sklearn.pipeline.Pipeline( @@ -1874,7 +1874,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): reason="couldn't perform local tests successfully w/o bloating RAM", ) @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test__run_task_get_arffcontent_2(parallel_mock): """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp @@ -1965,7 +1965,7 @@ def test__run_task_get_arffcontent_2(parallel_mock): (-1, "threading", 10), # the threading backend does preserve mocks even with parallelizing ] ) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_joblib_backends(parallel_mock, n_jobs, backend, call_count): """Tests evaluation of a run using various joblib backends and n_jobs.""" if backend is None: diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index a0469f9a5..0795d392c 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -35,7 +35,7 @@ def setUp(self): super().setUp() @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_nonexisting_setup_exists(self): # first publish a non-existing flow sentinel = get_sentinel() @@ -83,7 +83,7 @@ def _existing_setup_exists(self, classif): assert setup_id == run.setup_id @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_setup_exists_1(self): def side_effect(self): self.var_smoothing = 1e-9 @@ -99,13 +99,13 @@ def side_effect(self): self._existing_setup_exists(nb) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_exisiting_setup_exists_2(self): # Check a flow with one hyperparameter self._existing_setup_exists(sklearn.naive_bayes.GaussianNB()) @pytest.mark.sklearn() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_existing_setup_exists_3(self): # Check a flow with many hyperparameters self._existing_setup_exists( @@ -147,7 +147,7 @@ def test_setup_list_filter_flow(self): for setup_id in setups: assert 
setups[setup_id].flow_id == flow_id - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_setups_empty(self): setups = openml.setups.list_setups(setup=[0]) if len(setups) > 0: @@ -168,7 +168,7 @@ def test_list_setups_output_format(self): assert isinstance(setups, pd.DataFrame) assert len(setups) == 10 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_setuplist_offset(self): size = 10 setups = openml.setups.list_setups(offset=0, size=size) @@ -180,7 +180,7 @@ def test_setuplist_offset(self): assert len(all) == size * 2 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_cached_setup(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.setups.functions._get_cached_setup(1) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 4b662524b..b907ee740 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -74,7 +74,7 @@ def test_get_suite_error(self): ): openml.study.get_suite(123) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_benchmark_suite(self): fixture_alias = None fixture_name = "unit tested benchmark suite" @@ -143,16 +143,16 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool): assert study_downloaded.main_entity_type == "run" assert study_downloaded.runs is None - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_empty_study_explicit(self): self._test_publish_empty_study_is_allowed(explicit=True) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_empty_study_implicit(self): self._test_publish_empty_study_is_allowed(explicit=False) @pytest.mark.flaky() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_publish_study(self): # get some random runs to attach run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) @@ -222,7 +222,7 @@ def test_publish_study(self): res = openml.study.delete_study(study.id) assert res - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_study_attach_illegal(self): run_list = openml.runs.list_runs(size=10) assert len(run_list) == 10 diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index fed0c0a00..65dcebc1d 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -18,7 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 5 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id @@ -26,13 +26,13 @@ def test_download_task(self): assert task.dataset_id == 20 assert task.estimation_procedure_id == self.estimation_procedure - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_get_X_and_Y(): task = get_task(119) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index 2bbb015c6..b1ce569e2 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -28,7 +28,7 @@ def test_get_dataset(self): task.get_dataset() 
@pytest.mark.production() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): # no clustering tasks on test server self.use_production_server() @@ -37,7 +37,7 @@ def test_download_task(self): assert task.task_type_id == TaskType.CLUSTERING assert task.dataset_id == 36 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_task(self): compatible_datasets = self._get_compatible_rand_dataset() for i in range(100): diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index fbcbfe9bf..465d9c0be 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -18,7 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (768, 8) @@ -27,14 +27,14 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_categorical_dtype(Y) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id assert task.task_type_id == TaskType.LEARNING_CURVE assert task.dataset_id == 20 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index a834cdf0f..26d7dc94b 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -49,7 +49,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_REGRESSION - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (194, 32) @@ -58,7 +58,7 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_numeric_dtype(Y) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index 3f7b06ee4..99df3cace 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -28,7 +28,7 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_X_and_Y(self) -> tuple[pd.DataFrame, pd.Series]: task = get_task(self.task_id) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index b77782847..1d0df1210 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -32,11 +32,11 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_task(self): return get_task(self.task_id) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_upload_task(self): # We don't know if the task in question already exists, so we try a few times. 
Checking # beforehand would not be an option because a concurrent unit test could potentially diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index d44717177..7b75262e4 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -26,7 +26,7 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_cached_tasks(self): openml.config.set_root_cache_directory(self.static_cache_dir) tasks = openml.tasks.functions._get_cached_tasks() @@ -34,7 +34,7 @@ def test__get_cached_tasks(self): assert len(tasks) == 3 assert isinstance(next(iter(tasks.values())), OpenMLTask) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_cached_task(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.functions._get_cached_task(1) @@ -49,7 +49,7 @@ def test__get_cached_task_not_cached(self): 2, ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_estimation_procedure_list(self): estimation_procedures = openml.tasks.functions._get_estimation_procedure_list() assert isinstance(estimation_procedures, list) @@ -73,7 +73,7 @@ def _check_task(self, task): assert isinstance(task["status"], str) assert task["status"] in ["in_preparation", "active", "deactivated"] - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_by_type(self): num_curves_tasks = 198 # number is flexible, check server if fails ttid = TaskType.LEARNING_CURVE @@ -83,18 +83,18 @@ def test_list_tasks_by_type(self): assert ttid == task["ttid"] self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_length(self): ttid = TaskType.LEARNING_CURVE tasks = openml.tasks.list_tasks(task_type=ttid) assert len(tasks) > 100 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_empty(self): tasks = openml.tasks.list_tasks(tag="NoOneWillEverUseThisTag") assert tasks.empty - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_by_tag(self): num_basic_tasks = 100 # number is flexible, check server if fails tasks = openml.tasks.list_tasks(tag="OpenML100") @@ -102,14 +102,14 @@ def test_list_tasks_by_tag(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks(self): tasks = openml.tasks.list_tasks() assert len(tasks) >= 900 for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_paginate(self): size = 10 max = 100 @@ -119,7 +119,7 @@ def test_list_tasks_paginate(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_list_tasks_per_type_paginate(self): size = 40 max = 100 @@ -136,7 +136,7 @@ def test_list_tasks_per_type_paginate(self): assert j == task["ttid"] self._check_task(task) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test__get_task(self): openml.config.set_root_cache_directory(self.static_cache_dir) openml.tasks.get_task(1882) @@ -151,7 +151,7 @@ def test__get_task_live(self): # https://github.com/openml/openml-python/issues/378 openml.tasks.get_task(34536) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task(self): task = 
openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -165,7 +165,7 @@ def test_get_task(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") ) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -188,7 +188,7 @@ def test_get_task_lazy(self): ) @mock.patch("openml.tasks.functions.get_dataset") - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_removal_upon_download_failure(self, get_dataset): class WeirdException(Exception): pass @@ -206,7 +206,7 @@ def assert_and_raise(*args, **kwargs): # Now the file should no longer exist assert not os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_task_with_cache(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1) @@ -222,7 +222,7 @@ def test_get_task_different_types(self): # Issue 538, get_task failing with clustering task. openml.tasks.functions.get_task(126033) - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_download_split(self): task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 6b8804b9f..9316d0876 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -16,7 +16,7 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_tagging(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # tags can be at most 64 alphanumeric (+ underscore) chars @@ -32,7 +32,7 @@ def test_tagging(self): tasks = openml.tasks.list_tasks(tag=tag) assert len(tasks) == 0 - @pytest.mark.uses_test_server() + @pytest.mark.test_server() def test_get_train_and_test_split_indices(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1882) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index a1cdb55ea..4be6cd58a 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -48,18 +48,18 @@ def _mocked_perform_api_call(call, request_method): return openml._api_calls._download_text_file(url) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all(): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_tasks(min_number_tasks_on_test_server): tasks = openml.tasks.list_tasks(size=min_number_tasks_on_test_server) assert min_number_tasks_on_test_server == len(tasks) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): # By setting the batch size one lower than the minimum we guarantee at least two # batches and at the same time do as few batches (roundtrips) as possible. 
@@ -72,7 +72,7 @@ def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): assert min_number_tasks_on_test_server <= sum(len(batch) for batch in batches) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_datasets(min_number_datasets_on_test_server): datasets = openml.datasets.list_datasets( size=min_number_datasets_on_test_server, @@ -83,14 +83,14 @@ def test_list_all_for_datasets(min_number_datasets_on_test_server): _check_dataset(dataset) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_flows(min_number_flows_on_test_server): flows = openml.flows.list_flows(size=min_number_flows_on_test_server) assert min_number_flows_on_test_server == len(flows) @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_setups(min_number_setups_on_test_server): # TODO apparently list_setups function does not support kwargs setups = openml.setups.list_setups(size=min_number_setups_on_test_server) @@ -98,14 +98,14 @@ def test_list_all_for_setups(min_number_setups_on_test_server): @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_runs(min_number_runs_on_test_server): runs = openml.runs.list_runs(size=min_number_runs_on_test_server) assert min_number_runs_on_test_server == len(runs) @pytest.mark.flaky() # Other tests might need to upload runs first -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_for_evaluations(min_number_evaluations_on_test_server): # TODO apparently list_evaluations function does not support kwargs evaluations = openml.evaluations.list_evaluations( @@ -116,7 +116,7 @@ def test_list_all_for_evaluations(min_number_evaluations_on_test_server): @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call) -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_list_all_few_results_available(_perform_api_call): datasets = openml.datasets.list_datasets(size=1000, data_name="iris", data_version=1) assert len(datasets) == 1, "only one iris dataset version 1 should be present" @@ -141,7 +141,7 @@ def test__create_cache_directory(config_mock, tmp_path): openml.utils._create_cache_directory("ghi") -@pytest.mark.uses_test_server() +@pytest.mark.test_server() def test_correct_test_server_download_state(): """This test verifies that the test server downloads the data from the correct source.
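
For context on how the renamed marker is consumed, the short sketch below shows a test decorated with the test_server marker registered in pyproject.toml above, together with the pytest commands that select or deselect such tests. This is an illustrative sketch only, not part of the patch series: the module and test name are hypothetical, while the decorator form and the openml.datasets.list_datasets(size=5) call mirror usage that appears in the diffs above.

    # Hypothetical example (not part of this patch series): applying the
    # registered marker to a test that needs the OpenML test server.
    import pytest

    import openml


    @pytest.mark.test_server()
    def test_list_datasets_smoke():
        # Talks to the OpenML test server, so it can be deselected offline.
        datasets = openml.datasets.list_datasets(size=5)
        assert len(datasets) == 5

    # Run everything except server-dependent tests:
    #   pytest -m "not test_server"
    # Run only the server-dependent tests:
    #   pytest -m "test_server"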