From c4d49272eaf597ad66de50471b8b831cc9949191 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 22 Jan 2026 15:52:33 +0100 Subject: [PATCH 01/14] Bump REST API version --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index e4e9507..e87aed3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -58,7 +58,7 @@ services: php-api: profiles: ["all", "minio", "rest-api", "frontend", "evaluation-engine"] - image: openml/php-rest-api:v1.2.1 + image: openml/php-rest-api:v1.2.4 container_name: "openml-php-rest-api" ports: - "8080:80" # also known as /api (nginx) From d1673a0b93f5bd6b573765b86cff9cdfbe5a5ad4 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 26 Jan 2026 15:10:15 +0100 Subject: [PATCH 02/14] Update routing to same location for internal and external requests NGINX now also listens on port 8000 on the docker network. This is an important step to being able to start these `services` and have them function as a local test server for openml-python among others. 
--- config/arff-to-pq-converter/Dockerfile | 2 +- config/arff-to-pq-converter/config | 2 +- config/database/update.sh | 4 ++-- config/nginx/Dockerfile | 4 ++-- config/nginx/nginx.conf | 4 ++-- config/php/.env | 7 ++++--- docker-compose.yaml | 16 +++++++++++++++- 7 files changed, 27 insertions(+), 12 deletions(-) diff --git a/config/arff-to-pq-converter/Dockerfile b/config/arff-to-pq-converter/Dockerfile index d164e90..58cc6b7 100644 --- a/config/arff-to-pq-converter/Dockerfile +++ b/config/arff-to-pq-converter/Dockerfile @@ -8,4 +8,4 @@ RUN apt update && apt upgrade -y RUN apt -y install cron RUN chmod +x /etc/cron.d/openml -RUN crontab -u unprivileged-user /etc/cron.d/openml \ No newline at end of file +RUN crontab -u unprivileged-user /etc/cron.d/openml diff --git a/config/arff-to-pq-converter/config b/config/arff-to-pq-converter/config index 9e32e2f..acbc67d 100644 --- a/config/arff-to-pq-converter/config +++ b/config/arff-to-pq-converter/config @@ -1 +1 @@ -server=http://nginx:80/api/v1/xml +server=http://nginx:8000/api/v1/xml diff --git a/config/database/update.sh b/config/database/update.sh index 0239211..b962e11 100755 --- a/config/database/update.sh +++ b/config/database/update.sh @@ -2,7 +2,7 @@ # Change the filepath of openml.file # from "https://www.openml.org/data/download/1666876/phpFsFYVN" # to "http://minio:9000/datasets/0000/0001/phpFsFYVN" -mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' +mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://localhost:8000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' # Update openml.expdb.dataset with the same url mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' @@ -28,4 +28,4 @@ mysql -hdatabase -uroot -pok 
-Dopenml_expdb -e 'INSERT IGNORE INTO dataset_statu mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";' # Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing. -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' \ No newline at end of file +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' diff --git a/config/nginx/Dockerfile b/config/nginx/Dockerfile index 7498879..7a7ab50 100644 --- a/config/nginx/Dockerfile +++ b/config/nginx/Dockerfile @@ -3,6 +3,6 @@ FROM nginx:alpine WORKDIR /etc/nginx COPY ./nginx.conf ./conf.d/default.conf COPY ./shared.conf ./shared.conf -EXPOSE 80 +EXPOSE 8000 ENTRYPOINT [ "nginx" ] -CMD [ "-g", "daemon off;" ] \ No newline at end of file +CMD [ "-g", "daemon off;" ] diff --git a/config/nginx/nginx.conf b/config/nginx/nginx.conf index 5316b1f..7dcd3d7 100644 --- a/config/nginx/nginx.conf +++ b/config/nginx/nginx.conf @@ -3,7 +3,7 @@ server { - listen 80; + listen 8000; server_name localhost; resolver 127.0.0.11; @@ -48,4 +48,4 @@ server { set $upstream_f http://frontend:5000; proxy_pass $upstream_f/$1$is_args; } -} \ No newline at end of file +} diff --git a/config/php/.env b/config/php/.env index 3a641c8..77c1058 100644 --- a/config/php/.env +++ b/config/php/.env @@ -1,6 +1,6 @@ API_KEY=AD000000000000000000000000000000 -BASE_URL=http://php-api:80/ -MINIO_URL=http://minio:9000/ +BASE_URL=http://localhost:8000/ +MINIO_URL=http://localhost:8000/minio/ DB_HOST_OPENML=database:3306 DB_HOST_EXPDB=database:3306 DB_USER_OPENML=root @@ -10,4 +10,5 @@ DB_PASS_EXPDB_WRITE=ok DB_USER_EXPDB_READ=root DB_PASS_EXPDB_READ=ok ES_URL=elasticsearch:9200 -ES_PASSWORD=default \ No newline at end of file 
+ES_PASSWORD=default +INDEX_ES_DURING_STARTUP=true diff --git a/docker-compose.yaml b/docker-compose.yaml index e87aed3..21f720b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -54,7 +54,11 @@ services: context: config/nginx container_name: openml-nginx ports: - - "8000:80" + - "8000:8000" + networks: + default: + ipv4_address: 172.28.0.2 + php-api: profiles: ["all", "minio", "rest-api", "frontend", "evaluation-engine"] @@ -78,6 +82,8 @@ services: start_interval: 5s timeout: 3s interval: 1m + extra_hosts: + - "localhost=172.28.0.2" email-server: profiles: ["all", "frontend"] @@ -133,6 +139,8 @@ services: depends_on: php-api: condition: service_healthy + extra_hosts: + - "localhost=172.28.0.2" croissants: profiles: ["all"] @@ -161,3 +169,9 @@ services: networks: default: name: openml-services + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 + ip_range: 172.28.1.0/24 + From 456586a261183abaf6145425693aa242a1056202 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 28 Jan 2026 17:19:02 +0100 Subject: [PATCH 03/14] Update configuration to work with local database and localhost --- config/evaluation-engine/.env | 4 ++-- config/evaluation-engine/run-cron.sh | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/config/evaluation-engine/.env b/config/evaluation-engine/.env index 882ba22..17480f2 100644 --- a/config/evaluation-engine/.env +++ b/config/evaluation-engine/.env @@ -1,4 +1,4 @@ -CONFIG=api_key=AD000000000000000000000000000000;server=http://php-api:80/ +CONFIG=api_key=abc;server=http://php-api:80/ JAVA=/usr/bin/java JAR=/usr/local/lib/evaluation-engine.jar -LOG_DIR=/logs \ No newline at end of file +LOG_DIR=/logs diff --git a/config/evaluation-engine/run-cron.sh b/config/evaluation-engine/run-cron.sh index cff7a2f..004e656 100755 --- a/config/evaluation-engine/run-cron.sh +++ b/config/evaluation-engine/run-cron.sh @@ -1,6 +1,10 @@ #!/bin/sh +# We need to remove the default 127.0.0.1 localhost map to +# ensure the 
remap to the static nginx ip address is respected. +sed -i '/^127.0.0.1.*localhost/d' /etc/hosts + printenv | grep -v HOME >> /etc/environment touch /cron.log -/usr/sbin/crond -l 4 && tail -f /cron.log \ No newline at end of file +/usr/sbin/crond -l 4 && tail -f /cron.log From 4ab1036d2e0415b49d3826c858b8afb3075b0c37 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 15:54:18 +0100 Subject: [PATCH 04/14] Use a more reliable way to overwrite the /etc/hosts file --- config/evaluation-engine/run-cron.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/config/evaluation-engine/run-cron.sh b/config/evaluation-engine/run-cron.sh index 004e656..5c1b03d 100755 --- a/config/evaluation-engine/run-cron.sh +++ b/config/evaluation-engine/run-cron.sh @@ -2,7 +2,13 @@ # We need to remove the default 127.0.0.1 localhost map to # ensure the remap to the static nginx ip address is respected. -sed -i '/^127.0.0.1.*localhost/d' /etc/hosts +# Updating /etc/hosts in place isn't always allowed ("Resource Busy"), +# directly overwriting it instead seems to bypass that protection. 
+cp /etc/hosts /etc/hosts.new +sed -i '/^127.0.0.1.*localhost/d' /etc/hosts.new +sed -i -E 's/^(::1\t)localhost (.*)$/\1\2/g' /etc/hosts.new +cat /etc/hosts.new > /etc/hosts +rm /etc/hosts.new printenv | grep -v HOME >> /etc/environment From 273d8e9afda4ec2687a21e2a0017c21015e97756 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 17:06:49 +0100 Subject: [PATCH 05/14] Remove commented out code --- config/database/update.sh | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/config/database/update.sh b/config/database/update.sh index b962e11..5b49e33 100755 --- a/config/database/update.sh +++ b/config/database/update.sh @@ -6,26 +6,3 @@ mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http: # Update openml.expdb.dataset with the same url mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' - - - - - -# Create the data_feature_description TABLE. TODO: can we make sure this table exists already? -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` ( - `did` int unsigned NOT NULL, - `index` int unsigned NOT NULL, - `uploader` mediumint unsigned NOT NULL, - `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `description_type` enum("plain", "ontology") NOT NULL, - `value` varchar(256) NOT NULL, - KEY `did` (`did`,`index`), - CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE -)' - -# SET dataset 1 to active (used in unittests java) -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)' -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";' - -# Temporary fix in case the database missed the kaggle table. 
The PHP Rest API expects the table to be there, while indexing. -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' From 7ac2879381ea4f864dfcce35b2b33fbcefd41a9c Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 17:07:40 +0100 Subject: [PATCH 06/14] Point to most recent images --- docker-compose.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 21f720b..079e643 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,6 +1,6 @@ services: database: - image: "openml/test-database:20240105" + image: "openml/test-database:v0.1.20260129" container_name: "openml-test-database" environment: MYSQL_ROOT_PASSWORD: ok @@ -114,7 +114,7 @@ services: minio: profiles: ["all", "minio", "evaluation-engine"] - image: openml/test-minio:v0.1.20241110 + image: openml/test-minio:v0.1.20260129 container_name: "openml-minio" ports: - "9000:9000" # also known as /data (nginx) @@ -165,6 +165,8 @@ services: depends_on: php-api: condition: service_healthy + extra_hosts: + - "localhost=172.28.0.2" networks: default: From b2e71becc67e5cd17a65fe1aed0675e3bcd48130 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 4 Feb 2026 17:10:14 +0200 Subject: [PATCH 07/14] Point to updated versions of the image which are built cross platform --- config/email-server/imapsql.db | Bin 94208 -> 94208 bytes docker-compose.yaml | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/email-server/imapsql.db b/config/email-server/imapsql.db index 95f5d19f3f65b445829176db76e31fe12feee486..de1f39796b95db91137b8a39cfda9d17f39dd79e 100644 GIT binary patch delta 90 zcmZp8z}oPDb%Hcw&_o$$)*uGGs=|#a3;qkR@hdR!U*q4+KbgOf-;G~kbD;naznTgg ri@2?DX>n>%adBZzW=U#%VrfZ!W?o8aMLbwE-q29NaJw5ng|zuV?Q0Tcf1Zv2e9 F5&)%65G()y diff --git a/docker-compose.yaml b/docker-compose.yaml index 079e643..a0d895a 100644 --- 
a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,6 +1,6 @@ services: database: - image: "openml/test-database:v0.1.20260129" + image: "openml/test-database:v0.1.20260204" container_name: "openml-test-database" environment: MYSQL_ROOT_PASSWORD: ok @@ -114,7 +114,7 @@ services: minio: profiles: ["all", "minio", "evaluation-engine"] - image: openml/test-minio:v0.1.20260129 + image: openml/test-minio:v0.1.20260204 container_name: "openml-minio" ports: - "9000:9000" # also known as /data (nginx) From 0b185d8530744bcb5726221d2fdfb76889d0905e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 4 Feb 2026 17:11:50 +0200 Subject: [PATCH 08/14] Note on emulation of ES on arm mac --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1406cdd..543423d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Overview of all OpenML components including a docker-compose to run OpenML servi ![OpenML Component overview](https://raw.githubusercontent.com/openml/services/main/documentation/OpenML-overview.png) ## Prerequisites -- Linux/MacOS with Intell processor (because of our old ES version, this project currently does not support `arm` architectures) +- Linux/MacOS (For Mac with `arm` architectures, enable Rosetta for emulation. 
QEMU and Docker VMM do not work with the elastic search image) - [Docker](https://docs.docker.com/get-docker/) - [Docker Compose](https://docs.docker.com/compose/install/) version 2.21.0 or higher From eeaa47b6ae7db339bea9dbc64689092881cd7925 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 5 Feb 2026 16:02:55 +0200 Subject: [PATCH 09/14] Bump frontend version --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index a0d895a..d4c0831 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -101,7 +101,7 @@ services: frontend: profiles: ["all", "frontend"] - image: openml/frontend:dev_v2.0.20251111 + image: openml/frontend:v2.1.1 container_name: "openml-frontend" ports: - "8081:5000" # also known as / (nginx) From 73baa3e2a0fb9d137d1ad0c8f39da58393446c26 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 5 Feb 2026 17:24:58 +0200 Subject: [PATCH 10/14] Add a shell script for automated testing of some of the services --- test.sh | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100755 test.sh diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..1ba23e7 --- /dev/null +++ b/test.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# This test assumes services are running locally: +# `docker compose --profile all up -d` +# +# It tests some of the most important services, but is by no means comprehensive. +# In particular, also at least check the frontpage in a browser (http://localhost:8000). 
+ +set -e + +assert_contains() { + if echo "$1" | grep -q "$2"; then + echo "PASS: output contains '$2'" + else + echo "FAIL: output does not contain '$2'" + echo "Full output:" + echo "$1" + exit 1 + fi +} + +assert_url_exists() { + if curl --output /dev/null --silent --head --fail "$1"; then + echo "PASS: $1 exists" + else + echo "FAIL: $1 does not exist" + exit 1 + fi +} + +# nginx redirects request to the home page +HOME_PAGE=$(curl -s http://localhost:8000) +assert_contains "$HOME_PAGE" "OpenML is an open platform for sharing datasets" + +DATASET_URL=http://localhost:8000/minio/datasets/0000/0020/dataset_37_diabetes.arff +DESCRIPTION_URL=http://localhost:8000/api/v1/json/data/20 + +# The JSON response may contain escaped slashes (e.g. http:\/\/), so strip them +DESCRIPTION=$(curl -s "$DESCRIPTION_URL" | sed 's/\\//g') +assert_contains "$DESCRIPTION" "diabetes" + +wget "$DATASET_URL" -O dataset.arff +assert_contains "$(cat dataset.arff)" "@data" + +if [ -d .venv ]; then + echo "Using existing virtual environment for dataset upload." +else + echo "Creating virtual environment for dataset upload." 
+ python -m venv .venv + source .venv/bin/activate + python -m pip install uv + uv pip install openml numpy +fi + +echo "Attempting dataset upload" + +DATA_ID=$(.venv/bin/python -c " +import numpy as np +import openml +from openml.datasets import create_dataset + +openml.config.server = 'http://localhost:8000/api/v1/xml' +openml.config.apikey = 'normaluser' + +data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T +attributes = [('col_' + str(i), 'REAL') for i in range(data.shape[1])] + +dataset = create_dataset( + name='test-data', + description='Synthetic dataset created from a NumPy array', + creator='OpenML tester', + contributor=None, + collection_date='01-01-2018', + language='English', + licence='MIT', + default_target_attribute='col_' + str(data.shape[1] - 1), + row_id_attribute=None, + ignore_attribute=None, + citation='None', + attributes=attributes, + data=data, + version_label='test', + original_data_url='http://openml.github.io/openml-python', + paper_url='http://openml.github.io/openml-python', +) +dataset.publish() +print(dataset.id) +") + +# Make sure DATA_ID is an integer, and not some Python error output +if ! echo "$DATA_ID" | grep -q '^[0-9]\+$'; then + echo "FAIL: DATA_ID is not an integer: '$DATA_ID'" + exit 1 +fi + +PADDED_ID=$(printf "%04d" "$DATA_ID") +NEW_DATASET_URL="http://localhost:8000/datasets/0000/${PADDED_ID}/dataset.arff" + +assert_url_exists "$NEW_DATASET_URL" + +# Wait for the dataset to become active, polling every 10 seconds for up to 2 minutes +WAITED=0 +while [ "$WAITED" -lt 120 ]; do + DATASET_STATUS=$(curl -s "http://localhost:8000/api/v1/json/data/${DATA_ID}") + if echo "$DATASET_STATUS" | grep -q "active"; then + echo "PASS: dataset $DATA_ID is active (after ${WAITED}s)" + break + fi + echo "Waiting for dataset $DATA_ID to become active... 
(${WAITED}s elapsed)" + sleep 10 + WAITED=$((WAITED + 10)) +done + +if [ "$WAITED" -ge 120 ]; then + echo "FAIL: dataset $DATA_ID did not become active within 120s" + echo "Full output:" + echo "$DATASET_STATUS" + exit 1 +fi + +NEW_PARQUET_URL="${NEW_DATASET_URL%.arff}.pq" +assert_url_exists "$NEW_PARQUET_URL" + +CROISSANT_URL="http://localhost:8000/croissant/dataset/${DATA_ID}" +CROISSANT_STATUS=$(curl --silent --output /dev/null --write-out "%{http_code}" "$CROISSANT_URL") +if [ "$CROISSANT_STATUS" = "200" ]; then + echo "PASS: $CROISSANT_URL exists (HTTP $CROISSANT_STATUS)" +else + echo "FAIL: $CROISSANT_URL returned HTTP $CROISSANT_STATUS" + exit 1 +fi + +ES_RESPONSE=$(curl -s "http://localhost:8000/es/data/_doc/${DATA_ID}") +assert_contains "$ES_RESPONSE" "test-data" From 99f2e0a9cf4e6ffd26e1dee65d19d2c465278f6d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 6 Feb 2026 13:29:28 +0200 Subject: [PATCH 11/14] Add minio prefix to dataset path --- config/database/update.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/database/update.sh b/config/database/update.sh index 5b49e33..8ef5a75 100755 --- a/config/database/update.sh +++ b/config/database/update.sh @@ -2,7 +2,7 @@ # Change the filepath of openml.file # from "https://www.openml.org/data/download/1666876/phpFsFYVN" # to "http://minio:9000/datasets/0000/0001/phpFsFYVN" -mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://localhost:8000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' +mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://localhost:8000/minio/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' # Update openml.expdb.dataset with the same url mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' From 
848c73fcc770bbb26e49e649ab4c5f388b7d4f9f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 6 Feb 2026 14:42:01 +0200 Subject: [PATCH 12/14] Test file content to avoid false positives (except croissant) --- test.sh | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/test.sh b/test.sh index 1ba23e7..9baa46d 100755 --- a/test.sh +++ b/test.sh @@ -8,7 +8,7 @@ set -e assert_contains() { - if echo "$1" | grep -q "$2"; then + if echo "$1" | grep --ignore-case -q "$2"; then echo "PASS: output contains '$2'" else echo "FAIL: output does not contain '$2'" @@ -19,7 +19,7 @@ assert_contains() { } assert_url_exists() { - if curl --output /dev/null --silent --head --fail "$1"; then + if curl --output /dev/null --silent --head --fail --location "$1"; then echo "PASS: $1 exists" else echo "FAIL: $1 does not exist" @@ -40,6 +40,7 @@ assert_contains "$DESCRIPTION" "diabetes" wget "$DATASET_URL" -O dataset.arff assert_contains "$(cat dataset.arff)" "@data" +rm dataset.arff if [ -d .venv ]; then echo "Using existing virtual environment for dataset upload." @@ -92,10 +93,11 @@ if ! 
echo "$DATA_ID" | grep -q '^[0-9]\+$'; then exit 1 fi -PADDED_ID=$(printf "%04d" "$DATA_ID") -NEW_DATASET_URL="http://localhost:8000/datasets/0000/${PADDED_ID}/dataset.arff" - +NEW_DATASET_URL=$(curl -s "http://localhost:8000/api/v1/json/data/${DATA_ID}" | jq -r ".data_set_description.url") assert_url_exists "$NEW_DATASET_URL" +wget "$NEW_DATASET_URL" -O new_dataset.arff +assert_contains "$(cat new_dataset.arff)" "@data" +rm new_dataset.arff # Wait for the dataset to become active, polling every 10 seconds for up to 2 minutes WAITED=0 @@ -117,8 +119,13 @@ if [ "$WAITED" -ge 120 ]; then exit 1 fi -NEW_PARQUET_URL="${NEW_DATASET_URL%.arff}.pq" -assert_url_exists "$NEW_PARQUET_URL" +echo "Checking parquet conversion" +PADDED_ID=$(printf "%04d" "$DATA_ID") +NEW_PARQUET_URL="http://localhost:8000/minio/datasets/0000/${PADDED_ID}/dataset_${DATA_ID}.pq" +wget "$NEW_PARQUET_URL" +DATA_SHAPE=$(.venv/bin/python -c "import pandas as pd; df = pd.read_parquet(\"dataset_${DATA_ID}.pq\"); print(df.shape)") +assert_contains "${DATA_SHAPE}" "(3, 4)" +rm "dataset_${DATA_ID}.pq" CROISSANT_URL="http://localhost:8000/croissant/dataset/${DATA_ID}" CROISSANT_STATUS=$(curl --silent --output /dev/null --write-out "%{http_code}" "$CROISSANT_URL") From a2e584460d4027e42366fc1e15e4f701a876e058 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 6 Feb 2026 15:47:24 +0200 Subject: [PATCH 13/14] Check croissant file content --- test.sh | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/test.sh b/test.sh index 9baa46d..8b2b1fc 100755 --- a/test.sh +++ b/test.sh @@ -128,13 +128,8 @@ assert_contains "${DATA_SHAPE}" "(3, 4)" rm "dataset_${DATA_ID}.pq" CROISSANT_URL="http://localhost:8000/croissant/dataset/${DATA_ID}" -CROISSANT_STATUS=$(curl --silent --output /dev/null --write-out "%{http_code}" "$CROISSANT_URL") -if [ "$CROISSANT_STATUS" = "200" ]; then - echo "PASS: $CROISSANT_URL exists (HTTP $CROISSANT_STATUS)" -else - echo "FAIL: $CROISSANT_URL returned HTTP $CROISSANT_STATUS" 
- exit 1 -fi +CROISSANT_NAME=$(curl -s "${CROISSANT_URL}" | jq -r ".name") +assert_contains "${CROISSANT_NAME}" "test-data" ES_RESPONSE=$(curl -s "http://localhost:8000/es/data/_doc/${DATA_ID}") -assert_contains "$ES_RESPONSE" "test-data" +assert_contains "${ES_RESPONSE}" "test-data" From 55150067430fad4ffcb20dd0cbb646496ac5b95e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 6 Feb 2026 16:48:37 +0200 Subject: [PATCH 14/14] Update configurations for local setup --- config/frontend/.env | 2 +- config/python/config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/frontend/.env b/config/frontend/.env index 128bbab..b33aeba 100644 --- a/config/frontend/.env +++ b/config/frontend/.env @@ -9,7 +9,7 @@ DATABASE_URI="mysql+pymysql://root:ok@database:3306/openml" TESTING=False REACT_APP_URL_SITE_BACKEND=http://localhost:8000/ -REACT_APP_URL_API=http://localhost:8000/api/ +REACT_APP_URL_API=http://localhost:8000/ REACT_APP_URL_ELASTICSEARCH=http://localhost:8000/es/ REACT_APP_ELASTICSEARCH_VERSION_MAYOR=6 REACT_APP_URL_MINIO=http://localhost:8000/data/ diff --git a/config/python/config b/config/python/config index a64d014..62266bf 100644 --- a/config/python/config +++ b/config/python/config @@ -1,2 +1,2 @@ -apikey=AD000000000000000000000000000000 -server=http://nginx:80/api/v1/xml \ No newline at end of file +apikey=normaluser +server=http://localhost:8000/api/v1/xml