diff --git a/S3MP/mirror_path.py b/S3MP/mirror_path.py index 42d5fa6..5097d55 100644 --- a/S3MP/mirror_path.py +++ b/S3MP/mirror_path.py @@ -199,11 +199,16 @@ def download_to_mirror_if_not_present(self): self.download_to_mirror(overwrite=False) def upload_from_mirror(self, overwrite: bool = False): - """Upload local file to S3.""" - if not overwrite and self.exists_on_s3(): - self.update_callback_on_skipped_transfer() - return - upload_to_key(self.s3_key, self.local_path, self.bucket, self.s3_client) + """Upload local file or folder to S3.""" + if self.is_file: + if not overwrite and self.exists_on_s3(): + self.update_callback_on_skipped_transfer() + return + upload_to_key(self.s3_key, self.local_path, self.bucket, self.s3_client) + else: + for child in self.local_path.iterdir(): + child_mp = self.get_child(child.name) + child_mp.upload_from_mirror(overwrite=overwrite) def upload_from_mirror_if_not_present(self): """Upload from mirror if not present on S3.""" diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 3813e6c..21eaa79 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -111,8 +111,9 @@ def upload_to_key( Config=S3MPConfig.transfer_config, # type: ignore[arg-type] ) else: + folder_key = key.rstrip("/") for child in local_path.iterdir(): - upload_to_key(f"{key}/{child.name}", child, bucket, client) + upload_to_key(f"{folder_key}/{child.name}", child, bucket, client) def key_exists_on_s3( @@ -180,8 +181,13 @@ def delete_child_keys_on_s3( client = client or S3MPConfig.s3_client assert bucket is not None assert client is not None - for child_key in s3_list_child_keys(key, bucket, client): - client.delete_object(Bucket=bucket.name, Key=child_key) + folder_key = key if key.endswith("/") else f"{key}/" + for child_key in s3_list_child_keys(folder_key, bucket, client): + if child_key.endswith("/"): + delete_child_keys_on_s3(child_key, bucket, client) + client.delete_object(Bucket=bucket.name, Key=child_key) + else: + client.delete_object(Bucket=bucket.name, Key=child_key) def delete_key_on_s3( @@ -199,4 +205,6 @@ def delete_key_on_s3( if key_is_file_on_s3(key, bucket, client): client.delete_object(Bucket=bucket.name, Key=key) else: - delete_child_keys_on_s3(key, bucket, client) + folder_key = key if key.endswith("/") else f"{key}/" + delete_child_keys_on_s3(folder_key, bucket, client) + client.delete_object(Bucket=bucket.name, Key=folder_key) diff --git a/pyproject.toml b/pyproject.toml index 546b531..7faee45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "S3MP" -version = "0.9.0" +version = "0.9.1" description = "" authors = [ {name = "Joshua Dean", email = "joshua.dean@sentera.com"}, diff --git a/tests/test_mirror_path.py b/tests/test_mirror_path.py index cdca9ed..34cc0b1 100644 --- a/tests/test_mirror_path.py +++ b/tests/test_mirror_path.py @@ -10,6 +10,7 @@ from S3MP.global_config import S3MPConfig from S3MP.mirror_path import MirrorPath from S3MP.types import S3Client +from S3MP.utils.s3_utils import delete_key_on_s3 TESTING_BUCKET_NAME = "s3mp-testing" @@ -34,11 +35,15 @@ def _configure_s3mp(tmp_path): @pytest.fixture() def s3_client(): - """Create an S3 client and ensure the test bucket exists.""" + """Create an S3 client, ensure the test bucket exists, and clean up after the test.""" client: S3Client = boto3.client("s3") with contextlib.suppress(client.exceptions.BucketAlreadyOwnedByYou): client.create_bucket(Bucket=TESTING_BUCKET_NAME) - return client + yield client + # Teardown: delete test data from S3 (local mirror is cleaned up by tmp_path) + bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME) + for prefix in ["mp_test/", "mr_prop/"]: + delete_key_on_s3(prefix, bucket, client) # ── from_s3_key construction ────────────────────────────────────────────── @@ -666,6 +671,76 @@ def test_copy_to_mp_from_mirror(self, s3_client, tmp_path): assert mp_dest.exists_on_s3() assert mp_dest.local_path.read_bytes() == b"mirror_src_data" + def test_upload_from_mirror_folder_uploads_children(self, s3_client, tmp_path): + """upload_from_mirror on a folder should upload all child files.""" + folder_mp = MirrorPath.from_s3_key("mp_test/upload_folder/") + folder_mp.local_path.mkdir(parents=True, exist_ok=True) + (folder_mp.local_path / "x.txt").write_bytes(b"x_data") + (folder_mp.local_path / "y.txt").write_bytes(b"y_data") + + folder_mp.upload_from_mirror(overwrite=True) + + child_x = folder_mp.get_child("x.txt") + child_y = folder_mp.get_child("y.txt") + assert child_x.exists_on_s3() + assert child_y.exists_on_s3() + + def test_upload_from_mirror_folder_no_double_slash(self, s3_client, tmp_path): + """upload_from_mirror on a folder must not produce double-slash S3 keys.""" + folder_mp = MirrorPath.from_s3_key("mp_test/up_nods/") + folder_mp.local_path.mkdir(parents=True, exist_ok=True) + (folder_mp.local_path / "file.txt").write_bytes(b"data") + + folder_mp.upload_from_mirror(overwrite=True) + + child_mp = folder_mp.get_child("file.txt") + assert "//" not in child_mp.s3_key + assert child_mp.exists_on_s3() + + def test_upload_from_mirror_folder_nested(self, s3_client, tmp_path): + """upload_from_mirror recursively uploads nested subdirectories.""" + folder_mp = MirrorPath.from_s3_key("mp_test/up_nested/") + folder_mp.local_path.mkdir(parents=True, exist_ok=True) + (folder_mp.local_path / "top.txt").write_bytes(b"top") + sub = folder_mp.local_path / "sub" + sub.mkdir() + (sub / "deep.txt").write_bytes(b"deep") + + folder_mp.upload_from_mirror(overwrite=True) + + top_mp = folder_mp.get_child("top.txt") + sub_mp = folder_mp.get_child("sub") + deep_mp = sub_mp.get_child("deep.txt") + assert top_mp.exists_on_s3() + assert deep_mp.exists_on_s3() + + def test_upload_from_mirror_folder_overwrite_false_still_uploads_new_children( + self, s3_client, tmp_path + ): + """When overwrite=False and the folder prefix exists, new child files are still uploaded.""" + # Upload one child first so the prefix exists on S3 + folder_mp = MirrorPath.from_s3_key("mp_test/up_partial/") + folder_mp.local_path.mkdir(parents=True, exist_ok=True) + + existing_mp = folder_mp.get_child("existing.txt") + existing_mp.local_path.parent.mkdir(parents=True, exist_ok=True) + existing_mp.local_path.write_bytes(b"original") + existing_mp.upload_from_mirror(overwrite=True) + assert existing_mp.exists_on_s3() + + # Now add a new file locally and re-upload with overwrite=False + (folder_mp.local_path / "new_file.txt").write_bytes(b"new_data") + + folder_mp.upload_from_mirror(overwrite=False) + + # The new file should have been uploaded + new_mp = folder_mp.get_child("new_file.txt") + assert new_mp.exists_on_s3() + + # The existing file should not have been re-uploaded (still original) + existing_mp.download_to_mirror(overwrite=True) + assert existing_mp.local_path.read_bytes() == b"original" + def _write_text(path: str, data: str): """Helper: write text to a file.""" diff --git a/tests/test_prefix_queries.py b/tests/test_prefix_queries.py index 499027b..4a2efad 100644 --- a/tests/test_prefix_queries.py +++ b/tests/test_prefix_queries.py @@ -10,6 +10,7 @@ get_folders_within_folder, ) from S3MP.types import S3Client +from S3MP.utils.s3_utils import delete_key_on_s3 TESTING_BUCKET_NAME = "s3mp-testing" @@ -32,7 +33,10 @@ def s3_env(): client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_one/", Body=b"") client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_one/nested.txt", Body=b"n") client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_two/", Body=b"") - return client + yield client + # Teardown + bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME) + delete_key_on_s3("pq_test/", bucket, client) # ── get_files_within_folder ─────────────────────────────────────────────── diff --git a/tests/test_s3_utils.py b/tests/test_s3_utils.py index 79361fe..d95f365 100644 --- a/tests/test_s3_utils.py +++ b/tests/test_s3_utils.py @@ -141,6 +141,9 @@ def test_folders_files(): for key in keys_that_are_folders: assert not key_is_file_on_s3(key, bucket, client) + # Cleanup + delete_key_on_s3("test_folder/", bucket, client) + if __name__ == "__main__": test_folders_files() @@ -151,12 +154,26 @@ def test_folders_files(): @pytest.fixture() def s3_env(): - """Create a client, resource, and bucket for S3 tests.""" + """Create a client, resource, and bucket for S3 tests; clean up afterward.""" client: S3Client = boto3.client("s3") with contextlib.suppress(client.exceptions.BucketAlreadyOwnedByYou): client.create_bucket(Bucket=TESTING_BUCKET_NAME) bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME) - return client, bucket + yield client, bucket + # Teardown: delete all objects under known test prefixes + for prefix in [ + "single/", + "single_multi/", + "size/", + "dl_up/", + "up_folder/", + "up_nested/", + "del/", + "del_folder/", + "del_nested/", + "del_children/", + ]: + delete_key_on_s3(prefix, bucket, client) # ── s3_list_single_key ──────────────────────────────────────────────────── @@ -242,6 +259,39 @@ def test_download_nonexistent_raises(self, s3_env, tmp_path): with pytest.raises(ValueError, match="does not exist"): download_key("dl_up/no_such.txt", tmp_path / "out.txt", bucket, client) + def test_upload_folder_no_double_slash(self, s3_env, tmp_path): + """upload_to_key with a trailing-slash key must not produce double slashes.""" + client, bucket = s3_env + folder = tmp_path / "my_folder" + folder.mkdir() + (folder / "a.txt").write_bytes(b"aaa") + (folder / "b.txt").write_bytes(b"bbb") + + upload_to_key("up_folder/", folder, bucket, client) + + # Keys should NOT contain "//" + assert key_exists_on_s3("up_folder/a.txt", bucket, client) + assert key_exists_on_s3("up_folder/b.txt", bucket, client) + assert not key_exists_on_s3("up_folder//a.txt", bucket, client) + + def test_upload_nested_folder(self, s3_env, tmp_path): + """upload_to_key recursively uploads nested directories.""" + client, bucket = s3_env + root = tmp_path / "nested_root" + root.mkdir() + (root / "top.txt").write_bytes(b"top") + sub = root / "sub" + sub.mkdir() + (sub / "deep.txt").write_bytes(b"deep") + + upload_to_key("up_nested/", root, bucket, client) + + assert key_exists_on_s3("up_nested/top.txt", bucket, client) + assert key_exists_on_s3("up_nested/sub/deep.txt", bucket, client) + # No double slashes + assert not key_exists_on_s3("up_nested//top.txt", bucket, client) + assert not key_exists_on_s3("up_nested//sub/deep.txt", bucket, client) + # ── delete_key_on_s3 / delete_child_keys_on_s3 ─────────────────────────── @@ -269,6 +319,27 @@ def test_delete_folder_removes_children(self, s3_env): delete_key_on_s3("del_folder/", bucket, client) assert not key_exists_on_s3("del_folder/a.txt", bucket, client) assert not key_exists_on_s3("del_folder/b.txt", bucket, client) + assert not key_exists_on_s3("del_folder/", bucket, client) + + def test_delete_folder_removes_nested_children(self, s3_env): + """delete_key_on_s3 must recursively remove nested subfolders and their contents.""" + client, bucket = s3_env + client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/", Body=b"") + client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/top.txt", Body=b"top") + client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/", Body=b"") + client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deep.txt", Body=b"deep") + client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deeper/", Body=b"") + client.put_object( + Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deeper/bottom.txt", Body=b"bottom" + ) + + delete_key_on_s3("del_nested/", bucket, client) + + assert not key_exists_on_s3("del_nested/top.txt", bucket, client) + assert not key_exists_on_s3("del_nested/sub/deep.txt", bucket, client) + assert not key_exists_on_s3("del_nested/sub/deeper/bottom.txt", bucket, client) + assert not key_exists_on_s3("del_nested/sub/", bucket, client) + assert not key_exists_on_s3("del_nested/", bucket, client) def test_delete_child_keys(self, s3_env): client, bucket = s3_env diff --git a/uv.lock b/uv.lock index c3e957d..0f46621 100644 --- a/uv.lock +++ b/uv.lock @@ -1515,7 +1515,7 @@ wheels = [ [[package]] name = "s3mp" -version = "0.9.0" +version = "0.9.1" source = { editable = "." } dependencies = [ { name = "aioboto3" },