diff --git a/docs/changes.md b/docs/changes.md index f873d0b..a850efd 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -4,6 +4,11 @@ * Update Norbeck to the current 2026-01 version ({pull}`96`) * Various Bill Black fixes ({pull}`96`) +* Add zlib-based compression support + for Eskin ABC Transcription Tools URL decoding and URL creation ({pull}`95`). + It is the default in the Eskin tools as of 2026-02-02, + but for now, in {func}`~pyabc2.sources.eskin.abc_to_abctools_url` + you have to opt in using `lzw=False`. ## v0.1.1 (2026-01-20) diff --git a/pyabc2/sources/eskin.py b/pyabc2/sources/eskin.py index 62ed5d1..912aacc 100644 --- a/pyabc2/sources/eskin.py +++ b/pyabc2/sources/eskin.py @@ -59,6 +59,37 @@ _URL_NETLOCS = {"michaeleskin.com", "www.michaeleskin.com"} +def _deflate(s: str, /) -> str: + """Use deflate (zlib) to compress and base64-encode `s`.""" + import base64 + import zlib + + b = s.encode("utf-8") + c = zlib.compress(b) + b64 = base64.b64encode(c).decode("ascii") + b64_for_url = b64.replace("+", "-").replace("/", "_").rstrip("=") + return b64_for_url + + +def _inflate(s: str, /) -> str: + """Base64-decode `s` and use inflate (zlib) to decompress it.""" + import base64 + import zlib + + b64 = s.replace("-", "+").replace("_", "/") + pad = len(b64) % 4 + if pad == 2: + b64 += "==" + elif pad == 3: + b64 += "=" + else: + if pad != 0: + raise ValueError(f"Invalid base64 string length {len(b64)}") + c = base64.b64decode(b64) + b = zlib.decompress(c) + return b.decode("utf-8") + + def abctools_url_to_abc( url: str, *, @@ -85,6 +116,14 @@ def abctools_url_to_abc( remove_prefs Remove lines starting with these prefixes. Use ``False`` or an empty iterable to keep all lines instead. + + Notes + ----- + ``def`` takes precedence if both ``def`` and ``lzw`` are present in the URL query parameters. 
+ + See Also + -------- + abc_to_abctools_url """ if not remove_prefs: @@ -99,19 +138,36 @@ def abctools_url_to_abc( logger.debug(f"Unexpected Eskin URL path: {res.path}") query_params = parse_qs(res.query) - try: - (lzw,) = query_params["lzw"] - except Exception as e: - raise ValueError("URL does not contain required 'lzw' parameter") from e - # Note `+` has been replaced with space by parse_qs - # Note js LZString.compressToEncodedURIComponent() is used to compress/encode the ABC + # Note `+` in the query values has been replaced with space by parse_qs - try: - abc = LZString.decompressFromEncodedURIComponent(lzw) - except Exception as e: - raise RuntimeError("Failed to decompress LZString data") from e - if abc is None: - raise RuntimeError("Failed to decompress LZString data") + abc_params = ["def", "lzw"] + todo = abc_params[:] + while todo: + param = todo.pop(0) + try: + (encoded,) = query_params[param] + except KeyError: + continue + + if param == "lzw": + try: + abc = LZString.decompressFromEncodedURIComponent(encoded) + except Exception as e: + raise RuntimeError("Failed to decompress LZString data") from e + if abc is None: # pragma: no cover + raise RuntimeError("Failed to decompress LZString data") + break + elif param == "def": + try: + abc = _inflate(encoded) + except Exception as e: + raise RuntimeError("Failed to decompress deflate data") from e + break + else: # pragma: no cover + raise AssertionError(f"Unexpected ABC data parameter: {param!r}") + else: + s_params = ", ".join(repr(p) for p in abc_params) + raise ValueError(f"No known ABC data parameter found in URL (tried {s_params})") wanted_lines = [ line.strip() for line in abc.splitlines() if not line.lstrip().startswith(remove_prefs) @@ -120,19 +176,37 @@ def abctools_url_to_abc( return "\n".join(wanted_lines) -def abc_to_abctools_url(abc: str) -> str: +def abc_to_abctools_url(abc: str, *, lzw: bool = True) -> str: """Create an Eskin abctools (``michaeleskin.com/abctools/``) share URL for `abc`. 
More info: https://michaeleskin.com/tools/generate_share_link.html + + Parameters + ---------- + abc + The tune. + lzw + Whether to use the original LZString compression method (``True``, default) + or the newer deflate (zlib) compression method (``False``), + which gives shorter URLs. + + See Also + -------- + abctools_url_to_abc """ # Must start with 'X:' (seems value is not required) if not abc.lstrip().startswith("X"): abc = "X:\n" + abc - lzw = LZString.compressToEncodedURIComponent(abc) + if lzw: + param = "lzw" + compressed = LZString.compressToEncodedURIComponent(abc) + else: + param = "def" + compressed = _deflate(abc) - return f"https://michaeleskin.com/abctools/abctools.html?lzw={lzw}" + return f"https://michaeleskin.com/abctools/abctools.html?{param}={compressed}" class EskinTunebookInfo(NamedTuple): @@ -298,8 +372,10 @@ def load_url(url: str) -> Tune: from . import load_example_abc abc = load_example_abc("For the Love of Music") - url = abc_to_abctools_url(abc) - print(url) + url_lzw = abc_to_abctools_url(abc, lzw=True) + print(url_lzw) + url_def = abc_to_abctools_url(abc, lzw=False) + print(url_def) kss = load_meta("kss") print(kss) diff --git a/tests/test_sources.py b/tests/test_sources.py index ded9332..7d21a41 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -19,6 +19,13 @@ NORBECK_IRISH_COUNT = 2813 +ESKIN_COMPRESSED_ABC_DATA = { + # For the Love of Music with `X:` + # Compressed/encoded tune data only (no other query params) + "lzw": "BoLgUAKiBiD2BOACCALApogMrAbhg8gGaICyArgM4CWAxmAEogUA2VADogFZUDmYAwiExUAXon4BDePFjNmYEiACcAegAcYTCACM6sAGkQAcTBGAogBFEFs0cQBBIwCFEAHwdG7zgCaI0333dzKxs7Rxo3RCc0DCd7F3MzRBBXMB5-PxVCFR4EpxUaFUDEdN80HgAjRAkAJmJ3Uszs3Id8wuL-F28nMKdAtIy0LJy8gqLIxvKq2olIipimnIxankjOxG7e+zdUoA", + "def": 
"eJyFjbEKwkAQRPv9iv2DQyvd7jbGK0wQJIVtktucJ4FIghZyH-8aCWJlM_BmZ2fOBBXthxGri2AxPASPHZb3KbZwoqmPN7zGABkV8YlZPY5D30NJW7OBglaqB3Lg8h3ucofWMSZVh449ivdK31urxCLIltXNkRIE0ZjpTFCHTWveD7MXGqzX3UKfhF0S4hk9a6WO_OuolRodnROiRvgpsJgSvAAdaUjy", +} + @pytest.mark.parametrize("tune_name", examples) def test_examples_load(tune_name): @@ -258,10 +265,16 @@ def test_load_url_norbeck(netloc): @pytest.mark.parametrize("netloc", sorted(eskin._URL_NETLOCS)) -def test_load_url_eskin(netloc): - url = f"https://{netloc}/abctools/abctools.html?lzw=BoLgUAKiBiD2BOACCALApogMrAbhg8gGaICyArgM4CWAxmAEogUA2VADogFZUDmYAwiExUAXon4BDePFjNmYEiACcAegAcYTCACM6sAGkQAcTBGAogBFEFs0cQBBIwCFEAHwdG7zgCaI0333dzKxs7Rxo3RCc0DCd7F3MzRBBXMB5-PxVCFR4EpxUaFUDEdN80HgAjRAkAJmJ3Uszs3Id8wuL-F28nMKdAtIy0LJy8gqLIxvKq2olIipimnIxankjOxG7e+zdUoA" +@pytest.mark.parametrize("param", list(ESKIN_COMPRESSED_ABC_DATA)) +def test_load_url_eskin(netloc, param): + data = ESKIN_COMPRESSED_ABC_DATA[param] + url = f"https://{netloc}/abctools/abctools.html?{param}={data}" tune = load_url(url) assert tune.title == "For The Love Of Music" + assert str(tune.key) == "Gmaj" + assert len(tune.measures) == 16 + assert len(tune.measures[0]) == len(tune.measures[-1]) == 9 + assert tune.abc + "\n" == "X:\n" + examples["for the love of music"] def test_load_url_invalid_domain(): @@ -375,7 +388,10 @@ def test_eskin_abc_url_parsing(): def test_eskin_abc_url_missing_param(): url = "https://michaeleskin.com/abctools/abctools.html?" 
- with pytest.raises(ValueError, match="URL does not contain required 'lzw' parameter"): + with pytest.raises( + ValueError, + match=r"No known ABC data parameter found in URL \(tried 'def', 'lzw'\)", + ): _ = eskin.abctools_url_to_abc(url) @@ -385,6 +401,12 @@ def test_eskin_abc_url_bad_param(): _ = eskin.abctools_url_to_abc(url) +def test_eskin_abc_url_bad_param_def(): + url = "https://michaeleskin.com/abctools/abctools.html?def=hi" + with pytest.raises(RuntimeError, match="Failed to decompress deflate data"): + _ = eskin.abctools_url_to_abc(url) + + def test_eskin_abc_url_bad(caplog): url = "https://michaeleski.com/deftools/abctools.html?lzw=BoLgjAUApFAuCWsA2BTAZgewHawAQAUBDJQhLDXMADmigGcBXAIwWXWzyJLIrAGZa8LJkw4CxUkN4CYAB0IAnWHVGcJPSjLgoAtrIyrx3KZtqwUAD1iGuk8qYAqIBwAsUuAIJMmKAJ64IABlwAHoaAFkQABYQqIgARRAAJliAXgBOAAYIACUQHJQUJGg6AHchAHNcTIA6SABpEABxaEImAGMAKzoAfToMBiwAE0M0UiYMX1pwgEkAERncWQUMCoVCHWrp+cWmQjo6ZdWtmFmF3HaXDAUho6rs053cPYOANwwkXAA2OMfzy+uQ3enx+rSGQx6xCQPVkJF8e3aAGsekghIi6BAAEQeHSYzx8XAAIU8SVwTQAorgAD6eDxNDy4TFNPGE8HEmnY3G0jzEunkgBi1MZzLJTXpRLZ1KxOLxHlJPJJZMpNI8dIZTJZko5MsVCr5go5IrF4tZQyqVKp0q5KAqttwhFJeyFGtwFTaVUIFRQQ2dOptdodrsIzuZTDdaFd7iGpMtnLx-o9FTDIcxnuTnu9vutto9pLdKbDhAjXqG7IAukA&format=noten&ssp=10&name=The_Abbey&play=1" with caplog.at_level("DEBUG"): @@ -396,12 +418,13 @@ def test_eskin_abc_url_bad(caplog): ] -def test_eskin_abc_url_creation(): +@pytest.mark.parametrize("use_lzw", [True, False]) +def test_eskin_abc_url_creation(use_lzw): import requests abc = load_example_abc("For the Love of Music") - url = eskin.abc_to_abctools_url(abc) + url = eskin.abc_to_abctools_url(abc, lzw=use_lzw) r = requests.head(url, timeout=5) r.raise_for_status() if ( @@ -416,6 +439,20 @@ def test_eskin_invalid_tunebook_key(): _ = eskin.get_tunebook_info("asdf") +def test_eskin_inflate_invalid_length(): + s = "eJyFjbEKwkAQRPv9iv2DQyvd7jbGK0wQJIVtktucJ4FIghZyH-abcdefg" + with pytest.raises( + ValueError, + match=f"Invalid base64 string length {len(s)}", + ): + _ = 
eskin._inflate(s) + + +def test_eskin_inflate_pad_3(): + s = "abc" + assert eskin._inflate(eskin._deflate(s)) == s + + @pytest.mark.xfail(reason="Bill Black site now has HTTPS", strict=False) def test_bill_black_no_https(): # If the site does get HTTPS, we'd like to know