From ed9da182a50e932f76ecd2252d50d623a4dba62e Mon Sep 17 00:00:00 2001 From: SachaMorard <2254275+SachaMorard@users.noreply.github.com> Date: Fri, 6 Mar 2026 08:55:30 +0100 Subject: [PATCH 1/3] feat: update compression response to new API format, add SSL cert fix - Replace input_tokens, rate with saved_tokens, cost_savings, reduction, time_ms - Use certifi for SSL context when available (fixes cert verification on macOS) - Add optional edgee[ssl] dependency for certifi - Update README with new compression fields and troubleshooting section Made-with: Cursor --- README.md | 11 ++++++----- edgee/__init__.py | 27 +++++++++++++++++++++------ example/compression.py | 26 +++++++++++--------------- example/test.py | 8 ++++---- pyproject.toml | 4 +++- tests/test_edgee.py | 10 ++++++---- uv.lock | 2 +- 7 files changed, 52 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 8f87046..3c3d2b1 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ edgee = Edgee("your-api-key") # Send a simple request response = edgee.send( - model="gpt-5.2", + model="anthropic/claude-haiku-4-5", input="What is the capital of France?" ) @@ -34,7 +34,7 @@ The `send()` method makes non-streaming chat completion requests: ```python response = edgee.send( - model="gpt-5.2", + model="anthropic/claude-haiku-4-5", input="Hello, world!" ) @@ -48,9 +48,10 @@ if response.usage: print(f"Tokens used: {response.usage.total_tokens}") if response.compression: - print(f"Input tokens: {response.compression.input_tokens}") print(f"Saved tokens: {response.compression.saved_tokens}") - print(f"Compression rate: {response.compression.rate}") + print(f"Reduction: {response.compression.reduction}%") + print(f"Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}") + print(f"Time: {response.compression.time_ms} ms") ``` ## Stream Method @@ -58,7 +59,7 @@ if response.compression: The `stream()` method enables real-time streaming responses: ```python -for chunk in edgee.stream("gpt-5.2", "Tell me a story"): +for chunk in edgee.stream("anthropic/claude-haiku-4-5", "Tell me a story"): if chunk.text: print(chunk.text, end="", flush=True) diff --git a/edgee/__init__.py b/edgee/__init__.py index b8d5520..f03d131 100644 --- a/edgee/__init__.py +++ b/edgee/__init__.py @@ -2,6 +2,7 @@ import json import os +import ssl from dataclasses import dataclass from urllib.error import HTTPError from urllib.request import Request, urlopen @@ -11,6 +12,18 @@ API_ENDPOINT = "/v1/chat/completions" +def _ssl_context() -> ssl.SSLContext: + """Create SSL context. Uses certifi's CA bundle when available (fixes cert issues on macOS).""" + ctx = ssl.create_default_context() + try: + import certifi + + ctx.load_verify_locations(certifi.where()) + except ImportError: + pass # Use default system/store certs + return ctx + + @dataclass class FunctionDefinition: name: str @@ -70,9 +83,10 @@ class Usage: @dataclass class Compression: - input_tokens: int saved_tokens: int - rate: float + cost_savings: int # micro-units (e.g. 27000 = $0.027) + reduction: int # percentage (e.g. 48 = 48%) + time_ms: int # milliseconds @dataclass @@ -253,7 +267,7 @@ def send( def _handle_non_streaming_response(self, request: Request) -> SendResponse: """Handle non-streaming response.""" try: - with urlopen(request) as response: + with urlopen(request, context=_ssl_context()) as response: data = json.loads(response.read().decode("utf-8")) except HTTPError as e: error_body = e.read().decode("utf-8") @@ -279,9 +293,10 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse: compression = None if "compression" in data: compression = Compression( - input_tokens=data["compression"]["input_tokens"], saved_tokens=data["compression"]["saved_tokens"], - rate=data["compression"]["rate"], + cost_savings=data["compression"]["cost_savings"], + reduction=data["compression"]["reduction"], + time_ms=data["compression"]["time_ms"], ) return SendResponse(choices=choices, usage=usage, compression=compression) @@ -289,7 +304,7 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse: def _handle_streaming_response(self, request: Request): """Handle streaming response, yielding StreamChunk objects.""" try: - with urlopen(request) as response: + with urlopen(request, context=_ssl_context()) as response: # Read and parse SSE stream for line in response: decoded_line = line.decode("utf-8") diff --git a/example/compression.py b/example/compression.py index 4dce59e..67c6128 100644 --- a/example/compression.py +++ b/example/compression.py @@ -87,7 +87,7 @@ Based on this context, summarize the key milestones in AI development in 3 bullet points.""" response = edgee.send( - model="gpt-5.2", + model="anthropic/claude-haiku-4-5", input={ "messages": [ {"role": "user", "content": user_message}, @@ -111,21 +111,17 @@ # Display compression information if response.compression: print("Compression Metrics:") - print(f" Input tokens: {response.compression.input_tokens}") print(f" Saved tokens: {response.compression.saved_tokens}") - print(f" Compression rate: {response.compression.rate:.2%}") - savings_pct = ( - (response.compression.saved_tokens / response.compression.input_tokens * 100) - if response.compression.input_tokens > 0 - else 0 - ) - print(f" Savings: {savings_pct:.1f}% of input tokens saved!") - print() - print(" 💡 Without compression, this request would have used") - print(f" {response.compression.input_tokens} input tokens.") - print( - f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!" - ) + print(f" Reduction: {response.compression.reduction}%") + print(f" Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}") + print(f" Time: {response.compression.time_ms} ms") + if response.compression.reduction > 0: + original_tokens = response.compression.saved_tokens * 100 // response.compression.reduction + tokens_after = original_tokens - response.compression.saved_tokens + print() + print(" 💡 Without compression, this request would have used") + print(f" {original_tokens} input tokens.") + print(f" With compression, only {tokens_after} tokens were processed!") else: print("No compression data available in response.") print("Note: Compression data is only returned when compression is enabled") diff --git a/example/test.py b/example/test.py index d1f3ee5..d522f9b 100644 --- a/example/test.py +++ b/example/test.py @@ -13,7 +13,7 @@ # Test 1: Simple string input print("Test 1: Simple string input") response1 = edgee.send( - model="mistral/mistral-small-latest", + model="anthropic/claude-haiku-4-5", input="What is the capital of France?", ) print(f"Content: {response1.text}") @@ -23,7 +23,7 @@ # Test 2: Full input object with messages print("Test 2: Full input object with messages") response2 = edgee.send( - model="mistral/mistral-small-latest", + model="anthropic/claude-haiku-4-5", input={ "messages": [ {"role": "system", "content": "You are a helpful assistant."}, @@ -37,7 +37,7 @@ # Test 3: With tools print("Test 3: With tools") response3 = edgee.send( - model="gpt-5.2", + model="anthropic/claude-haiku-4-5", input={ "messages": [{"role": "user", "content": "What is the weather in Paris?"}], "tools": [ @@ -65,7 +65,7 @@ # Test 4: Streaming print("Test 4: Streaming") -for chunk in edgee.stream(model="mistral/mistral-small-latest", input="What is Python?"): +for chunk in edgee.stream(model="anthropic/claude-haiku-4-5", input="What is Python?"): if chunk.text: print(chunk.text, end="", flush=True) print("\n") diff --git a/pyproject.toml b/pyproject.toml index f654b57..08126d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "edgee" -version = "1.0.1" +version = "1.0.2" description = "Lightweight Python SDK for Edgee AI Gateway" readme = "README.md" license = "Apache-2.0" @@ -25,6 +25,8 @@ Repository = "https://github.com/edgee-ai/python-sdk" [project.optional-dependencies] dev = ["pytest>=8.0.0", "ruff>=0.8.0"] +# Install certifi to fix SSL certificate verification on macOS (Python.org installs) +ssl = ["certifi>=2024.0.0"] [build-system] requires = ["hatchling"] diff --git a/tests/test_edgee.py b/tests/test_edgee.py index 520d714..5e1fd4c 100644 --- a/tests/test_edgee.py +++ b/tests/test_edgee.py @@ -320,9 +320,10 @@ def test_send_with_compression_response(self, mock_urlopen): ], "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}, "compression": { - "input_tokens": 100, "saved_tokens": 42, - "rate": 0.6102003642987249, + "cost_savings": 27000, + "reduction": 48, + "time_ms": 150, }, } mock_urlopen.return_value = self._mock_response(mock_response_data) @@ -331,9 +332,10 @@ def test_send_with_compression_response(self, mock_urlopen): result = client.send(model="gpt-4", input="Test") assert result.compression is not None - assert result.compression.input_tokens == 100 assert result.compression.saved_tokens == 42 - assert result.compression.rate == 0.6102003642987249 + assert result.compression.cost_savings == 27000 + assert result.compression.reduction == 48 + assert result.compression.time_ms == 150 @patch("edgee.urlopen") def test_send_without_compression_response(self, mock_urlopen): diff --git a/uv.lock b/uv.lock index 8842f95..5789609 100644 --- a/uv.lock +++ b/uv.lock @@ -13,7 +13,7 @@ wheels = [ [[package]] name = "edgee" -version = "1.0.0" +version = "1.0.1" source = { editable = "." } [package.optional-dependencies] From f995b8646aee93ce644050a887109e62833056f9 Mon Sep 17 00:00:00 2001 From: SachaMorard <2254275+SachaMorard@users.noreply.github.com> Date: Fri, 6 Mar 2026 09:31:10 +0100 Subject: [PATCH 2/3] chore: update dependencies --- uv.lock | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index 5789609..f341822 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 3 requires-python = ">=3.10" +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -13,7 +22,7 @@ wheels = [ [[package]] name = "edgee" -version = "1.0.1" +version = "1.0.2" source = { editable = "." } [package.optional-dependencies] @@ -21,6 +30,9 @@ dev = [ { name = "pytest" }, { name = "ruff" }, ] +ssl = [ + { name = "certifi" }, +] [package.dev-dependencies] dev = [ @@ -30,10 +42,11 @@ dev = [ [package.metadata] requires-dist = [ + { name = "certifi", marker = "extra == 'ssl'", specifier = ">=2024.0.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, ] -provides-extras = ["dev"] +provides-extras = ["dev", "ssl"] [package.metadata.requires-dev] dev = [ From c9848964aaf8dac0a5c28e6e85cce4f5cdd33f16 Mon Sep 17 00:00:00 2001 From: SachaMorard <2254275+SachaMorard@users.noreply.github.com> Date: Fri, 6 Mar 2026 09:32:57 +0100 Subject: [PATCH 3/3] chore: reformat --- example/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/test.py b/example/test.py index d522f9b..3f5b624 100644 --- a/example/test.py +++ b/example/test.py @@ -37,7 +37,7 @@ # Test 3: With tools print("Test 3: With tools") response3 = edgee.send( - model="anthropic/claude-haiku-4-5", + model="anthropic/claude-haiku-4-5", input={ "messages": [{"role": "user", "content": "What is the weather in Paris?"}], "tools": [