Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ edgee = Edgee("your-api-key")

# Send a simple request
response = edgee.send(
model="gpt-5.2",
model="anthropic/claude-haiku-4-5",
input="What is the capital of France?"
)

Expand All @@ -34,7 +34,7 @@ The `send()` method makes non-streaming chat completion requests:

```python
response = edgee.send(
model="gpt-5.2",
model="anthropic/claude-haiku-4-5",
input="Hello, world!"
)

Expand All @@ -48,17 +48,18 @@ if response.usage:
print(f"Tokens used: {response.usage.total_tokens}")

if response.compression:
print(f"Input tokens: {response.compression.input_tokens}")
print(f"Saved tokens: {response.compression.saved_tokens}")
print(f"Compression rate: {response.compression.rate}")
print(f"Reduction: {response.compression.reduction}%")
print(f"Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
print(f"Time: {response.compression.time_ms} ms")
```

## Stream Method

The `stream()` method enables real-time streaming responses:

```python
for chunk in edgee.stream("gpt-5.2", "Tell me a story"):
for chunk in edgee.stream("anthropic/claude-haiku-4-5", "Tell me a story"):
if chunk.text:
print(chunk.text, end="", flush=True)

Expand Down
27 changes: 21 additions & 6 deletions edgee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
import os
import ssl
from dataclasses import dataclass
from urllib.error import HTTPError
from urllib.request import Request, urlopen
Expand All @@ -11,6 +12,18 @@
API_ENDPOINT = "/v1/chat/completions"


def _ssl_context() -> ssl.SSLContext:
    """Create the SSL context passed to ``urlopen`` for HTTPS requests.

    Starts from ``ssl.create_default_context()`` and, when the optional
    ``certifi`` package is importable, additionally loads certifi's CA bundle
    (fixes certificate issues on macOS).

    Returns:
        The configured ``ssl.SSLContext``.
    """
    ctx = ssl.create_default_context()
    try:
        # Imported lazily: certifi is an optional dependency.
        import certifi
    except ImportError:
        # certifi is not installed: use the default system/store certs.
        return ctx
    ctx.load_verify_locations(certifi.where())
    return ctx


@dataclass
class FunctionDefinition:
name: str
Expand Down Expand Up @@ -70,9 +83,10 @@ class Usage:

@dataclass
class Compression:
input_tokens: int
saved_tokens: int
rate: float
cost_savings: int # micro-units (e.g. 27000 = $0.027)
reduction: int # percentage (e.g. 48 = 48%)
time_ms: int # milliseconds


@dataclass
Expand Down Expand Up @@ -253,7 +267,7 @@ def send(
def _handle_non_streaming_response(self, request: Request) -> SendResponse:
"""Handle non-streaming response."""
try:
with urlopen(request) as response:
with urlopen(request, context=_ssl_context()) as response:
data = json.loads(response.read().decode("utf-8"))
except HTTPError as e:
error_body = e.read().decode("utf-8")
Expand All @@ -279,17 +293,18 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
compression = None
if "compression" in data:
compression = Compression(
input_tokens=data["compression"]["input_tokens"],
saved_tokens=data["compression"]["saved_tokens"],
rate=data["compression"]["rate"],
cost_savings=data["compression"]["cost_savings"],
reduction=data["compression"]["reduction"],
time_ms=data["compression"]["time_ms"],
)

return SendResponse(choices=choices, usage=usage, compression=compression)

def _handle_streaming_response(self, request: Request):
"""Handle streaming response, yielding StreamChunk objects."""
try:
with urlopen(request) as response:
with urlopen(request, context=_ssl_context()) as response:
# Read and parse SSE stream
for line in response:
decoded_line = line.decode("utf-8")
Expand Down
26 changes: 11 additions & 15 deletions example/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
Based on this context, summarize the key milestones in AI development in 3 bullet points."""

response = edgee.send(
model="gpt-5.2",
model="anthropic/claude-haiku-4-5",
input={
"messages": [
{"role": "user", "content": user_message},
Expand All @@ -111,21 +111,17 @@
# Display compression information
if response.compression:
print("Compression Metrics:")
print(f" Input tokens: {response.compression.input_tokens}")
print(f" Saved tokens: {response.compression.saved_tokens}")
print(f" Compression rate: {response.compression.rate:.2%}")
savings_pct = (
(response.compression.saved_tokens / response.compression.input_tokens * 100)
if response.compression.input_tokens > 0
else 0
)
print(f" Savings: {savings_pct:.1f}% of input tokens saved!")
print()
print(" 💡 Without compression, this request would have used")
print(f" {response.compression.input_tokens} input tokens.")
print(
f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
)
print(f" Reduction: {response.compression.reduction}%")
print(f" Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
print(f" Time: {response.compression.time_ms} ms")
if response.compression.reduction > 0:
original_tokens = response.compression.saved_tokens * 100 // response.compression.reduction
tokens_after = original_tokens - response.compression.saved_tokens
print()
print(" 💡 Without compression, this request would have used")
print(f" {original_tokens} input tokens.")
print(f" With compression, only {tokens_after} tokens were processed!")
else:
print("No compression data available in response.")
print("Note: Compression data is only returned when compression is enabled")
Expand Down
8 changes: 4 additions & 4 deletions example/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Test 1: Simple string input
print("Test 1: Simple string input")
response1 = edgee.send(
model="mistral/mistral-small-latest",
model="anthropic/claude-haiku-4-5",
input="What is the capital of France?",
)
print(f"Content: {response1.text}")
Expand All @@ -23,7 +23,7 @@
# Test 2: Full input object with messages
print("Test 2: Full input object with messages")
response2 = edgee.send(
model="mistral/mistral-small-latest",
model="anthropic/claude-haiku-4-5",
input={
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
Expand All @@ -37,7 +37,7 @@
# Test 3: With tools
print("Test 3: With tools")
response3 = edgee.send(
model="gpt-5.2",
model="anthropic/claude-haiku-4-5",
input={
"messages": [{"role": "user", "content": "What is the weather in Paris?"}],
"tools": [
Expand Down Expand Up @@ -65,7 +65,7 @@

# Test 4: Streaming
print("Test 4: Streaming")
for chunk in edgee.stream(model="mistral/mistral-small-latest", input="What is Python?"):
for chunk in edgee.stream(model="anthropic/claude-haiku-4-5", input="What is Python?"):
if chunk.text:
print(chunk.text, end="", flush=True)
print("\n")
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "edgee"
version = "1.0.1"
version = "1.0.2"
description = "Lightweight Python SDK for Edgee AI Gateway"
readme = "README.md"
license = "Apache-2.0"
Expand All @@ -25,6 +25,8 @@ Repository = "https://github.com/edgee-ai/python-sdk"

[project.optional-dependencies]
dev = ["pytest>=8.0.0", "ruff>=0.8.0"]
# Install certifi to fix SSL certificate verification on macOS (Python.org installs)
ssl = ["certifi>=2024.0.0"]

[build-system]
requires = ["hatchling"]
Expand Down
10 changes: 6 additions & 4 deletions tests/test_edgee.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,10 @@ def test_send_with_compression_response(self, mock_urlopen):
],
"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
"compression": {
"input_tokens": 100,
"saved_tokens": 42,
"rate": 0.6102003642987249,
"cost_savings": 27000,
"reduction": 48,
"time_ms": 150,
},
}
mock_urlopen.return_value = self._mock_response(mock_response_data)
Expand All @@ -331,9 +332,10 @@ def test_send_with_compression_response(self, mock_urlopen):
result = client.send(model="gpt-4", input="Test")

assert result.compression is not None
assert result.compression.input_tokens == 100
assert result.compression.saved_tokens == 42
assert result.compression.rate == 0.6102003642987249
assert result.compression.cost_savings == 27000
assert result.compression.reduction == 48
assert result.compression.time_ms == 150

@patch("edgee.urlopen")
def test_send_without_compression_response(self, mock_urlopen):
Expand Down
17 changes: 15 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading