Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .sampo/changesets/django-tracing-header-sanitization.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
pypi/posthog: patch
---

Sanitize PostHog tracing headers extracted by Django middleware.
48 changes: 41 additions & 7 deletions posthog/integrations/django.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import TYPE_CHECKING, cast
import re
from typing import TYPE_CHECKING, Optional, cast

from posthog import contexts
from posthog.client import Client

Expand All @@ -18,7 +20,36 @@ def markcoroutinefunction(func):

if TYPE_CHECKING:
from django.http import HttpRequest, HttpResponse # noqa: F401
from typing import Callable, Dict, Any, Optional, Union, Awaitable # noqa: F401
from typing import Callable, Dict, Any, Union, Awaitable # noqa: F401


_MAX_TRACING_HEADER_LENGTH = 1000
Comment thread
dustinbyrne marked this conversation as resolved.
_TRACING_HEADER_CONTROL_CHARS_RE = re.compile(r"[\x00-\x1f\x7f-\x9f]")


def _sanitize_tracing_header_value(value) -> Optional[str]:
"""Return a safe tracing header value, or None if the value is invalid.

Tracing headers come from user-controlled HTTP requests and are copied into event properties.
Match the PostHog app's header sanitization: accept strings only, remove C0/C1 control
characters, trim surrounding whitespace, cap length, and drop empty results.
"""
if not isinstance(value, str) or not value:
return None

return (
_TRACING_HEADER_CONTROL_CHARS_RE.sub("", value).strip()[
:_MAX_TRACING_HEADER_LENGTH
]
or None
)


def _get_sanitized_tracing_header(request, header_name) -> Optional[str]:
try:
return _sanitize_tracing_header_value(request.headers.get(header_name))
except Exception:
return None


class PosthogContextMiddleware:
Expand All @@ -42,9 +73,10 @@ class PosthogContextMiddleware:
You can use the `POSTHOG_MW_TAG_MAP` function to remove any default tags you don't want to capture, or override them with your own values.

Context tags are automatically included as properties on all events captured within a context, including exceptions.
See the context documentation for more information. The extracted distinct ID and session ID, if found, are used to
associate all events captured in the middleware context with the same distinct ID and session as currently active on the
frontend. See the documentation for `set_context_session` and `identify_context` for more details.
See the context documentation for more information. The extracted distinct ID and session ID,
if found, are used to associate all events captured in the middleware context with the same distinct ID
and session as currently active on the frontend. See the documentation for `set_context_session`
and `identify_context` for more details.

This middleware is hybrid-capable: it supports both WSGI (sync) and ASGI (async) Django applications. The middleware
detects at initialization whether the next middleware in the chain is async or sync, and adapts its behavior accordingly.
Expand Down Expand Up @@ -126,12 +158,14 @@ def _build_tags(self, request, user_id, user_email):
tags = {}

# Extract session ID from X-POSTHOG-SESSION-ID header
session_id = request.headers.get("X-POSTHOG-SESSION-ID")
session_id = _get_sanitized_tracing_header(request, "X-POSTHOG-SESSION-ID")
if session_id:
contexts.set_context_session(session_id)

# Extract distinct ID from X-POSTHOG-DISTINCT-ID header or request user id
distinct_id = request.headers.get("X-POSTHOG-DISTINCT-ID") or user_id
distinct_id = (
_get_sanitized_tracing_header(request, "X-POSTHOG-DISTINCT-ID") or user_id
)
if distinct_id:
contexts.identify_context(distinct_id)

Expand Down
68 changes: 68 additions & 0 deletions posthog/test/integrations/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import unittest
from unittest.mock import Mock, patch
import asyncio
from parameterized import parameterized

# Configure Django settings before importing middleware
import django
Expand Down Expand Up @@ -132,6 +133,73 @@ def test_extract_tags_partial_headers(self):
self.assertIsNone(get_context_distinct_id())
self.assertEqual(tags["$request_method"], "PUT")

@parameterized.expand(
    [
        (
            "session_control_chars",
            "X-POSTHOG-SESSION-ID",
            " session\n-\t123\x85 ",
            get_context_session_id,
            "session-123",
            None,
        ),
        (
            "distinct_empty_falls_back_to_user",
            "X-POSTHOG-DISTINCT-ID",
            "\r\n ",
            get_context_distinct_id,
            "42",
            42,
        ),
    ]
)
def test_extract_tags_sanitizes_tracing_header(
    self, _name, header_name, raw_value, get_context_value, expected_value, user_pk
):
    """Tracing header values must be cleaned up before they reach the context."""
    with new_context():
        mw = self.create_middleware()
        req = MockRequest(headers={header_name: raw_value}, method="GET")

        # Attach an authenticated user only for cases that supply a primary key.
        if user_pk is not None:
            req.user = Mock(is_authenticated=True, pk=user_pk)

        mw.extract_tags(req)

        self.assertEqual(get_context_value(), expected_value)

@parameterized.expand(
    [
        (
            "session_non_string",
            "X-POSTHOG-SESSION-ID",
            123,
            get_context_session_id,
        ),
        (
            "distinct_non_string",
            "X-POSTHOG-DISTINCT-ID",
            object(),
            get_context_distinct_id,
        ),
    ]
)
def test_extract_tags_ignores_non_string_tracing_header(
    self, _name, header_name, raw_value, get_context_value
):
    """Header values that are not strings must be dropped without raising."""
    with new_context():
        mw = self.create_middleware()
        req = MockRequest(headers={header_name: raw_value}, method="GET")

        mw.extract_tags(req)

        self.assertIsNone(get_context_value())

def test_extract_tags_with_extra_tags(self):
"""Test tag extraction with extra_tags function"""

Comment thread
dustinbyrne marked this conversation as resolved.
Expand Down
Loading