Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b3ea9b8
apply speculative TURN refresh fixes + diagnostic logging
Copilot Apr 29, 2026
f01167b
address review: preserve stun_message_find_error predicate; comment fix
Copilot Apr 29, 2026
53c7298
add round-2 fixes: suppress fatal signal when siblings alive, per-can…
Copilot Apr 29, 2026
f46d0ac
address review: log "never" for unset last_event; clarify off-by-one …
Copilot Apr 29, 2026
16e1a6a
logs
havardgraff Apr 29, 2026
a3ade21
logs
havardgraff Apr 29, 2026
228dd34
Speculative-fix #13: send TURN lifetime=0 release exactly once
Copilot Apr 29, 2026
289d278
new client logs
havardgraff Apr 29, 2026
19cf7b2
new pcap
havardgraff Apr 29, 2026
4756f60
chrome packet capture
havardgraff Apr 29, 2026
b5bb7f2
turn: fix refresh timers attached to wrong main context, add diagnost…
Copilot Apr 29, 2026
7069788
cleanup: remove pcaps/captured logs, drop heartbeat machinery, keep R…
Copilot Apr 29, 2026
04b5aca
tests: add regression tests for GMainContext bug and refresh-interval…
Copilot Apr 29, 2026
1ee0910
tests: wire new tests into meson.build (drop autotools wiring)
Copilot Apr 29, 2026
84b2e48
turn: cache realm/nonce on refresh; add test-only timeout knobs
Copilot Apr 29, 2026
b12e38b
conncheck/tests: fix ms overflow, deflake mainctx test, exercise TURN…
Copilot Apr 29, 2026
dcc6af9
turn: fix Source-ID-not-found crash on socket_close (timers attached …
Copilot Apr 29, 2026
1b829d7
socket/turn: fix Source-ID-not-found crash; tcp-bsd parens; agent.c f…
Copilot Apr 29, 2026
a099a24
socket/turn: log when priv_source_remove_with_context finds no source…
Copilot Apr 29, 2026
6415153
conncheck: honor NICE_TURN_EXPIRE_TIMEOUT for ALLOCATE refresh schedu…
Copilot Apr 29, 2026
3c82a14
conncheck: tear down TURN allocation when stale-nonce retries reach MAX
Copilot Apr 29, 2026
14f589d
conncheck: cap consecutive stale-nonce retries at 4
Copilot Apr 29, 2026
5313ce6
fixup we want 5
havardgraff Apr 29, 2026
e242069
conncheck: cache 200 OK ALLOCATE response so first Refresh uses fresh…
Copilot Apr 30, 2026
995c318
propagate Refresh-rotated NONCE to TURN socket credential cache
Copilot Apr 30, 2026
07f0549
conncheck: re-arm periodic Refresh on stale-nonce exhaustion instead …
Copilot Apr 30, 2026
efc1453
conncheck: re-read NICE_TURN_EXPIRE_TIMEOUT each refresh; warn when r…
Copilot Apr 30, 2026
d816113
remove accidentally committed config.log
Copilot Apr 30, 2026
571c472
Update agent/discovery.h
havardgraff Apr 30, 2026
4366992
Update tests/test-turn-refresh-interval.c
havardgraff Apr 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion agent/agent-priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ struct _NiceAgent
GSource *event_source;

gboolean full_mode; /* property: full-mode */
GTimeVal next_check_tv; /* property: next conncheck timestamp */
gchar *stun_server_ip; /* property: STUN server IP */
guint stun_server_port; /* property: STUN server port */
gchar *proxy_ip; /* property: Proxy server IP */
Expand Down
4 changes: 2 additions & 2 deletions agent/agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -3281,13 +3281,13 @@ _priv_set_socket_tos (NiceAgent * agent, NiceSocket * sock, gint tos)
if (sock->fileno &&
setsockopt (g_socket_get_fd (sock->fileno), IPPROTO_IP,
IP_TOS, (const char *) &tos, sizeof (tos)) < 0) {
GST_WARNING_OBJECT (agent, "Could not set socket ToS", g_strerror (errno));
GST_WARNING_OBJECT (agent, "Could not set socket ToS: %s", g_strerror (errno));
}
#ifdef IPV6_TCLASS
if (sock->fileno &&
setsockopt (g_socket_get_fd (sock->fileno), IPPROTO_IPV6,
IPV6_TCLASS, (const char *) &tos, sizeof (tos)) < 0) {
GST_DEBUG_OBJECT (agent, "Could not set IPV6 socket ToS",
GST_DEBUG_OBJECT (agent, "Could not set IPV6 socket ToS: %s",
g_strerror (errno));
}
#endif
Expand Down
527 changes: 462 additions & 65 deletions agent/conncheck.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion agent/conncheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct _CandidateCheckPair
gboolean controlling;
gboolean timer_restarted;
guint64 priority;
GTimeVal next_tick; /* next tick timestamp */
gint64 next_tick; /* next tick timestamp, wall-clock microseconds (g_get_real_time) */
StunTimer timer;
uint8_t stun_buffer[STUN_MAX_MESSAGE_SIZE];
StunMessage stun_message;
Expand Down
51 changes: 32 additions & 19 deletions agent/discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,9 @@
GST_DEBUG_CATEGORY_EXTERN (niceagent_debug);
#define GST_CAT_DEFAULT niceagent_debug

static inline int priv_timer_expired (GTimeVal *timer, GTimeVal *now)
static inline int priv_timer_expired (gint64 timer, gint64 now)
{
return (now->tv_sec == timer->tv_sec) ?
now->tv_usec >= timer->tv_usec :
now->tv_sec >= timer->tv_sec;
return now >= timer;
}

/*
Expand Down Expand Up @@ -148,6 +146,16 @@ void refresh_free_item (gpointer data, gpointer user_data)

g_assert (user_data == NULL);

GST_INFO_OBJECT (agent,
"%u/%u: Freeing TURN refresh candidate %p "
"(refresh_count=%u, last_lifetime=%u s, "
"consecutive_stale_nonce=%u); sending REFRESH lifetime=0 to "
"release the allocation",
cand->stream ? cand->stream->id : 0,
cand->component ? cand->component->id : 0,
cand, cand->refresh_count,
cand->last_lifetime_s, cand->consecutive_stale_nonce);

if (cand->timer_source != NULL) {
g_source_destroy (cand->timer_source);
g_source_unref (cand->timer_source);
Expand Down Expand Up @@ -189,13 +197,18 @@ void refresh_free_item (gpointer data, gpointer user_data)
nice_address_copy_to_sockaddr(&cand->server, (struct sockaddr *)&server_address);
stun_message_log(&cand->stun_message, TRUE, (struct sockaddr *)&server_address);

/* send the refresh twice since we won't do retransmissions */
/* RFC 5766 §7: the release REFRESH (lifetime=0) is purely
* advisory. We have already forgotten the transaction above, and
* the server keeps its own allocation-expiry timer (last granted
* lifetime, max 600 s) as a backstop. Sending it twice -- which
* the original code did as a poor man's retransmission -- causes
* TURN servers to process the duplicate as a separate request,
* yielding either a second STUN response that we can no longer
* match (logged as "*** ERROR *** unmatched stun response …") or
* a spurious 437 Allocation Mismatch on the duplicate when the
* first request has just succeeded. Send exactly once. */
nice_socket_send (cand->nicesock, &cand->server,
buffer_len, (gchar *)cand->stun_buffer);
if (!nice_socket_is_reliable (cand->nicesock)) {
nice_socket_send (cand->nicesock, &cand->server,
buffer_len, (gchar *)cand->stun_buffer);
}

}

Expand Down Expand Up @@ -931,7 +944,9 @@ static gboolean priv_discovery_tick_unlocked (gpointer pointer)
&cand->stun_message, cand->stun_buffer, sizeof(cand->stun_buffer),
cand->stun_resp_msg.buffer == NULL ? NULL : &cand->stun_resp_msg,
STUN_USAGE_TURN_REQUEST_PORT_NORMAL,
-1, -1,
/* RFC 5766 §6.1: explicitly request LIFETIME=600 s
* rather than relying on the server's default. */
-1, 600,
username, username_len,
password, password_len,
turn_compat);
Expand Down Expand Up @@ -966,7 +981,7 @@ static gboolean priv_discovery_tick_unlocked (gpointer pointer)
buffer_len, (gchar *)cand->stun_buffer);

/* case: success, start waiting for the result */
g_get_current_time (&cand->next_tick);
cand->next_tick = g_get_real_time ();

} else {
/* case: error in starting discovery, start the next discovery */
Expand All @@ -984,16 +999,16 @@ static gboolean priv_discovery_tick_unlocked (gpointer pointer)
}

if (cand->done != TRUE) {
GTimeVal now;
gint64 now;

g_get_current_time (&now);
now = g_get_real_time ();

if (cand->stun_message.buffer == NULL) {
GST_DEBUG_OBJECT (agent, "%u/%u: STUN discovery was cancelled, marking discovery done.",
cand->stream->id, cand->component->id);
cand->done = TRUE;
}
else if (priv_timer_expired (&cand->next_tick, &now)) {
else if (priv_timer_expired (cand->next_tick, now)) {
switch (stun_timer_refresh (&cand->timer)) {
case STUN_USAGE_TIMER_RETURN_TIMEOUT:
{
Expand Down Expand Up @@ -1031,9 +1046,8 @@ static gboolean priv_discovery_tick_unlocked (gpointer pointer)
stun_message_length (&cand->stun_message),
(gchar *)cand->stun_buffer);

/* note: convert from milli to microseconds for g_time_val_add() */
cand->next_tick = now;
g_time_val_add (&cand->next_tick, timeout * 1000);
/* note: convert the timeout from milliseconds to microseconds */
cand->next_tick = now + (gint64) timeout * 1000;

++not_done; /* note: retry later */
break;
Expand All @@ -1042,8 +1056,7 @@ static gboolean priv_discovery_tick_unlocked (gpointer pointer)
{
unsigned int timeout = stun_timer_remainder (&cand->timer);

cand->next_tick = now;
g_time_val_add (&cand->next_tick, timeout * 1000);
cand->next_tick = now + (gint64) timeout * 1000;

++not_done; /* note: retry later */
break;
Expand Down
41 changes: 40 additions & 1 deletion agent/discovery.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef struct
NiceCandidateType type; /**< candidate type STUN or TURN */
NiceSocket *nicesock; /**< XXX: should be taken from local cand: existing socket to use */
NiceAddress server; /**< STUN/TURN server address */
GTimeVal next_tick; /**< next tick timestamp */
gint64 next_tick; /**< next tick timestamp, wall-clock microseconds (g_get_real_time) */
gboolean pending; /**< is discovery in progress? */
gboolean done; /**< is discovery complete? */
Stream *stream;
Expand Down Expand Up @@ -87,8 +87,47 @@ typedef struct
StunMessage stun_message;
uint8_t stun_resp_buffer[STUN_MAX_MESSAGE_SIZE];
StunMessage stun_resp_msg;

/*
* Robustness counters used by the TURN refresh code.
*
* - refresh_count: how many Refresh requests we have sent on this
* allocation (including resends after 438). Used in log lines so
* that "is this the first refresh, or is it stuck in a retry
* loop?" can be answered from the log.
* - consecutive_stale_nonce: how many 438/401-realm-changed responses
* we have received in a row without an intervening success. Reset
* to zero on any RELAY_SUCCESS response. Compared against
* NICE_TURN_MAX_CONSECUTIVE_STALE_NONCE: when the counter reaches
* the limit, the refresh logic backs off and re-arms the periodic
* refresh instead of immediately failing the allocation.
* - last_lifetime_s: lifetime (seconds) granted by the most recent
* successful Allocate / Refresh response. Used both for log lines
* and for the release REFRESH at teardown.
* - tolerate_one_timeout: when TRUE, the next retransmission timeout
* in priv_turn_allocate_refresh_retransmissions_tick will trigger
* one extra refresh attempt rather than tearing down the
* allocation. Set automatically after every successful refresh so
* that a single lost refresh does not kill the allocation.
*/
guint refresh_count;
guint consecutive_stale_nonce;
guint32 last_lifetime_s;
gboolean tolerate_one_timeout;
} CandidateRefresh;

/* How many Refresh transactions in total we will send on a single
* candidate while the server keeps returning 438 (Stale Nonce) /
* 401 (realm changed). RFC 5389 only mandates one retry, but real-world
* TURN servers (notably coturn with short stale-nonce values) can
* rotate the nonce again between our retry being sent and reaching
* them, so be a little more lenient — but not so lenient that a
* misbehaving server can keep us looping for a long time. With siblings
* (e.g. an RTP+RTCP pair sharing one TURN server) the total Refresh
* traffic generated for one component is bounded by
* NICE_TURN_MAX_CONSECUTIVE_STALE_NONCE * <number of sibling refreshes>. */
#define NICE_TURN_MAX_CONSECUTIVE_STALE_NONCE 5

void refresh_free_item (gpointer data, gpointer user_data);
void refresh_free (NiceAgent *agent);
void refresh_prune_stream (NiceAgent *agent, guint stream_id);
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,5 @@ subdir('agent')
subdir('nice')
subdir('gst')

#subdir('tests')
subdir('tests')
#subdir('python')
6 changes: 3 additions & 3 deletions socket/tcp-bsd.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,9 @@ socket_send_more (
}

if (ret < 0) {
if(gerr != NULL &&
g_error_matches (gerr, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)
|| g_error_matches (gerr, G_IO_ERROR, G_IO_ERROR_NOT_CONNECTED)) {
if (gerr != NULL &&
(g_error_matches (gerr, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK) ||
g_error_matches (gerr, G_IO_ERROR, G_IO_ERROR_NOT_CONNECTED))) {
add_to_be_sent (sock, tbs->buf, tbs->length, TRUE);
g_free (tbs->buf);
g_slice_free (struct to_be_sent, tbs);
Expand Down
Loading