From 881ddaf1ab9f92ebf5fdc99c7f0ebc4b6cb3aa4a Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 13 Apr 2026 14:34:24 -0700 Subject: [PATCH 1/8] Use QualName for ParserSink::ElemName Signed-off-by: Anders Kaseorg --- treedom/src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/treedom/src/parser.rs b/treedom/src/parser.rs index 2984bc5..c81782c 100644 --- a/treedom/src/parser.rs +++ b/treedom/src/parser.rs @@ -108,7 +108,7 @@ impl ParserSink { impl markup5ever::interface::TreeSink for ParserSink { type Output = Self; type Handle = ego_tree::NodeId; - type ElemName<'a> = markup5ever::ExpandedName<'a>; + type ElemName<'a> = &'a markup5ever::QualName; // Consume this sink and return the overall result of parsing. fn finish(self) -> Self::Output { @@ -165,7 +165,7 @@ impl markup5ever::interface::TreeSink for ParserSink { let item = self.tree_mut().get(*target).unwrap(); if let Some(x) = item.value().element() { - x.name.expanded() + &x.name } else { unreachable!("target is not a element"); } From c7ce9f4c26efe7b2fc4c31856e8fbc22d638f982 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 13 Apr 2026 14:43:54 -0700 Subject: [PATCH 2/8] Replace UnsafeCell with RefCell in ParserSink Signed-off-by: Anders Kaseorg --- treedom/src/parser.rs | 106 ++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/treedom/src/parser.rs b/treedom/src/parser.rs index c81782c..698baab 100644 --- a/treedom/src/parser.rs +++ b/treedom/src/parser.rs @@ -1,16 +1,16 @@ use super::dom::IDTreeDOM; use super::interface; use hashbrown::HashMap; -use std::cell::{Cell, UnsafeCell}; +use std::cell::{Cell, Ref, RefCell}; /// Markup parser that implemented [`markup5ever::interface::TreeSink`] #[derive(Debug)] pub struct ParserSink { - tree: UnsafeCell>, - errors: UnsafeCell>>, + tree: RefCell>, + errors: RefCell>>, quirks_mode: Cell, - namespaces: UnsafeCell>, - lineno: UnsafeCell, + namespaces: RefCell>, + lineno: Cell, } impl Default for ParserSink { @@ -23,25 +23,19 @@ impl ParserSink { /// Creates a new [`ParserSink`] pub fn new() -> Self { Self { - tree: UnsafeCell::new(ego_tree::Tree::new(interface::Interface::new( + tree: RefCell::new(ego_tree::Tree::new(interface::Interface::new( interface::DocumentInterface, ))), - errors: UnsafeCell::new(Vec::new()), + errors: RefCell::new(Vec::new()), quirks_mode: Cell::new(markup5ever::interface::QuirksMode::NoQuirks), - namespaces: UnsafeCell::new(HashMap::new()), - lineno: UnsafeCell::new(1), + namespaces: RefCell::new(HashMap::new()), + lineno: Cell::new(1), } } - #[allow(clippy::mut_from_ref)] - fn tree_mut(&self) -> &mut ego_tree::Tree { - // SAFETY: Parser is not Send/Sync so cannot be used in multi threads. - unsafe { &mut *self.tree.get() } - } - /// Returns the errors which are detected while parsing - pub fn errors(&self) -> &Vec> { - unsafe { &*self.errors.get() } + pub fn errors(&'_ self) -> Ref<'_, Vec>> { + self.errors.borrow() } /// Returns the Quirks Mode @@ -51,7 +45,7 @@ impl ParserSink { /// Returns the line count of the parsed content (does nothing for XML) pub fn lineno(&self) -> u64 { - unsafe { *self.lineno.get() } + self.lineno.get() } /// Consumes the self and returns [`IDTreeDOM`] @@ -108,7 +102,7 @@ impl ParserSink { impl markup5ever::interface::TreeSink for ParserSink { type Output = Self; type Handle = ego_tree::NodeId; - type ElemName<'a> = &'a markup5ever::QualName; + type ElemName<'a> = Ref<'a, markup5ever::QualName>; // Consume this sink and return the overall result of parsing. fn finish(self) -> Self::Output { @@ -117,14 +111,12 @@ impl markup5ever::interface::TreeSink for ParserSink { // Signal a parse error. fn parse_error(&self, msg: std::borrow::Cow<'static, str>) { - unsafe { &mut *self.errors.get() }.push(msg); + self.errors.borrow_mut().push(msg); } // Called whenever the line number changes. fn set_current_line(&self, n: u64) { - unsafe { - *self.lineno.get() = n; - } + self.lineno.set(n); } // Set the document's quirks mode. @@ -134,13 +126,14 @@ impl markup5ever::interface::TreeSink for ParserSink { // Get a handle to the `Document` node. fn get_document(&self) -> Self::Handle { - self.tree_mut().root().id() + self.tree.borrow().root().id() } // Get a handle to a template's template contents. // The tree builder promises this will never be called with something else than a template element. fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle { - let item = self.tree_mut().get(*target).unwrap(); + let tree = self.tree.borrow(); + let item = tree.get(*target).unwrap(); if let Some(x) = item.value().element() { if x.template { @@ -162,13 +155,15 @@ impl markup5ever::interface::TreeSink for ParserSink { // // Should never be called on a non-element node; feel free to panic!. fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> Self::ElemName<'a> { - let item = self.tree_mut().get(*target).unwrap(); + Ref::map(self.tree.borrow(), |tree| { + let item = tree.get(*target).unwrap(); - if let Some(x) = item.value().element() { - &x.name - } else { - unreachable!("target is not a element"); - } + if let Some(x) = item.value().element() { + &x.name + } else { + unreachable!("target is not a element"); + } + }) } // Create an element. @@ -184,12 +179,11 @@ impl markup5ever::interface::TreeSink for ParserSink { flags: markup5ever::interface::ElementFlags, ) -> Self::Handle { // Keep all the namespaces in a hashmap, we need them for css selectors - unsafe { - if let Some(ref prefix) = name.prefix { - (*self.namespaces.get()).insert(prefix.clone(), name.ns.clone()); - } else if (*self.namespaces.get()).is_empty() { - (*self.namespaces.get()).insert(markup5ever::Prefix::from(""), name.ns.clone()); - } + let mut namespaces = self.namespaces.borrow_mut(); + if let Some(ref prefix) = name.prefix { + namespaces.insert(prefix.clone(), name.ns.clone()); + } else if namespaces.is_empty() { + namespaces.insert(markup5ever::Prefix::from(""), name.ns.clone()); } let element = interface::ElementInterface::from_non_atomic( @@ -199,13 +193,15 @@ impl markup5ever::interface::TreeSink for ParserSink { flags.mathml_annotation_xml_integration_point, ); - let node = self.tree_mut().orphan(interface::Interface::new(element)); + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new(element)); node.id() } // Create a comment node. fn create_comment(&self, text: tendril::StrTendril) -> Self::Handle { - let node = self.tree_mut().orphan(interface::Interface::new( + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new( interface::CommentInterface::from_non_atomic(text), )); @@ -214,7 +210,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Create a Processing Instruction node. fn create_pi(&self, target: tendril::StrTendril, data: tendril::StrTendril) -> Self::Handle { - let node = self.tree_mut().orphan(interface::Interface::new( + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new( interface::ProcessingInstructionInterface::from_non_atomic(data, target), )); @@ -228,9 +225,12 @@ impl markup5ever::interface::TreeSink for ParserSink { public_id: tendril::StrTendril, system_id: tendril::StrTendril, ) { - self.tree_mut().root_mut().append(interface::Interface::new( - interface::DoctypeInterface::from_non_atomic(name, public_id, system_id), - )); + self.tree + .borrow_mut() + .root_mut() + .append(interface::Interface::new( + interface::DoctypeInterface::from_non_atomic(name, public_id, system_id), + )); } // Append a node as the last child of the given node. If this would produce adjacent sibling text nodes, it should concatenate the text instead. @@ -241,7 +241,8 @@ impl markup5ever::interface::TreeSink for ParserSink { parent: &Self::Handle, child: markup5ever::interface::NodeOrText, ) { - let mut parent = self.tree_mut().get_mut(*parent).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut parent = tree.get_mut(*parent).unwrap(); match child { markup5ever::interface::NodeOrText::AppendNode(handle) => { @@ -272,7 +273,8 @@ impl markup5ever::interface::TreeSink for ParserSink { sibling_id: &Self::Handle, new_node_id: markup5ever::interface::NodeOrText, ) { - let mut sibling = self.tree_mut().get_mut(*sibling_id).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut sibling = tree.get_mut(*sibling_id).unwrap(); match (new_node_id, sibling.prev_sibling()) { (markup5ever::interface::NodeOrText::AppendText(text), None) => { @@ -306,7 +308,8 @@ impl markup5ever::interface::TreeSink for ParserSink { prev_item_index: &Self::Handle, child: markup5ever::interface::NodeOrText, ) { - let item = self.tree_mut().get(*item_index).unwrap(); + let tree = self.tree.borrow_mut(); + let item = tree.get(*item_index).unwrap(); if item.parent().is_some() { self.append_before_sibling(item_index, child); @@ -318,7 +321,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Add each attribute to the given element, if no attribute with that name already exists. // The tree builder promises this will never be called with something else than an element. fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec) { - let mut node = self.tree_mut().get_mut(*target).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut node = tree.get_mut(*target).unwrap(); if let Some(element) = node.value().element_mut() { element.attrs.extend( @@ -333,12 +337,13 @@ impl markup5ever::interface::TreeSink for ParserSink { // Detach the given node from its parent. fn remove_from_parent(&self, target: &Self::Handle) { - self.tree_mut().get_mut(*target).unwrap().detach(); + self.tree.borrow_mut().get_mut(*target).unwrap().detach(); } // Remove all the children from node and append them to new_parent. fn reparent_children(&self, node: &Self::Handle, new_parent: &Self::Handle) { - self.tree_mut() + self.tree + .borrow_mut() .get_mut(*new_parent) .unwrap() .reparent_from_id_append(*node); @@ -346,7 +351,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Returns true if the adjusted current node is an HTML integration point and the token is a start tag. fn is_mathml_annotation_xml_integration_point(&self, target: &Self::Handle) -> bool { - let item = self.tree_mut().get(*target).unwrap(); + let tree = self.tree.borrow_mut(); + let item = tree.get(*target).unwrap(); if let Some(x) = item.value().element() { x.mathml_annotation_xml_integration_point From a9448664e426aafb99a8a99f606e69d4dcb54d44 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 13 Apr 2026 14:57:51 -0700 Subject: [PATCH 3/8] Remove unsafe usage from Serializer Signed-off-by: Anders Kaseorg --- treedom/src/dom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/treedom/src/dom.rs b/treedom/src/dom.rs index 3e31f70..fff86d1 100644 --- a/treedom/src/dom.rs +++ b/treedom/src/dom.rs @@ -257,7 +257,7 @@ impl markup5ever::serialize::Serialize for Serializer<'_> { where S: markup5ever::serialize::Serializer, { - let mut traverse = unsafe { self.dom.tree.get_unchecked(self.id).traverse() }; + let mut traverse = self.dom.tree.get(self.id).unwrap().traverse(); if let markup5ever::serialize::TraversalScope::ChildrenOnly(_) = traversal_scope { traverse.next(); From b30799cae868e75b4d30db51d93af374f9915969 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 13 Apr 2026 14:55:21 -0700 Subject: [PATCH 4/8] Remove unsafe marker from CssNodeRef::new_unchecked Using this incorrectly may lead to weird behavior but not memory unsafety. Signed-off-by: Anders Kaseorg --- matching/src/selectable.rs | 2 +- src/select.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/matching/src/selectable.rs b/matching/src/selectable.rs index 34b73e9..22ad947 100644 --- a/matching/src/selectable.rs +++ b/matching/src/selectable.rs @@ -15,7 +15,7 @@ impl<'a> CssNodeRef<'a> { /// # Safety /// `node` value must be element - pub unsafe fn new_unchecked(node: treedom::NodeRef<'a>) -> Self { + pub fn new_unchecked(node: treedom::NodeRef<'a>) -> Self { Self(node) } diff --git a/src/select.rs b/src/select.rs index 07ee781..235e68e 100644 --- a/src/select.rs +++ b/src/select.rs @@ -45,7 +45,7 @@ impl Iterator for PySelectInner { let node = tree.get(guard.id).unwrap(); if self.expr.matches( - unsafe { ::matching::CssNodeRef::new_unchecked(node) }, + ::matching::CssNodeRef::new_unchecked(node), None, &mut Default::default(), ) { From 7ebf668a9906fe62bef73d8644bf80e2d021a9c1 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sat, 11 Apr 2026 08:40:07 -0700 Subject: [PATCH 5/8] Mark PyParser, PySelect unsendable; remove unsafe impl Send, Sync Tendril is neither Send nor Sync, and it is unsound to pretend otherwise. Instead, we make it the user's responsibility not to use a parser from multiple threads. Fixes #10. Signed-off-by: Anders Kaseorg --- matching/src/_impl.rs | 2 -- src/parser.rs | 5 +---- src/select.rs | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/matching/src/_impl.rs b/matching/src/_impl.rs index f930b8c..d2d68cb 100644 --- a/matching/src/_impl.rs +++ b/matching/src/_impl.rs @@ -47,8 +47,6 @@ impl AsRef for CssTendril { } } -unsafe impl Sync for CssTendril {} - /// A [`treedom::markup5ever::LocalName`] which implemented [`cssparser::ToCss`] #[derive(Clone, PartialEq, Eq)] pub struct CssLocalName(pub treedom::markup5ever::LocalName); diff --git a/src/parser.rs b/src/parser.rs index 2f7fd9d..1efbf57 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -238,7 +238,7 @@ enum Input { /// /// This is very easy to use and allows you to stream input using `.process()` method; By this way /// you are don't worry about memory usages of huge inputs. -#[pyo3::pyclass(name = "Parser", module = "markupever._rustlib", frozen)] +#[pyo3::pyclass(name = "Parser", module = "markupever._rustlib", frozen, unsendable)] pub struct PyParser { state: parking_lot::Mutex, } @@ -405,9 +405,6 @@ impl PyParser { } } -unsafe impl Send for PyParser {} -unsafe impl Sync for PyParser {} - #[pyo3::pyfunction] #[pyo3(signature=(node, indent=4, include_self=true, is_html=None))] pub fn serialize( diff --git a/src/select.rs b/src/select.rs index 235e68e..718186d 100644 --- a/src/select.rs +++ b/src/select.rs @@ -58,7 +58,7 @@ impl Iterator for PySelectInner { } } -#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", frozen)] +#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", frozen, unsendable)] pub struct PySelect { inner: Arc>, } From 3fdf1b5c8edc6b89f3ed1aa2c80f9ea87b312e85 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 10 Apr 2026 16:45:15 -0700 Subject: [PATCH 6/8] Remove unnecessary unsafe synchronization from ElementAttributes This is unnecessary because PyParser is not Send or Sync. Signed-off-by: Anders Kaseorg --- treedom/src/atomic.rs | 92 ---------------------------------------- treedom/src/interface.rs | 11 ++--- 2 files changed, 6 insertions(+), 97 deletions(-) diff --git a/treedom/src/atomic.rs b/treedom/src/atomic.rs index d4018f1..a891cac 100644 --- a/treedom/src/atomic.rs +++ b/treedom/src/atomic.rs @@ -1,95 +1,3 @@ -/// A synchronization primitive which can nominally be written to only once. -/// -/// Uses [`parking_lot::Once`] instead of [`std::sync::Once`]: -/// - Lower memory usage. -/// - Not required to be 'static. -/// - Relaxed memory barriers in the fast path, which can significantly improve performance on some architectures. -/// - Efficient handling of micro-contention using adaptive spinning. -pub struct OnceLock { - once: ::parking_lot::Once, - value: ::std::cell::UnsafeCell<::std::mem::MaybeUninit>, - phantom: ::std::marker::PhantomData, -} - -impl OnceLock { - pub const fn new() -> OnceLock { - OnceLock { - once: ::parking_lot::Once::new(), - value: ::std::cell::UnsafeCell::new(::std::mem::MaybeUninit::uninit()), - phantom: ::std::marker::PhantomData, - } - } - - pub fn get(&self) -> Option<&T> { - if self.once.state().done() { - Some(unsafe { (*self.value.get()).assume_init_ref() }) - } else { - None - } - } - - #[cold] - fn initialize(&self, f: F) - where - F: FnOnce() -> T, - { - let slot = &self.value; - - self.once.call_once_force(|_| { - unsafe { (*slot.get()).write(f()) }; - }); - } - - pub fn get_or_init(&self, f: F) -> &T - where - F: FnOnce() -> T, - { - if let Some(value) = self.get() { - return value; - } - - self.initialize(f); - - debug_assert!(self.once.state().done()); - - // SAFETY: The inner value has been initialized - unsafe { (*self.value.get()).assume_init_ref() } - } - - pub fn take(&mut self) -> Option { - if self.once.state().done() { - self.once = ::parking_lot::Once::new(); - // SAFETY: `self.value` is initialized and contains a valid `T`. - // `self.once` is reset, so `is_initialized()` will be false again - // which prevents the value from being read twice. - unsafe { Some((*self.value.get()).assume_init_read()) } - } else { - None - } - } -} - -impl std::fmt::Debug for OnceLock { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.get() { - Some(val) => write!(f, "OnceLock<{:?}>", val), - None => write!(f, "OnceLock"), - } - } -} - -impl Clone for OnceLock { - fn clone(&self) -> Self { - Self::new() - } -} - -impl Default for OnceLock { - fn default() -> Self { - Self::new() - } -} - pub type AtomicTendril = tendril::Tendril; /// Makes a [`AtomicTendril`] from a non-atomic tendril diff --git a/treedom/src/interface.rs b/treedom/src/interface.rs index ec99136..bf0630d 100644 --- a/treedom/src/interface.rs +++ b/treedom/src/interface.rs @@ -1,4 +1,5 @@ -use crate::atomic::{make_atomic_tendril, AtomicTendril, OnceLock}; +use crate::atomic::{make_atomic_tendril, AtomicTendril}; +use std::cell::OnceCell; use tendril::StrTendril; /// The root of a document @@ -133,16 +134,16 @@ impl From for AttributeKey { #[derive(Clone)] pub struct ElementAttributes { list: Vec<(AttributeKey, AtomicTendril)>, - id: OnceLock>, - class: OnceLock>, + id: OnceCell>, + class: OnceCell>, } impl ElementAttributes { pub fn new(list: Vec<(AttributeKey, AtomicTendril)>) -> Self { Self { list, - id: OnceLock::new(), - class: OnceLock::new(), + id: OnceCell::new(), + class: OnceCell::new(), } } From 5388bc4f5d4d1a12b0cb8217250e6e37f0b5b30c Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sun, 12 Apr 2026 16:27:13 -0700 Subject: [PATCH 7/8] Remove unnecessary synchronization from PySelect This is unnecessary because PySelect is not Send or Sync. Signed-off-by: Anders Kaseorg --- src/select.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/select.rs b/src/select.rs index 718186d..a0dc27f 100644 --- a/src/select.rs +++ b/src/select.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - struct PySelectInner { traverse: crate::iter::PyTraverse, expr: ::matching::ExpressionGroup, @@ -58,9 +56,9 @@ impl Iterator for PySelectInner { } } -#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", frozen, unsendable)] +#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", unsendable)] pub struct PySelect { - inner: Arc>, + inner: PySelectInner, } #[pyo3::pymethods] @@ -70,9 +68,7 @@ impl PySelect { let node = node.as_node_guard().clone(); Ok(Self { - inner: Arc::new(parking_lot::Mutex::new(PySelectInner::new( - node, expression, - )?)), + inner: PySelectInner::new(node, expression)?, }) } @@ -80,10 +76,9 @@ impl PySelect { self_ } - pub fn __next__(&self) -> pyo3::PyResult { - let mut lock = self.inner.lock(); - - lock.next() + pub fn __next__(&mut self) -> pyo3::PyResult { + self.inner + .next() .ok_or_else(|| pyo3::PyErr::new::(())) } } From adb886ddc69b05d4d8f93261fa6b8c1c123cd42c Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 13 Apr 2026 14:58:32 -0700 Subject: [PATCH 8/8] Forbid unsafe code The documentation says this library avoids the use of unsafe code blocks; ensure we keep that promise. Signed-off-by: Anders Kaseorg --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index aff83d1..b0b41c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![forbid(unsafe_code)] + use pyo3::prelude::*; extern crate matching;