diff --git a/matching/src/_impl.rs b/matching/src/_impl.rs index f930b8c..d2d68cb 100644 --- a/matching/src/_impl.rs +++ b/matching/src/_impl.rs @@ -47,8 +47,6 @@ impl AsRef for CssTendril { } } -unsafe impl Sync for CssTendril {} - /// A [`treedom::markup5ever::LocalName`] which implemented [`cssparser::ToCss`] #[derive(Clone, PartialEq, Eq)] pub struct CssLocalName(pub treedom::markup5ever::LocalName); diff --git a/matching/src/selectable.rs b/matching/src/selectable.rs index 34b73e9..22ad947 100644 --- a/matching/src/selectable.rs +++ b/matching/src/selectable.rs @@ -15,7 +15,7 @@ impl<'a> CssNodeRef<'a> { /// # Safety /// `node` value must be element - pub unsafe fn new_unchecked(node: treedom::NodeRef<'a>) -> Self { + pub fn new_unchecked(node: treedom::NodeRef<'a>) -> Self { Self(node) } diff --git a/src/lib.rs b/src/lib.rs index aff83d1..b0b41c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![forbid(unsafe_code)] + use pyo3::prelude::*; extern crate matching; diff --git a/src/parser.rs b/src/parser.rs index 2f7fd9d..1efbf57 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -238,7 +238,7 @@ enum Input { /// /// This is very easy to use and allows you to stream input using `.process()` method; By this way /// you are don't worry about memory usages of huge inputs. -#[pyo3::pyclass(name = "Parser", module = "markupever._rustlib", frozen)] +#[pyo3::pyclass(name = "Parser", module = "markupever._rustlib", frozen, unsendable)] pub struct PyParser { state: parking_lot::Mutex, } @@ -405,9 +405,6 @@ impl PyParser { } } -unsafe impl Send for PyParser {} -unsafe impl Sync for PyParser {} - #[pyo3::pyfunction] #[pyo3(signature=(node, indent=4, include_self=true, is_html=None))] pub fn serialize( diff --git a/src/select.rs b/src/select.rs index 07ee781..a0dc27f 100644 --- a/src/select.rs +++ b/src/select.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - struct PySelectInner { traverse: crate::iter::PyTraverse, expr: ::matching::ExpressionGroup, @@ -45,7 +43,7 @@ impl Iterator for PySelectInner { let node = tree.get(guard.id).unwrap(); if self.expr.matches( - unsafe { ::matching::CssNodeRef::new_unchecked(node) }, + ::matching::CssNodeRef::new_unchecked(node), None, &mut Default::default(), ) { @@ -58,9 +56,9 @@ impl Iterator for PySelectInner { } } -#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", frozen)] +#[pyo3::pyclass(name = "Select", module = "markupever._rustlib", unsendable)] pub struct PySelect { - inner: Arc>, + inner: PySelectInner, } #[pyo3::pymethods] @@ -70,9 +68,7 @@ impl PySelect { let node = node.as_node_guard().clone(); Ok(Self { - inner: Arc::new(parking_lot::Mutex::new(PySelectInner::new( - node, expression, - )?)), + inner: PySelectInner::new(node, expression)?, }) } @@ -80,10 +76,9 @@ impl PySelect { self_ } - pub fn __next__(&self) -> pyo3::PyResult { - let mut lock = self.inner.lock(); - - lock.next() + pub fn __next__(&mut self) -> pyo3::PyResult { + self.inner + .next() .ok_or_else(|| pyo3::PyErr::new::(())) } } diff --git a/treedom/src/atomic.rs b/treedom/src/atomic.rs index d4018f1..a891cac 100644 --- a/treedom/src/atomic.rs +++ b/treedom/src/atomic.rs @@ -1,95 +1,3 @@ -/// A synchronization primitive which can nominally be written to only once. -/// -/// Uses [`parking_lot::Once`] instead of [`std::sync::Once`]: -/// - Lower memory usage. -/// - Not required to be 'static. -/// - Relaxed memory barriers in the fast path, which can significantly improve performance on some architectures. -/// - Efficient handling of micro-contention using adaptive spinning. -pub struct OnceLock { - once: ::parking_lot::Once, - value: ::std::cell::UnsafeCell<::std::mem::MaybeUninit>, - phantom: ::std::marker::PhantomData, -} - -impl OnceLock { - pub const fn new() -> OnceLock { - OnceLock { - once: ::parking_lot::Once::new(), - value: ::std::cell::UnsafeCell::new(::std::mem::MaybeUninit::uninit()), - phantom: ::std::marker::PhantomData, - } - } - - pub fn get(&self) -> Option<&T> { - if self.once.state().done() { - Some(unsafe { (*self.value.get()).assume_init_ref() }) - } else { - None - } - } - - #[cold] - fn initialize(&self, f: F) - where - F: FnOnce() -> T, - { - let slot = &self.value; - - self.once.call_once_force(|_| { - unsafe { (*slot.get()).write(f()) }; - }); - } - - pub fn get_or_init(&self, f: F) -> &T - where - F: FnOnce() -> T, - { - if let Some(value) = self.get() { - return value; - } - - self.initialize(f); - - debug_assert!(self.once.state().done()); - - // SAFETY: The inner value has been initialized - unsafe { (*self.value.get()).assume_init_ref() } - } - - pub fn take(&mut self) -> Option { - if self.once.state().done() { - self.once = ::parking_lot::Once::new(); - // SAFETY: `self.value` is initialized and contains a valid `T`. - // `self.once` is reset, so `is_initialized()` will be false again - // which prevents the value from being read twice. - unsafe { Some((*self.value.get()).assume_init_read()) } - } else { - None - } - } -} - -impl std::fmt::Debug for OnceLock { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.get() { - Some(val) => write!(f, "OnceLock<{:?}>", val), - None => write!(f, "OnceLock"), - } - } -} - -impl Clone for OnceLock { - fn clone(&self) -> Self { - Self::new() - } -} - -impl Default for OnceLock { - fn default() -> Self { - Self::new() - } -} - pub type AtomicTendril = tendril::Tendril; /// Makes a [`AtomicTendril`] from a non-atomic tendril diff --git a/treedom/src/dom.rs b/treedom/src/dom.rs index 3e31f70..fff86d1 100644 --- a/treedom/src/dom.rs +++ b/treedom/src/dom.rs @@ -257,7 +257,7 @@ impl markup5ever::serialize::Serialize for Serializer<'_> { where S: markup5ever::serialize::Serializer, { - let mut traverse = unsafe { self.dom.tree.get_unchecked(self.id).traverse() }; + let mut traverse = self.dom.tree.get(self.id).unwrap().traverse(); if let markup5ever::serialize::TraversalScope::ChildrenOnly(_) = traversal_scope { traverse.next(); diff --git a/treedom/src/interface.rs b/treedom/src/interface.rs index ec99136..bf0630d 100644 --- a/treedom/src/interface.rs +++ b/treedom/src/interface.rs @@ -1,4 +1,5 @@ -use crate::atomic::{make_atomic_tendril, AtomicTendril, OnceLock}; +use crate::atomic::{make_atomic_tendril, AtomicTendril}; +use std::cell::OnceCell; use tendril::StrTendril; /// The root of a document @@ -133,16 +134,16 @@ impl From for AttributeKey { #[derive(Clone)] pub struct ElementAttributes { list: Vec<(AttributeKey, AtomicTendril)>, - id: OnceLock>, - class: OnceLock>, + id: OnceCell>, + class: OnceCell>, } impl ElementAttributes { pub fn new(list: Vec<(AttributeKey, AtomicTendril)>) -> Self { Self { list, - id: OnceLock::new(), - class: OnceLock::new(), + id: OnceCell::new(), + class: OnceCell::new(), } } diff --git a/treedom/src/parser.rs b/treedom/src/parser.rs index 2984bc5..698baab 100644 --- a/treedom/src/parser.rs +++ b/treedom/src/parser.rs @@ -1,16 +1,16 @@ use super::dom::IDTreeDOM; use super::interface; use hashbrown::HashMap; -use std::cell::{Cell, UnsafeCell}; +use std::cell::{Cell, Ref, RefCell}; /// Markup parser that implemented [`markup5ever::interface::TreeSink`] #[derive(Debug)] pub struct ParserSink { - tree: UnsafeCell>, - errors: UnsafeCell>>, + tree: RefCell>, + errors: RefCell>>, quirks_mode: Cell, - namespaces: UnsafeCell>, - lineno: UnsafeCell, + namespaces: RefCell>, + lineno: Cell, } impl Default for ParserSink { @@ -23,25 +23,19 @@ impl ParserSink { /// Creates a new [`ParserSink`] pub fn new() -> Self { Self { - tree: UnsafeCell::new(ego_tree::Tree::new(interface::Interface::new( + tree: RefCell::new(ego_tree::Tree::new(interface::Interface::new( interface::DocumentInterface, ))), - errors: UnsafeCell::new(Vec::new()), + errors: RefCell::new(Vec::new()), quirks_mode: Cell::new(markup5ever::interface::QuirksMode::NoQuirks), - namespaces: UnsafeCell::new(HashMap::new()), - lineno: UnsafeCell::new(1), + namespaces: RefCell::new(HashMap::new()), + lineno: Cell::new(1), } } - #[allow(clippy::mut_from_ref)] - fn tree_mut(&self) -> &mut ego_tree::Tree { - // SAFETY: Parser is not Send/Sync so cannot be used in multi threads. - unsafe { &mut *self.tree.get() } - } - /// Returns the errors which are detected while parsing - pub fn errors(&self) -> &Vec> { - unsafe { &*self.errors.get() } + pub fn errors(&'_ self) -> Ref<'_, Vec>> { + self.errors.borrow() } /// Returns the Quirks Mode @@ -51,7 +45,7 @@ impl ParserSink { /// Returns the line count of the parsed content (does nothing for XML) pub fn lineno(&self) -> u64 { - unsafe { *self.lineno.get() } + self.lineno.get() } /// Consumes the self and returns [`IDTreeDOM`] @@ -108,7 +102,7 @@ impl ParserSink { impl markup5ever::interface::TreeSink for ParserSink { type Output = Self; type Handle = ego_tree::NodeId; - type ElemName<'a> = markup5ever::ExpandedName<'a>; + type ElemName<'a> = Ref<'a, markup5ever::QualName>; // Consume this sink and return the overall result of parsing. fn finish(self) -> Self::Output { @@ -117,14 +111,12 @@ impl markup5ever::interface::TreeSink for ParserSink { // Signal a parse error. fn parse_error(&self, msg: std::borrow::Cow<'static, str>) { - unsafe { &mut *self.errors.get() }.push(msg); + self.errors.borrow_mut().push(msg); } // Called whenever the line number changes. fn set_current_line(&self, n: u64) { - unsafe { - *self.lineno.get() = n; - } + self.lineno.set(n); } // Set the document's quirks mode. @@ -134,13 +126,14 @@ impl markup5ever::interface::TreeSink for ParserSink { // Get a handle to the `Document` node. fn get_document(&self) -> Self::Handle { - self.tree_mut().root().id() + self.tree.borrow().root().id() } // Get a handle to a template's template contents. // The tree builder promises this will never be called with something else than a template element. fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle { - let item = self.tree_mut().get(*target).unwrap(); + let tree = self.tree.borrow(); + let item = tree.get(*target).unwrap(); if let Some(x) = item.value().element() { if x.template { @@ -162,13 +155,15 @@ impl markup5ever::interface::TreeSink for ParserSink { // // Should never be called on a non-element node; feel free to panic!. fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> Self::ElemName<'a> { - let item = self.tree_mut().get(*target).unwrap(); + Ref::map(self.tree.borrow(), |tree| { + let item = tree.get(*target).unwrap(); - if let Some(x) = item.value().element() { - x.name.expanded() - } else { - unreachable!("target is not a element"); - } + if let Some(x) = item.value().element() { + &x.name + } else { + unreachable!("target is not a element"); + } + }) } // Create an element. @@ -184,12 +179,11 @@ impl markup5ever::interface::TreeSink for ParserSink { flags: markup5ever::interface::ElementFlags, ) -> Self::Handle { // Keep all the namespaces in a hashmap, we need them for css selectors - unsafe { - if let Some(ref prefix) = name.prefix { - (*self.namespaces.get()).insert(prefix.clone(), name.ns.clone()); - } else if (*self.namespaces.get()).is_empty() { - (*self.namespaces.get()).insert(markup5ever::Prefix::from(""), name.ns.clone()); - } + let mut namespaces = self.namespaces.borrow_mut(); + if let Some(ref prefix) = name.prefix { + namespaces.insert(prefix.clone(), name.ns.clone()); + } else if namespaces.is_empty() { + namespaces.insert(markup5ever::Prefix::from(""), name.ns.clone()); } let element = interface::ElementInterface::from_non_atomic( @@ -199,13 +193,15 @@ impl markup5ever::interface::TreeSink for ParserSink { flags.mathml_annotation_xml_integration_point, ); - let node = self.tree_mut().orphan(interface::Interface::new(element)); + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new(element)); node.id() } // Create a comment node. fn create_comment(&self, text: tendril::StrTendril) -> Self::Handle { - let node = self.tree_mut().orphan(interface::Interface::new( + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new( interface::CommentInterface::from_non_atomic(text), )); @@ -214,7 +210,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Create a Processing Instruction node. fn create_pi(&self, target: tendril::StrTendril, data: tendril::StrTendril) -> Self::Handle { - let node = self.tree_mut().orphan(interface::Interface::new( + let mut tree = self.tree.borrow_mut(); + let node = tree.orphan(interface::Interface::new( interface::ProcessingInstructionInterface::from_non_atomic(data, target), )); @@ -228,9 +225,12 @@ impl markup5ever::interface::TreeSink for ParserSink { public_id: tendril::StrTendril, system_id: tendril::StrTendril, ) { - self.tree_mut().root_mut().append(interface::Interface::new( - interface::DoctypeInterface::from_non_atomic(name, public_id, system_id), - )); + self.tree + .borrow_mut() + .root_mut() + .append(interface::Interface::new( + interface::DoctypeInterface::from_non_atomic(name, public_id, system_id), + )); } // Append a node as the last child of the given node. If this would produce adjacent sibling text nodes, it should concatenate the text instead. @@ -241,7 +241,8 @@ impl markup5ever::interface::TreeSink for ParserSink { parent: &Self::Handle, child: markup5ever::interface::NodeOrText, ) { - let mut parent = self.tree_mut().get_mut(*parent).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut parent = tree.get_mut(*parent).unwrap(); match child { markup5ever::interface::NodeOrText::AppendNode(handle) => { @@ -272,7 +273,8 @@ impl markup5ever::interface::TreeSink for ParserSink { sibling_id: &Self::Handle, new_node_id: markup5ever::interface::NodeOrText, ) { - let mut sibling = self.tree_mut().get_mut(*sibling_id).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut sibling = tree.get_mut(*sibling_id).unwrap(); match (new_node_id, sibling.prev_sibling()) { (markup5ever::interface::NodeOrText::AppendText(text), None) => { @@ -306,7 +308,8 @@ impl markup5ever::interface::TreeSink for ParserSink { prev_item_index: &Self::Handle, child: markup5ever::interface::NodeOrText, ) { - let item = self.tree_mut().get(*item_index).unwrap(); + let tree = self.tree.borrow_mut(); + let item = tree.get(*item_index).unwrap(); if item.parent().is_some() { self.append_before_sibling(item_index, child); @@ -318,7 +321,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Add each attribute to the given element, if no attribute with that name already exists. // The tree builder promises this will never be called with something else than an element. fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec) { - let mut node = self.tree_mut().get_mut(*target).unwrap(); + let mut tree = self.tree.borrow_mut(); + let mut node = tree.get_mut(*target).unwrap(); if let Some(element) = node.value().element_mut() { element.attrs.extend( @@ -333,12 +337,13 @@ impl markup5ever::interface::TreeSink for ParserSink { // Detach the given node from its parent. fn remove_from_parent(&self, target: &Self::Handle) { - self.tree_mut().get_mut(*target).unwrap().detach(); + self.tree.borrow_mut().get_mut(*target).unwrap().detach(); } // Remove all the children from node and append them to new_parent. fn reparent_children(&self, node: &Self::Handle, new_parent: &Self::Handle) { - self.tree_mut() + self.tree + .borrow_mut() .get_mut(*new_parent) .unwrap() .reparent_from_id_append(*node); @@ -346,7 +351,8 @@ impl markup5ever::interface::TreeSink for ParserSink { // Returns true if the adjusted current node is an HTML integration point and the token is a start tag. fn is_mathml_annotation_xml_integration_point(&self, target: &Self::Handle) -> bool { - let item = self.tree_mut().get(*target).unwrap(); + let tree = self.tree.borrow_mut(); + let item = tree.get(*target).unwrap(); if let Some(x) = item.value().element() { x.mathml_annotation_xml_integration_point