From e56eaa95ecb89bfad351d2941ab8b60429df9dfe Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 17:11:13 -0800 Subject: [PATCH 1/8] Let PyO3 type-check parse input This efficiently avoids copying the input where possible, while being simpler and safer. Signed-off-by: Anders Kaseorg --- src/parser.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index e6ff4e2..72a73a1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -192,12 +192,12 @@ impl ParserState { ))) } - fn process(&mut self, content: Vec) -> pyo3::PyResult<()> { + fn process(&mut self, content: &[u8]) -> pyo3::PyResult<()> { use treedom::tendril::TendrilSink; match self { - Self::OnHtml(x) => x.process(treedom::tendril::ByteTendril::from_slice(&content)), - Self::OnXml(x) => x.process(treedom::tendril::ByteTendril::from_slice(&content)), + Self::OnHtml(x) => x.process(treedom::tendril::ByteTendril::from_slice(content)), + Self::OnXml(x) => x.process(treedom::tendril::ByteTendril::from_slice(content)), _ => { return Err(pyo3::PyErr::new::( "The parser is completed parsing", @@ -230,6 +230,12 @@ impl std::fmt::Debug for ParserState { } } +#[derive(pyo3::FromPyObject)] +enum Input { + Bytes(pyo3::pybacked::PyBackedBytes), + Str(pyo3::pybacked::PyBackedStr), +} + /// An HTML/XML parser, ready to receive unicode input. /// /// This is very easy to use and allows you to stream input using `.process()` method; By this way @@ -294,18 +300,10 @@ impl PyParser { /// `content` must be `str` or `bytes`. /// /// Raises `RuntimeError` if `.finish()` method is called. - fn process(&self, content: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult<()> { - let content = if let Ok(b) = content.extract::>() { - b - } else if let Ok(s) = content.extract::() { - s.into_bytes() - } else { - return Err(pyo3::PyErr::new::( - format!( - "expected bytes or str for content, got {}", - crate::tools::get_type_name(content) - ), - )); + fn process(&self, content: Input) -> pyo3::PyResult<()> { + let content = match &content { + Input::Bytes(b) => b, + Input::Str(s) => s.as_bytes(), }; let mut state = self.state.lock(); From affb8240bb1db8658f73de59ee441cc73bb2b6db Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 17:31:31 -0800 Subject: [PATCH 2/8] Let PyO3 type-check QualName | str Signed-off-by: Anders Kaseorg --- src/nodes.rs | 85 ++++++++----------------------------------------- src/qualname.rs | 16 +++++----- src/tools.rs | 38 ++++++---------------- 3 files changed, 31 insertions(+), 108 deletions(-) diff --git a/src/nodes.rs b/src/nodes.rs index 2820f71..fc81aa6 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -797,17 +797,10 @@ impl PyAttrsList { &self, py: pyo3::Python<'_>, index: usize, - key: pyo3::Py, + key: crate::tools::PyQualNameOrStr, value: pyo3::Py, ) -> pyo3::PyResult<()> { - let key = crate::tools::qualname_from_pyobject(py, &key) - .into_qualname() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected QualName or str for key, got {}", - crate::tools::get_type_name(key.bind(py)) - )) - })?; + let key = key.into_qualname(); let Ok(val) = value.bind(py).extract::() else { return Err(pyo3::PyErr::new::( @@ -836,17 +829,10 @@ impl PyAttrsList { fn push( &self, py: pyo3::Python<'_>, - key: pyo3::Py, + key: crate::tools::PyQualNameOrStr, value: pyo3::Py, ) -> pyo3::PyResult<()> { - let key = crate::tools::qualname_from_pyobject(py, &key) - .into_qualname() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected QualName or str for key, got {}", - crate::tools::get_type_name(key.bind(py)) - )) - })?; + let key = key.into_qualname(); let Ok(val) = value.bind(py).extract::() else { return Err(pyo3::PyErr::new::( @@ -877,17 +863,10 @@ impl PyAttrsList { &self, py: pyo3::Python<'_>, index: usize, - key: pyo3::Py, + key: crate::tools::PyQualNameOrStr, value: pyo3::Py, ) -> pyo3::PyResult<()> { - let key = crate::tools::qualname_from_pyobject(py, &key) - .into_qualname() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected QualName or str for key, got {}", - crate::tools::get_type_name(key.bind(py)) - )) - })?; + let key = key.into_qualname(); let Ok(val) = value.bind(py).extract::() else { return Err(pyo3::PyErr::new::( @@ -1057,8 +1036,8 @@ impl PyElement { #[new] fn new( treedom: &pyo3::Bound<'_, pyo3::PyAny>, - name: pyo3::Py, - attrs: Vec<(pyo3::Py, pyo3::Py)>, + name: crate::tools::PyQualNameOrStr, + attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::Py)>, template: bool, mathml_annotation_xml_integration_point: bool, ) -> pyo3::PyResult { @@ -1071,30 +1050,12 @@ impl PyElement { )) })?; - let name = crate::tools::qualname_from_pyobject(treedom.py(), &name) - .into_qualname() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected QualName or str for name, got {}", - crate::tools::get_type_name(name.bind(treedom.py())) - )) - })?; + let name = name.into_qualname(); let mut attributes = Vec::with_capacity(attrs.len()); for (key, val) in attrs.into_iter() { - let key = match crate::tools::qualname_from_pyobject(treedom.py(), &key).into_qualname() - { - Ok(x) => x, - Err(_) => { - return Err(pyo3::PyErr::new::( - format!( - "expected QualName or str for attrs #1, got {}", - crate::tools::get_type_name(key.bind(treedom.py())) - ), - )) - } - }; + let key = key.into_qualname(); let Ok(val) = val.bind(treedom.py()).extract::() else { return Err(pyo3::PyErr::new::( @@ -1132,21 +1093,13 @@ impl PyElement { } #[setter] - fn set_name(&self, name: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult<()> { + fn set_name(&self, name: crate::tools::PyQualNameOrStr) { let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); - let name = crate::tools::qualname_from_pyobject(name.py(), name.as_unbound()) - .into_qualname() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected QualName or str for name, got {}", - crate::tools::get_type_name(name) - )) - })?; + let name = name.into_qualname(); node.value().element_mut().unwrap().name = name; - Ok(()) } #[getter] @@ -1158,7 +1111,7 @@ impl PyElement { fn set_attrs( &self, py: pyo3::Python<'_>, - attrs: Vec<(pyo3::Py, pyo3::Py)>, + attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::Py)>, ) -> pyo3::PyResult<()> { let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); @@ -1166,17 +1119,7 @@ impl PyElement { let mut attributes = Vec::with_capacity(attrs.len()); for (key, val) in attrs.into_iter() { - let key = match crate::tools::qualname_from_pyobject(py, &key).into_qualname() { - Ok(x) => x, - Err(_) => { - return Err(pyo3::PyErr::new::( - format!( - "expected QualName or str for attrs #1, got {}", - crate::tools::get_type_name(key.bind(py)) - ), - )) - } - }; + let key = key.into_qualname(); let Ok(val) = val.bind(py).extract::() else { return Err(pyo3::PyErr::new::( diff --git a/src/qualname.rs b/src/qualname.rs index bd84b19..a7be3c3 100644 --- a/src/qualname.rs +++ b/src/qualname.rs @@ -149,21 +149,21 @@ impl PyQualName { match cmp { pyo3::basic::CompareOp::Eq => { - match crate::tools::qualname_from_pyobject(self_.py(), &other) { - crate::tools::QualNameFromPyObjectResult::QualName(x) => { + match other.extract::(self_.py()) { + Ok(crate::tools::PyQualNameOrStr::QualName(x)) => { Ok(x.name == self_.get().name) } - crate::tools::QualNameFromPyObjectResult::Str(x) => Ok(self_.get().name.local == x), - crate::tools::QualNameFromPyObjectResult::Err(_) => Ok(false), + Ok(crate::tools::PyQualNameOrStr::Str(x)) => Ok(*self_.get().name.local == x), + Err(_) => Ok(false), } } pyo3::basic::CompareOp::Ne => { - match crate::tools::qualname_from_pyobject(self_.py(), &other) { - crate::tools::QualNameFromPyObjectResult::QualName(x) => { + match other.extract::(self_.py()) { + Ok(crate::tools::PyQualNameOrStr::QualName(x)) => { Ok(x.name != self_.get().name) } - crate::tools::QualNameFromPyObjectResult::Str(x) => Ok(self_.get().name.local != x), - crate::tools::QualNameFromPyObjectResult::Err(_) => Ok(true), + Ok(crate::tools::PyQualNameOrStr::Str(x)) => Ok(*self_.get().name.local != x), + Err(_) => Ok(true), } } pyo3::basic::CompareOp::Gt => { diff --git a/src/tools.rs b/src/tools.rs index b9b2b7b..b92b94f 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -8,41 +8,21 @@ pub fn get_type_name(obj: &pyo3::Bound) -> String { type_.name().unwrap().to_str().unwrap().into() } -pub enum QualNameFromPyObjectResult<'p> { +#[derive(pyo3::FromPyObject)] +pub enum PyQualNameOrStr<'p> { QualName(pyo3::PyRef<'p, crate::qualname::PyQualName>), - Str(String), - Err(pyo3::PyErr), + Str(pyo3::pybacked::PyBackedStr), } -impl QualNameFromPyObjectResult<'_> { - pub fn into_qualname(self) -> pyo3::PyResult { +impl PyQualNameOrStr<'_> { + pub fn into_qualname(self) -> treedom::markup5ever::QualName { match self { - Self::QualName(q) => Ok(q.name.clone()), - Self::Str(s) => Ok(treedom::markup5ever::QualName::new( + Self::QualName(q) => q.name.clone(), + Self::Str(s) => treedom::markup5ever::QualName::new( None, treedom::markup5ever::namespace_url!(""), - s.into(), - )), - Self::Err(e) => Err(e), - } - } -} - -pub fn qualname_from_pyobject<'a>( - py: pyo3::Python<'a>, - object: &pyo3::Py, -) -> QualNameFromPyObjectResult<'a> { - use pyo3::types::PyAnyMethods; - - if let Ok(x) = object.bind(py).extract::() { - QualNameFromPyObjectResult::Str(x) - } else { - match object - .bind(py) - .extract::>() - { - Ok(x) => QualNameFromPyObjectResult::QualName(x), - Err(e) => QualNameFromPyObjectResult::Err(e.into()), + (*s).into(), + ), } } } From 9389e4f8ecfddfed32776f51da4e87a351ab02fc Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 17:44:28 -0800 Subject: [PATCH 3/8] Let PyO3 type-check str Signed-off-by: Anders Kaseorg --- src/nodes.rs | 96 +++++++--------------------------------------------- 1 file changed, 13 insertions(+), 83 deletions(-) diff --git a/src/nodes.rs b/src/nodes.rs index fc81aa6..0a215e1 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -795,22 +795,12 @@ impl PyAttrsList { fn insert( &self, - py: pyo3::Python<'_>, index: usize, key: crate::tools::PyQualNameOrStr, - value: pyo3::Py, + value: &str, ) -> pyo3::PyResult<()> { let key = key.into_qualname(); - let Ok(val) = value.bind(py).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for value, got {}", - crate::tools::get_type_name(value.bind(py)) - ), - )); - }; - let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); let elem = node.value().element_mut().unwrap(); @@ -821,35 +811,19 @@ impl PyAttrsList { )); } - elem.attrs.insert(index, (key.into(), val.into())); + elem.attrs.insert(index, (key.into(), value.into())); Ok(()) } - fn push( - &self, - py: pyo3::Python<'_>, - key: crate::tools::PyQualNameOrStr, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { + fn push(&self, key: crate::tools::PyQualNameOrStr, value: &str) { let key = key.into_qualname(); - let Ok(val) = value.bind(py).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for value, got {}", - crate::tools::get_type_name(value.bind(py)) - ), - )); - }; - let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); let elem = node.value().element_mut().unwrap(); - elem.attrs.push((key.into(), val.into())); - - Ok(()) + elem.attrs.push((key.into(), value.into())); } fn items(self_: pyo3::PyRef<'_, Self>) -> PyAttrsListItems { @@ -861,22 +835,12 @@ impl PyAttrsList { fn update_item( &self, - py: pyo3::Python<'_>, index: usize, key: crate::tools::PyQualNameOrStr, - value: pyo3::Py, + value: &str, ) -> pyo3::PyResult<()> { let key = key.into_qualname(); - let Ok(val) = value.bind(py).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for value, got {}", - crate::tools::get_type_name(value.bind(py)) - ), - )); - }; - let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); let elem = node.value().element_mut().unwrap(); @@ -884,7 +848,7 @@ impl PyAttrsList { match elem.attrs.get_mut(index) { Some(x) => { x.0 = key.into(); - x.1 = val.into(); + x.1 = value.into(); Ok(()) } None => Err(pyo3::PyErr::new::( @@ -893,22 +857,9 @@ impl PyAttrsList { } } - fn update_value( - self_: pyo3::PyRef<'_, Self>, - index: usize, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let Ok(value) = value.bind(self_.py()).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for value, got {}", - crate::tools::get_type_name(value.bind(self_.py())) - ), - )); - }; - - let mut tree = self_.0.tree.lock(); - let mut node = tree.get_mut(self_.0.id).unwrap(); + fn update_value(&self, index: usize, value: &str) -> pyo3::PyResult<()> { + let mut tree = self.0.tree.lock(); + let mut node = tree.get_mut(self.0.id).unwrap(); let elem = node.value().element_mut().unwrap(); match elem.attrs.get_mut(index) { @@ -1037,7 +988,7 @@ impl PyElement { fn new( treedom: &pyo3::Bound<'_, pyo3::PyAny>, name: crate::tools::PyQualNameOrStr, - attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::Py)>, + attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::pybacked::PyBackedStr)>, template: bool, mathml_annotation_xml_integration_point: bool, ) -> pyo3::PyResult { @@ -1056,17 +1007,7 @@ impl PyElement { for (key, val) in attrs.into_iter() { let key = key.into_qualname(); - - let Ok(val) = val.bind(treedom.py()).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for attrs #2, got {}", - crate::tools::get_type_name(val.bind(treedom.py())) - ), - )); - }; - - attributes.push((key, treedom::atomic::AtomicTendril::from(val))); + attributes.push((key, treedom::atomic::AtomicTendril::from(&*val))); } let val = ::treedom::interface::ElementInterface::new( @@ -1110,8 +1051,7 @@ impl PyElement { #[setter] fn set_attrs( &self, - py: pyo3::Python<'_>, - attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::Py)>, + attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::pybacked::PyBackedStr)>, ) -> pyo3::PyResult<()> { let mut tree = self.0.tree.lock(); let mut node = tree.get_mut(self.0.id).unwrap(); @@ -1120,19 +1060,9 @@ impl PyElement { for (key, val) in attrs.into_iter() { let key = key.into_qualname(); - - let Ok(val) = val.bind(py).extract::() else { - return Err(pyo3::PyErr::new::( - format!( - "expected str for attrs #2, got {}", - crate::tools::get_type_name(val.bind(py)) - ), - )); - }; - attributes.push(( treedom::interface::AttributeKey::from(key), - treedom::atomic::AtomicTendril::from(val), + treedom::atomic::AtomicTendril::from(&*val), )); } From cc5b4dc2d5817c962efc1fa9dc86c211ec674296 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 17:55:32 -0800 Subject: [PATCH 4/8] Let PyO3 type-check parse options Signed-off-by: Anders Kaseorg --- src/parser.rs | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 72a73a1..510461e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,3 @@ -use pyo3::types::PyAnyMethods; - /// These are options for HTML parsing. /// /// # Note @@ -245,6 +243,12 @@ pub struct PyParser { state: parking_lot::Mutex, } +#[derive(pyo3::FromPyObject)] +enum PyParserOptions<'p> { + Html(pyo3::PyRef<'p, PyHtmlOptions>), + Xml(pyo3::PyRef<'p, PyXmlOptions>), +} + #[pyo3::pymethods] impl PyParser { /// Creates a new [`PyParser`] @@ -252,41 +256,37 @@ impl PyParser { /// - `options`: If your input is a HTML document, pass a PyHtmlOptions; /// If your input is a XML document, pass PyXmlOptions. #[new] - fn new(options: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { + fn new(options: PyParserOptions) -> pyo3::PyResult { let state = { - if let Ok(options) = options.extract::>() { - ParserState::as_html(treedom::ParserSink::parse_html( - options.full_document, - treedom::html5ever::tokenizer::TokenizerOpts { - exact_errors: options.exact_errors, - discard_bom: options.discard_bom, - profile: options.profile, - ..Default::default() - }, - treedom::html5ever::tree_builder::TreeBuilderOpts { - exact_errors: options.exact_errors, - iframe_srcdoc: options.iframe_srcdoc, - drop_doctype: options.drop_doctype, - quirks_mode: options.quirks_mode, - ..Default::default() - }, - )) - } else if let Ok(options) = options.extract::>() { - ParserState::as_xml(treedom::ParserSink::parse_xml( - treedom::xml5ever::tokenizer::XmlTokenizerOpts { - exact_errors: options.exact_errors, - discard_bom: options.discard_bom, - profile: options.profile, - ..Default::default() - }, - )) - } else { - return Err(pyo3::PyErr::new::( - format!( - "expected HtmlOptions or XmlOptions for options, got {}", - crate::tools::get_type_name(options) - ), - )); + match options { + PyParserOptions::Html(options) => { + ParserState::as_html(treedom::ParserSink::parse_html( + options.full_document, + treedom::html5ever::tokenizer::TokenizerOpts { + exact_errors: options.exact_errors, + discard_bom: options.discard_bom, + profile: options.profile, + ..Default::default() + }, + treedom::html5ever::tree_builder::TreeBuilderOpts { + exact_errors: options.exact_errors, + iframe_srcdoc: options.iframe_srcdoc, + drop_doctype: options.drop_doctype, + quirks_mode: options.quirks_mode, + ..Default::default() + }, + )) + } + PyParserOptions::Xml(options) => { + ParserState::as_xml(treedom::ParserSink::parse_xml( + treedom::xml5ever::tokenizer::XmlTokenizerOpts { + exact_errors: options.exact_errors, + discard_bom: options.discard_bom, + profile: options.profile, + ..Default::default() + }, + )) + } } }; From 303c98c36c64be82c268670d9ef5394d32146946 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 17:57:37 -0800 Subject: [PATCH 5/8] Let PyO3 type-check TreeDom Signed-off-by: Anders Kaseorg --- src/iter/iterator.rs | 16 +++-------- src/nodes.rs | 63 +++++--------------------------------------- 2 files changed, 10 insertions(+), 69 deletions(-) diff --git a/src/iter/iterator.rs b/src/iter/iterator.rs index 2c2680f..d30ee87 100644 --- a/src/iter/iterator.rs +++ b/src/iter/iterator.rs @@ -1,4 +1,3 @@ -use pyo3::types::PyAnyMethods; use std::sync::atomic; use std::sync::Arc; @@ -12,20 +11,11 @@ pub struct PyIterator { #[pyo3::pymethods] impl PyIterator { #[new] - fn new(dom: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { - let dom = dom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for dom, got {}", - crate::tools::get_type_name(dom) - )) - })?; - - Ok(Self { + fn new(dom: &crate::tree::PyTreeDom) -> Self { + Self { dom: dom.dom.clone(), index: atomic::AtomicUsize::new(0), - }) + } } fn __iter__(self_: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { diff --git a/src/nodes.rs b/src/nodes.rs index 0a215e1..7f4ea5e 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -275,20 +275,11 @@ pub struct PyDoctype(pub(super) NodeGuard); impl PyDoctype { #[new] fn new( - treedom: &pyo3::Bound<'_, pyo3::PyAny>, + treedom: &super::tree::PyTreeDom, name: String, public_id: String, system_id: String, ) -> pyo3::PyResult { - let treedom = treedom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for treedom, got {}", - crate::tools::get_type_name(treedom) - )) - })?; - let val = ::treedom::interface::DoctypeInterface::new( name.into(), public_id.into(), @@ -437,16 +428,7 @@ pub struct PyComment(pub(super) NodeGuard); #[pyo3::pymethods] impl PyComment { #[new] - fn new(treedom: &pyo3::Bound<'_, pyo3::PyAny>, content: String) -> pyo3::PyResult { - let treedom = treedom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for treedom, got {}", - crate::tools::get_type_name(treedom) - )) - })?; - + fn new(treedom: &super::tree::PyTreeDom, content: String) -> pyo3::PyResult { let val = ::treedom::interface::CommentInterface::new(content.into()); let mut dom = treedom.dom.lock(); @@ -560,16 +542,7 @@ pub struct PyText(pub(super) NodeGuard); #[pyo3::pymethods] impl PyText { #[new] - fn new(treedom: &pyo3::Bound<'_, pyo3::PyAny>, content: String) -> pyo3::PyResult { - let treedom = treedom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for treedom, got {}", - crate::tools::get_type_name(treedom) - )) - })?; - + fn new(treedom: &super::tree::PyTreeDom, content: String) -> pyo3::PyResult { let val = ::treedom::interface::TextInterface::new(content.into()); let mut dom = treedom.dom.lock(); @@ -986,21 +959,12 @@ pub struct PyElement(pub(super) NodeGuard); impl PyElement { #[new] fn new( - treedom: &pyo3::Bound<'_, pyo3::PyAny>, + treedom: &super::tree::PyTreeDom, name: crate::tools::PyQualNameOrStr, attrs: Vec<(crate::tools::PyQualNameOrStr, pyo3::pybacked::PyBackedStr)>, template: bool, mathml_annotation_xml_integration_point: bool, - ) -> pyo3::PyResult { - let treedom = treedom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for treedom, got {}", - crate::tools::get_type_name(treedom) - )) - })?; - + ) -> Self { let name = name.into_qualname(); let mut attributes = Vec::with_capacity(attrs.len()); @@ -1020,7 +984,7 @@ impl PyElement { let mut dom = treedom.dom.lock(); let node = dom.orphan(val.into()); - Ok(Self(NodeGuard::from_nodemut(treedom.dom.clone(), node))) + Self(NodeGuard::from_nodemut(treedom.dom.clone(), node)) } #[getter] @@ -1225,20 +1189,7 @@ pub struct PyProcessingInstruction(pub(super) NodeGuard); #[pyo3::pymethods] impl PyProcessingInstruction { #[new] - fn new( - treedom: &pyo3::Bound, - data: String, - target: String, - ) -> pyo3::PyResult { - let treedom = treedom - .extract::>() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected TreeDom for treedom, got {}", - crate::tools::get_type_name(treedom) - )) - })?; - + fn new(treedom: &super::tree::PyTreeDom, data: String, target: String) -> pyo3::PyResult { let val = ::treedom::interface::ProcessingInstructionInterface::new(data.into(), target.into()); From a9aa7a00063e16f3bf73470a0c31b5fd72c8ebf3 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Mar 2026 23:37:31 -0800 Subject: [PATCH 6/8] Let PyO3 type-check nodes This also reduces the number of NodeGuard::clone calls. Signed-off-by: Anders Kaseorg --- src/iter/iterator.rs | 18 +--- src/iter/traverse.rs | 19 +--- src/nodes.rs | 42 +++++---- src/parser.rs | 9 +- src/select.rs | 9 +- src/tree.rs | 209 +++++++++++++------------------------------ 6 files changed, 98 insertions(+), 208 deletions(-) diff --git a/src/iter/iterator.rs b/src/iter/iterator.rs index d30ee87..464d902 100644 --- a/src/iter/iterator.rs +++ b/src/iter/iterator.rs @@ -59,13 +59,8 @@ macro_rules! axis_iterators { #[pyo3::pymethods] impl $name { #[new] - fn new(node: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { - let node = crate::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected a node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; + fn new(node: crate::nodes::PyNodeRef) -> pyo3::PyResult { + let node = node.as_node_guard(); Ok(Self { guard: $f(&node) }) } @@ -112,13 +107,8 @@ pub struct PyChildren { #[pyo3::pymethods] impl PyChildren { #[new] - fn new(node: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { - let node = crate::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected a node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; + fn new(node: crate::nodes::PyNodeRef) -> pyo3::PyResult { + let node = node.as_node_guard(); let front = node.first_child(); let back = node.last_child(); diff --git a/src/iter/traverse.rs b/src/iter/traverse.rs index 7d58537..4d032b8 100644 --- a/src/iter/traverse.rs +++ b/src/iter/traverse.rs @@ -55,14 +55,8 @@ impl PyTraverse { #[pyo3::pymethods] impl PyTraverse { #[new] - fn new(node: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { - let node = crate::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected a node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; - + fn new(node: crate::nodes::PyNodeRef) -> pyo3::PyResult { + let node = node.as_node_guard().clone(); Ok(Self::from_nodeguard(node)) } @@ -86,13 +80,8 @@ pub struct PyDescendants(PyTraverse); #[pyo3::pymethods] impl PyDescendants { #[new] - fn new(node: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult { - let node = crate::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected a node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; + fn new(node: crate::nodes::PyNodeRef) -> pyo3::PyResult { + let node = node.as_node_guard().clone(); Ok(Self(PyTraverse { root: Some(node), diff --git a/src/nodes.rs b/src/nodes.rs index 7f4ea5e..ba29cde 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -1,4 +1,3 @@ -use pyo3::types::PyAnyMethods; use std::sync::Arc; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -11,6 +10,29 @@ pub enum NodeGuardType { Pi, } +#[derive(pyo3::FromPyObject)] +pub enum PyNodeRef<'p> { + Document(pyo3::PyRef<'p, PyDocument>), + Doctype(pyo3::PyRef<'p, PyDoctype>), + Comment(pyo3::PyRef<'p, PyComment>), + Text(pyo3::PyRef<'p, PyText>), + Element(pyo3::PyRef<'p, PyElement>), + Pi(pyo3::PyRef<'p, PyProcessingInstruction>), +} + +impl PyNodeRef<'_> { + pub fn as_node_guard(&self) -> &NodeGuard { + match self { + PyNodeRef::Document(x) => &x.0, + PyNodeRef::Doctype(x) => &x.0, + PyNodeRef::Comment(x) => &x.0, + PyNodeRef::Text(x) => &x.0, + PyNodeRef::Element(x) => &x.0, + PyNodeRef::Pi(x) => &x.0, + } + } +} + impl From<&::treedom::interface::Interface> for NodeGuardType { fn from(value: &::treedom::interface::Interface) -> Self { match value { @@ -105,24 +127,6 @@ impl NodeGuard { node.has_children() } - pub fn from_pyobject(object: &pyo3::Bound<'_, pyo3::PyAny>) -> Result { - if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else if let Ok(x) = object.extract::>() { - Ok(x.0.clone()) - } else { - Err(()) - } - } - pub fn into_any(self, py: pyo3::Python<'_>) -> pyo3::Py { match &self.type_ { NodeGuardType::Document => pyo3::Py::new(py, PyDocument(self)).unwrap().into_any(), diff --git a/src/parser.rs b/src/parser.rs index 510461e..2f7fd9d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -411,17 +411,12 @@ unsafe impl Sync for PyParser {} #[pyo3::pyfunction] #[pyo3(signature=(node, indent=4, include_self=true, is_html=None))] pub fn serialize( - node: &pyo3::Bound<'_, pyo3::PyAny>, + node: crate::nodes::PyNodeRef, indent: usize, include_self: bool, is_html: Option, ) -> pyo3::PyResult> { - let node = super::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; + let node = node.as_node_guard(); let is_html = match is_html { Some(x) => x, diff --git a/src/select.rs b/src/select.rs index dab0217..e5c5615 100644 --- a/src/select.rs +++ b/src/select.rs @@ -66,13 +66,8 @@ pub struct PySelect { #[pyo3::pymethods] impl PySelect { #[new] - fn new(node: &pyo3::Bound<'_, pyo3::PyAny>, expression: String) -> pyo3::PyResult { - let node = crate::nodes::NodeGuard::from_pyobject(node).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected a node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node) - )) - })?; + fn new(node: crate::nodes::PyNodeRef, expression: String) -> pyo3::PyResult { + let node = node.as_node_guard().clone(); Ok(Self { inner: Arc::new(parking_lot::Mutex::new(PySelectInner::new( diff --git a/src/tree.rs b/src/tree.rs index dbbca26..4919545 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -150,33 +150,20 @@ impl PyTreeDom { } fn append( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); @@ -188,44 +175,31 @@ impl PyTreeDom { )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.append_id(child.id); - self_.add_new_namespace(tree, child.id); + self.add_new_namespace(tree, child.id); Ok(()) } fn prepend( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); @@ -237,44 +211,31 @@ impl PyTreeDom { )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.prepend_id(child.id); - self_.add_new_namespace(tree, child.id); + self.add_new_namespace(tree, child.id); Ok(()) } fn insert_before( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); @@ -286,44 +247,31 @@ impl PyTreeDom { )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.insert_id_before(child.id); - self_.add_new_namespace(tree, child.id); + self.add_new_namespace(tree, child.id); Ok(()) } fn insert_after( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); @@ -335,76 +283,58 @@ impl PyTreeDom { )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.insert_id_after(child.id); - self_.add_new_namespace(tree, child.id); + self.add_new_namespace(tree, child.id); Ok(()) } - fn detach(self_: pyo3::PyRef<'_, Self>, node: pyo3::Py) -> pyo3::PyResult<()> { - let node = super::nodes::NodeGuard::from_pyobject(node.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for node, got {}", - crate::tools::get_type_name(node.bind(self_.py())) - )) - })?; + fn detach(&self, node: crate::nodes::PyNodeRef) -> pyo3::PyResult<()> { + let node = node.as_node_guard(); - if !Arc::ptr_eq(&self_.dom, &node.tree) { + if !Arc::ptr_eq(&self.dom, &node.tree) { return Err(pyo3::PyErr::new::( "the given node node is not for this dom", )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut node = tree.get_mut(node.id).unwrap(); node.detach(); let id = node.id(); let _ = node; - self_.remove_old_namespace(tree, id); + self.remove_old_namespace(tree, id); Ok(()) } fn reparent_append( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.reparent_from_id_append(child.id); @@ -413,39 +343,26 @@ impl PyTreeDom { } fn reparent_prepend( - self_: pyo3::PyRef<'_, Self>, - parent: pyo3::Py, - child: pyo3::Py, + &self, + parent: crate::nodes::PyNodeRef, + child: crate::nodes::PyNodeRef, ) -> pyo3::PyResult<()> { - let parent = - super::nodes::NodeGuard::from_pyobject(parent.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for parent, got {}", - crate::tools::get_type_name(parent.bind(self_.py())) - )) - })?; - - let child = - super::nodes::NodeGuard::from_pyobject(child.bind(self_.py())).map_err(|_| { - pyo3::PyErr::new::(format!( - "expected an node (such as Element, Text, Comment, ...) for child, got {}", - crate::tools::get_type_name(child.bind(self_.py())) - )) - })?; - - if !Arc::ptr_eq(&self_.dom, &parent.tree) { + let parent = parent.as_node_guard(); + let child = child.as_node_guard(); + + if !Arc::ptr_eq(&self.dom, &parent.tree) { return Err(pyo3::PyErr::new::( "the given parent parent is not for this dom", )); } - if !Arc::ptr_eq(&self_.dom, &child.tree) { + if !Arc::ptr_eq(&self.dom, &child.tree) { return Err(pyo3::PyErr::new::( "the given parent child is not for this dom", )); } - let mut tree = self_.dom.lock(); + let mut tree = self.dom.lock(); let mut parent = tree.get_mut(parent.id).unwrap(); parent.reparent_from_id_prepend(child.id); From f22a0e87957d16eebd9c1c33edc441b6c953f3a4 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sat, 7 Mar 2026 00:25:19 -0800 Subject: [PATCH 7/8] Let PyO3 type-check PyDict Signed-off-by: Anders Kaseorg --- src/tree.rs | 70 ++++++++++++++++------------------------------------- 1 file changed, 21 insertions(+), 49 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 4919545..8481841 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -64,8 +64,10 @@ impl PyTreeDom { #[pyo3(signature=(*, namespaces=None))] fn new( cls: &pyo3::Bound<'_, pyo3::types::PyType>, - namespaces: Option>, - ) -> pyo3::PyResult { + namespaces: Option< + std::collections::HashMap, + >, + ) -> Self { Self::with_capacity(cls, 0, namespaces) } @@ -73,41 +75,17 @@ impl PyTreeDom { #[classmethod] #[pyo3(signature=(capacity, *, namespaces=None))] fn with_capacity( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, + _cls: &pyo3::Bound<'_, pyo3::types::PyType>, capacity: usize, - namespaces: Option>, - ) -> pyo3::PyResult { - let mut ns = ::treedom::NamespaceMap::new(); - - if let Some(namespaces) = namespaces { - let namespaces = namespaces - .bind(cls.py()) - .cast::() - .map_err(|_| { - pyo3::PyErr::new::(format!( - "expected dict[str, str] for namespaces, got {}", - crate::tools::get_type_name(namespaces.bind(cls.py())) - )) - })?; - - for (key, val) in pyo3::types::PyDictMethods::iter(namespaces) { - let key = key.cast::().map_err(|_| { - pyo3::PyErr::new::(format!( - "expected dict[str, str] for namespaces, but found a key with type {} (keys must be strings)", - crate::tools::get_type_name(&key) - )) - }).map(|x| pyo3::types::PyStringMethods::to_string_lossy(x).into_owned())?; - - let val = val.cast::().map_err(|_| { - pyo3::PyErr::new::(format!( - "expected dict[str, str] for namespaces, but found a value with type {} (values must be strings)", - crate::tools::get_type_name(&val) - )) - }).map(|x| pyo3::types::PyStringMethods::to_string_lossy(x).into_owned())?; - - ns.insert(key.into(), val.into()); - } - } + namespaces: Option< + std::collections::HashMap, + >, + ) -> Self { + let ns = namespaces + .into_iter() + .flatten() + .map(|(key, val)| ((*key).into(), (*val).into())) + .collect(); let dom = if capacity == 0 { ::treedom::IDTreeDOM::new(::treedom::interface::DocumentInterface, ns) @@ -119,24 +97,18 @@ impl PyTreeDom { ) }; - Ok(Self { + Self { dom: Arc::new(parking_lot::Mutex::new(dom)), - }) + } } /// Returns the available namespaces in DOM as a `dict`. - fn namespaces<'a>(&self, py: pyo3::Python<'a>) -> pyo3::PyResult> { - use pyo3::types::{PyDict, PyDictMethods}; - - let dict = PyDict::new(py); - + fn namespaces(&self) -> std::collections::HashMap { let dom = self.dom.lock(); - - for (key, val) in dom.namespaces().iter() { - dict.set_item(key.to_string(), val.to_string())?; - } - - Ok(dict.into_any()) + dom.namespaces() + .iter() + .map(|(key, val)| (key.to_string(), val.to_string())) + .collect() } /// Returns the root node (always is PyDocument). From 968640e372d4adecb6423858d61329c9e3088ecd Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sat, 7 Mar 2026 03:54:01 -0800 Subject: [PATCH 8/8] Let PyO3 convert NodeGuard to Python object Signed-off-by: Anders Kaseorg --- src/iter/iterator.rs | 39 +++++------ src/iter/traverse.rs | 15 ++--- src/nodes.rs | 156 ++++++++++++++++++++++++------------------- src/select.rs | 5 +- 4 files changed, 112 insertions(+), 103 deletions(-) diff --git a/src/iter/iterator.rs b/src/iter/iterator.rs index 464d902..ca89da2 100644 --- a/src/iter/iterator.rs +++ b/src/iter/iterator.rs @@ -22,23 +22,20 @@ impl PyIterator { self_ } - fn __next__(self_: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + fn __next__(&self) -> pyo3::PyResult { let node = { - let tree = self_.dom.lock(); + let tree = self.dom.lock(); // NOTE: // Unfortunately the ego_tree crate does not let us to use directly usize for getting nodes. - match tree - .nodes() - .nth(self_.index.load(atomic::Ordering::Relaxed)) - { - Some(x) => crate::nodes::NodeGuard::from_noderef(self_.dom.clone(), x), + match tree.nodes().nth(self.index.load(atomic::Ordering::Relaxed)) { + Some(x) => crate::nodes::NodeGuard::from_noderef(self.dom.clone(), x), None => return Err(pyo3::PyErr::new::(())), } }; - self_.index.fetch_add(1, atomic::Ordering::Relaxed); - Ok(node.into_any(self_.py())) + self.index.fetch_add(1, atomic::Ordering::Relaxed); + Ok(node) } } @@ -69,12 +66,11 @@ macro_rules! axis_iterators { self_ } - fn __next__(mut self_: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult> { - let node = self_.guard.take(); - self_.guard = node.as_ref().and_then($f); + fn __next__(&mut self) -> pyo3::PyResult { + let node = self.guard.take(); + self.guard = node.as_ref().and_then($f); - node.map(|x| x.into_any(self_.py())) - .ok_or_else(|| pyo3::PyErr::new::(())) + node.ok_or_else(|| pyo3::PyErr::new::(())) } } )* @@ -120,10 +116,10 @@ impl PyChildren { self_ } - fn __next__(mut self_: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult> { + fn __next__(&mut self) -> pyo3::PyResult { let mut is_same = false; - if let (Some(x), Some(y)) = (&self_.front, &self_.back) { + if let (Some(x), Some(y)) = (&self.front, &self.back) { if x.id == y.id { is_same = true; } @@ -131,19 +127,18 @@ impl PyChildren { let node = { if is_same { - let node = self_.front.take(); - self_.back = None; + let node = self.front.take(); + self.back = None; node } else { - let node = self_.front.take(); - self_.front = node + let node = self.front.take(); + self.front = node .as_ref() .and_then(crate::nodes::NodeGuard::next_sibling); node } }; - node.map(|x| x.into_any(self_.py())) - .ok_or_else(|| pyo3::PyErr::new::(())) + node.ok_or_else(|| pyo3::PyErr::new::(())) } } diff --git a/src/iter/traverse.rs b/src/iter/traverse.rs index 4d032b8..47a6829 100644 --- a/src/iter/traverse.rs +++ b/src/iter/traverse.rs @@ -64,10 +64,9 @@ impl PyTraverse { self_ } - pub fn __next__(mut self_: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<(pyo3::Py, bool)> { - let py = self_.py(); - match self_.next_edge() { - Some((x, y)) => Ok((x.into_any(py), y)), + pub fn __next__(&mut self) -> pyo3::PyResult<(crate::nodes::NodeGuard, bool)> { + match self.next_edge() { + Some((x, y)) => Ok((x, y)), None => Err(pyo3::PyErr::new::(())), } } @@ -93,15 +92,13 @@ impl PyDescendants { self_ } - fn __next__(mut self_: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult> { - let py = self_.py(); - - while let Some((node, is_close)) = self_.0.next_edge() { + fn __next__(&mut self) -> pyo3::PyResult { + while let Some((node, is_close)) = self.0.next_edge() { if is_close { continue; } - return Ok(node.into_any(py)); + return Ok(node); } Err(pyo3::PyErr::new::(())) diff --git a/src/nodes.rs b/src/nodes.rs index ba29cde..ba0e4c3 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -10,6 +10,16 @@ pub enum NodeGuardType { Pi, } +#[derive(pyo3::IntoPyObject)] +pub enum PyNode { + Document(PyDocument), + Doctype(PyDoctype), + Comment(PyComment), + Text(PyText), + Element(PyElement), + Pi(PyProcessingInstruction), +} + #[derive(pyo3::FromPyObject)] pub enum PyNodeRef<'p> { Document(pyo3::PyRef<'p, PyDocument>), @@ -127,16 +137,14 @@ impl NodeGuard { node.has_children() } - pub fn into_any(self, py: pyo3::Python<'_>) -> pyo3::Py { + pub fn into_py_node(self) -> PyNode { match &self.type_ { - NodeGuardType::Document => pyo3::Py::new(py, PyDocument(self)).unwrap().into_any(), - NodeGuardType::Comment => pyo3::Py::new(py, PyComment(self)).unwrap().into_any(), - NodeGuardType::Doctype => pyo3::Py::new(py, PyDoctype(self)).unwrap().into_any(), - NodeGuardType::Element => pyo3::Py::new(py, PyElement(self)).unwrap().into_any(), - NodeGuardType::Text => pyo3::Py::new(py, PyText(self)).unwrap().into_any(), - NodeGuardType::Pi => pyo3::Py::new(py, PyProcessingInstruction(self)) - .unwrap() - .into_any(), + NodeGuardType::Document => PyNode::Document(PyDocument(self)), + NodeGuardType::Doctype => PyNode::Doctype(PyDoctype(self)), + NodeGuardType::Comment => PyNode::Comment(PyComment(self)), + NodeGuardType::Text => PyNode::Text(PyText(self)), + NodeGuardType::Element => PyNode::Element(PyElement(self)), + NodeGuardType::Pi => PyNode::Pi(PyProcessingInstruction(self)), } } } @@ -161,6 +169,16 @@ impl PartialEq for NodeGuard { } impl Eq for NodeGuard {} +impl<'py> pyo3::IntoPyObject<'py> for NodeGuard { + type Target = pyo3::PyAny; + type Output = pyo3::Bound<'py, pyo3::PyAny>; + type Error = pyo3::PyErr; + + fn into_pyobject(self, py: pyo3::Python<'py>) -> Result { + self.into_py_node().into_pyobject(py) + } +} + macro_rules! create_richcmp_notimplemented { ($token:expr, $selfobj:expr) => {{ Err(pyo3::PyErr::new::( @@ -195,24 +213,24 @@ impl PyDocument { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { @@ -342,24 +360,24 @@ impl PyDoctype { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { @@ -459,24 +477,24 @@ impl PyComment { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { @@ -573,24 +591,24 @@ impl PyText { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { @@ -1100,24 +1118,24 @@ impl PyElement { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { @@ -1243,24 +1261,24 @@ impl PyProcessingInstruction { self.0.tree() } - fn parent(&self, py: pyo3::Python<'_>) -> Option> { - self.0.parent().map(move |x| x.into_any(py)) + fn parent(&self) -> Option { + self.0.parent() } - fn prev_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.prev_sibling().map(move |x| x.into_any(py)) + fn prev_sibling(&self) -> Option { + self.0.prev_sibling() } - fn next_sibling(&self, py: pyo3::Python<'_>) -> Option> { - self.0.next_sibling().map(move |x| x.into_any(py)) + fn next_sibling(&self) -> Option { + self.0.next_sibling() } - fn first_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.first_child().map(move |x| x.into_any(py)) + fn first_child(&self) -> Option { + self.0.first_child() } - fn last_child(&self, py: pyo3::Python<'_>) -> Option> { - self.0.last_child().map(move |x| x.into_any(py)) + fn last_child(&self) -> Option { + self.0.last_child() } fn has_children(&self) -> bool { diff --git a/src/select.rs b/src/select.rs index e5c5615..07ee781 100644 --- a/src/select.rs +++ b/src/select.rs @@ -80,11 +80,10 @@ impl PySelect { self_ } - pub fn __next__(self_: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = self_.inner.lock(); + pub fn __next__(&self) -> pyo3::PyResult { + let mut lock = self.inner.lock(); lock.next() .ok_or_else(|| pyo3::PyErr::new::(())) - .map(|x| x.into_any(self_.py())) } }