Skip to main content

aidoku/imports/
html.rs

1//! Module for working with HTML.
2//!
3//! It provides a convenient API for extracting data, using HTML5
4//! DOM methods and CSS selectors.
5//!
6//! The backend of this module is [SwiftSoup](https://github.com/scinfu/SwiftSoup).
7use super::{
8	FFIResult, Rid,
9	std::{destroy, read_string_and_destroy},
10};
11use crate::alloc::{String, Vec};
12use core::fmt::Display;
13
14#[link(wasm_import_module = "html")]
15unsafe extern "C" {
16	fn parse(
17		html: *const u8,
18		html_len: usize,
19		base_url: *const u8,
20		base_url_len: usize,
21	) -> FFIResult;
22	fn parse_fragment(
23		html: *const u8,
24		html_len: usize,
25		base_url: *const u8,
26		base_url_len: usize,
27	) -> FFIResult;
28	fn escape(text: *const u8, text_len: usize) -> FFIResult;
29	fn unescape(text: *const u8, text_len: usize) -> FFIResult;
30
31	fn kind(rid: Rid) -> FFIResult;
32
33	fn child_nodes(rid: Rid) -> FFIResult;
34	fn has_attr(rid: Rid, attr: *const u8, attr_len: usize) -> bool;
35	fn set_attr(
36		rid: Rid,
37		key: *const u8,
38		key_len: usize,
39		value: *const u8,
40		value_len: usize,
41	) -> FFIResult;
42	fn remove_attr(rid: Rid, attr: *const u8, attr_len: usize) -> FFIResult;
43
44	fn set_text(rid: Rid, text: *const u8, text_len: usize) -> FFIResult;
45	fn set_html(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
46	fn prepend(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
47	fn append(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
48	fn children(rid: Rid) -> FFIResult;
49	fn base_uri(rid: Rid) -> FFIResult;
50	fn own_text(rid: Rid) -> FFIResult;
51	fn data(rid: Rid) -> FFIResult;
52	fn id(rid: Rid) -> FFIResult;
53	fn tag_name(rid: Rid) -> FFIResult;
54	fn class_name(rid: Rid) -> FFIResult;
55	fn has_class(rid: Rid, class: *const u8, class_len: usize) -> bool;
56	fn add_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
57	fn remove_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
58
59	fn first(rid: Rid) -> FFIResult;
60	fn last(rid: Rid) -> FFIResult;
61	#[allow(clashing_extern_declarations)]
62	#[link_name = "get"]
63	fn html_get(rid: Rid, index: usize) -> FFIResult;
64	fn size(rid: Rid) -> FFIResult;
65
66	fn parent(rid: Rid) -> FFIResult;
67	fn siblings(rid: Rid) -> FFIResult;
68	fn next(rid: Rid) -> FFIResult;
69	fn previous(rid: Rid) -> FFIResult;
70
71	fn attr(rid: Rid, key: *const u8, key_len: usize) -> FFIResult;
72	fn outer_html(rid: Rid) -> FFIResult;
73	fn remove(rid: Rid) -> FFIResult;
74
75	fn select(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
76	fn select_first(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
77	fn text(rid: Rid) -> FFIResult;
78	fn untrimmed_text(rid: Rid) -> FFIResult;
79	fn html(rid: Rid) -> FFIResult;
80}
81
82/// Error type for HTML operations.
83#[derive(PartialEq, Eq, Debug, Clone)]
84pub enum HtmlError {
85	InvalidDescriptor,
86	InvalidString,
87	InvalidHtml,
88	InvalidQuery,
89	NoResult,
90	SwiftSoupError,
91}
92
93impl HtmlError {
94	fn from(value: FFIResult) -> Option<Self> {
95		match value {
96			-1 => Some(Self::InvalidDescriptor),
97			-2 => Some(Self::InvalidString),
98			-3 => Some(Self::InvalidHtml),
99			-4 => Some(Self::InvalidQuery),
100			-5 => Some(Self::NoResult),
101			-6 => Some(Self::SwiftSoupError),
102			_ => None,
103		}
104	}
105}
106
107/// Namespace for HTML-related functions.
108#[derive(Debug)]
109pub struct Html;
110
111impl Html {
112	/// Parse HTML into a Document.
113	///
114	/// As there is no base URL specified, absolute URL resolution requires the
115	/// HTML to have a `<base href>` tag.
116	pub fn parse<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
117		let buf = html.as_ref();
118		let rid = unsafe { parse(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
119		if let Some(error) = HtmlError::from(rid) {
120			Err(error)
121		} else {
122			Ok(Document(unsafe { Element::from(rid) }))
123		}
124	}
125
126	/// Parse HTML into a Document, with a base URL.
127	///
128	/// The given `base_url` will be used for any URLs that occurs before a
129	/// `<base href>` tag is defined.
130	pub fn parse_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
131		html: T,
132		base_url: B,
133	) -> Result<Document, HtmlError> {
134		let buf = html.as_ref();
135		let url = base_url.as_ref();
136		let rid = unsafe { parse(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
137		if let Some(error) = HtmlError::from(rid) {
138			Err(error)
139		} else {
140			Ok(Document(unsafe { Element::from(rid) }))
141		}
142	}
143
144	/// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
145	///
146	/// Similar to [Html::parse], relative URLs will not be resolved unless
147	/// there is a `<base href>` tag.
148	pub fn parse_fragment<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
149		let buf = html.as_ref();
150		let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
151		if let Some(error) = HtmlError::from(rid) {
152			Err(error)
153		} else {
154			Ok(Document(unsafe { Element::from(rid) }))
155		}
156	}
157
158	/// Parse a HTML fragment, assuming that it forms the `body` of the HTML, with a base URL.
159	///
160	/// Similar to [Html::parse_with_url], URL resolution occurs for any that appears
161	/// before a `<base href>` tag.
162	pub fn parse_fragment_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
163		html: T,
164		base_url: B,
165	) -> Result<Document, HtmlError> {
166		let buf = html.as_ref();
167		let url = base_url.as_ref();
168		let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
169		if let Some(error) = HtmlError::from(rid) {
170			Err(error)
171		} else {
172			Ok(Document(unsafe { Element::from(rid) }))
173		}
174	}
175
176	/// Escape any HTML-reserved characters to HTML entities.
177	///
178	/// # Examples
179	/// ```ignore
180	/// use aidoku::imports::html::Html;
181	/// assert_eq!(
182	///     Html::escape("Hello &<> Å å π 新 there ¾ © »"),
183	///     "Hello &amp;&lt;&gt; Å å π 新 there ¾ © »",
184	/// );
185	/// ```
186	pub fn escape<T: AsRef<str>>(text: T) -> String {
187		let text = text.as_ref();
188		let rid = unsafe { escape(text.as_ptr(), text.len()) };
189		read_string_and_destroy(rid).unwrap_or_default()
190	}
191
192	/// Unescape any HTML entities to their original characters.
193	///
194	/// # Examples
195	/// ```ignore
196	/// use aidoku::imports::html::Html;
197	/// assert_eq!(
198	///     Html::unescape("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »"),
199	///     Some("Hello &<> Å å π 新 there ¾ © »".into()),
200	/// );
201	/// ```
202	pub fn unescape<T: AsRef<str>>(text: T) -> Option<String> {
203		let text = text.as_ref();
204		let rid = unsafe { unescape(text.as_ptr(), text.len()) };
205		if HtmlError::from(rid).is_some() {
206			return None;
207		}
208		read_string_and_destroy(rid)
209	}
210}
211
212#[derive(PartialEq, Eq, Debug)]
213pub enum Kind {
214	Unknown,
215	Node,
216	TextNode,
217	DataNode,
218	Comment,
219	Element,
220	ElementList,
221	Document,
222}
223
224impl From<FFIResult> for Kind {
225	fn from(value: FFIResult) -> Self {
226		match value {
227			0 => Kind::Unknown,
228			1 => Kind::Node,
229			2 => Kind::TextNode,
230			3 => Kind::DataNode,
231			4 => Kind::Comment,
232			5 => Kind::Element,
233			6 => Kind::ElementList,
234			7 => Kind::Document,
235			_ => Kind::Unknown,
236		}
237	}
238}
239
240/// A single HTML node.
241pub struct Node {
242	rid: Rid,
243}
244
245impl Node {
246	/// Get an instance from a [Rid].
247	unsafe fn from(rid: Rid) -> Self {
248		Self { rid }
249	}
250
251	/// Get the kind of the node.
252	pub fn kind(&self) -> Kind {
253		unsafe { kind(self.rid) }.into()
254	}
255
256	/// Get the node's parent node, returning `None` if there isn't one.
257	pub fn parent(&self) -> Option<Node> {
258		let rid = unsafe { parent(self.rid) };
259		if HtmlError::from(rid).is_some() {
260			return None;
261		}
262		Some(unsafe { Node::from(rid) })
263	}
264
265	fn read_node_list(rid: i32) -> Vec<Node> {
266		let mut nodes = Vec::new();
267		if rid < 0 {
268			return nodes;
269		}
270		let len = unsafe { size(rid) };
271		if len <= 0 {
272			return nodes;
273		}
274		for index in 0..len as usize {
275			let node_rid = unsafe { html_get(rid, index) };
276			if node_rid < 0 {
277				continue;
278			}
279			nodes.push(unsafe { Node::from(node_rid) });
280		}
281		nodes
282	}
283
284	/// Get the node's child nodes.
285	pub fn child_nodes(&self) -> Vec<Node> {
286		let rid = unsafe { child_nodes(self.rid) };
287		Self::read_node_list(rid)
288	}
289
290	/// Get the sibling nodes of the node.
291	pub fn siblings(&self) -> Vec<Node> {
292		let rid = unsafe { siblings(self.rid) };
293		Self::read_node_list(rid)
294	}
295
296	/// Get the next sibling of the node, returning `None` if there isn't one.
297	pub fn next(&self) -> Option<Node> {
298		let rid = unsafe { next(self.rid) };
299		if HtmlError::from(rid).is_some() {
300			return None;
301		}
302		Some(unsafe { Node::from(rid) })
303	}
304
305	/// Get the previous sibling of the node, returning `None` if there isn't one.
306	pub fn prev(&self) -> Option<Node> {
307		let rid = unsafe { previous(self.rid) };
308		if HtmlError::from(rid).is_some() {
309			return None;
310		}
311		Some(unsafe { Node::from(rid) })
312	}
313
314	/// Get the node's outer HTML.
315	pub fn outer_html(&self) -> Option<String> {
316		let rid = unsafe { outer_html(self.rid) };
317		if HtmlError::from(rid).is_some() {
318			return None;
319		}
320		read_string_and_destroy(rid)
321	}
322
323	/// Test if this node has an attribute. Case insensitive.
324	pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
325		let attr_name = attr_name.as_ref();
326		unsafe { has_attr(self.rid, attr_name.as_ptr(), attr_name.len()) }
327	}
328
329	/// Set an attribute value on this node.
330	///
331	/// If this node already has an attribute with the key, its value is updated;
332	/// otherwise, a new attribute is added.
333	pub fn set_attr<K: AsRef<str>, V: AsRef<str>>(
334		&mut self,
335		key: K,
336		value: V,
337	) -> Result<(), HtmlError> {
338		let key = key.as_ref();
339		let value = value.as_ref();
340		let result = unsafe {
341			set_attr(
342				self.rid,
343				key.as_ptr(),
344				key.len(),
345				value.as_ptr(),
346				value.len(),
347			)
348		};
349
350		if let Some(error) = HtmlError::from(result) {
351			Err(error)
352		} else {
353			Ok(())
354		}
355	}
356
357	/// Remove an attribute from this node.
358	pub fn remove_attr<T: AsRef<str>>(&mut self, attr: T) -> Result<(), HtmlError> {
359		let attr = attr.as_ref();
360		let result = unsafe { remove_attr(self.rid, attr.as_ptr(), attr.len()) };
361
362		if let Some(error) = HtmlError::from(result) {
363			Err(error)
364		} else {
365			Ok(())
366		}
367	}
368
369	/// Get the text of this node, if it is a text node.
370	pub fn text(&self) -> Option<String> {
371		let kind = self.kind();
372		if !matches!(kind, Kind::TextNode | Kind::Element) {
373			return None;
374		}
375		let rid = unsafe { text(self.rid) };
376		if HtmlError::from(rid).is_some() {
377			return None;
378		}
379		read_string_and_destroy(rid)
380	}
381
382	/// Get the data of this node, if it is a data node or comment.
383	pub fn data(&self) -> Option<String> {
384		let kind = self.kind();
385		if !matches!(kind, Kind::DataNode | Kind::Comment | Kind::Element) {
386			return None;
387		}
388		let rid = unsafe { data(self.rid) };
389		if HtmlError::from(rid).is_some() {
390			return None;
391		}
392		read_string_and_destroy(rid)
393	}
394}
395
396impl Display for Node {
397	/// Returns the outer HTML of the node.
398	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
399		write!(f, "{}", self.outer_html().unwrap_or_default())
400	}
401}
402
403impl Drop for Node {
404	fn drop(&mut self) {
405		unsafe { destroy(self.rid) }
406	}
407}
408
409/// A single HTML element.
410pub struct Element(pub(crate) Node);
411
412impl Element {
413	/// Get an instance from a [Rid].
414	unsafe fn from(rid: Rid) -> Self {
415		Self(unsafe { Node::from(rid) })
416	}
417
418	/// Get the kind of the element.
419	pub fn kind(&self) -> Kind {
420		self.0.kind()
421	}
422
423	/// Find elements that match the given CSS (or JQuery) selector.
424	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
425		let query = css_query.as_ref();
426		let rid = unsafe { select(self.0.rid, query.as_ptr(), query.len()) };
427		if HtmlError::from(rid).is_some() {
428			return None;
429		}
430		Some(unsafe { ElementList::from(rid) })
431	}
432
433	/// Find the first element that matches the given CSS (or JQuery) selector.
434	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
435		let query = css_query.as_ref();
436		let rid = unsafe { select_first(self.0.rid, query.as_ptr(), query.len()) };
437		if HtmlError::from(rid).is_some() {
438			return None;
439		}
440		Some(unsafe { Element::from(rid) })
441	}
442
443	/// Get an attribute value by its key.
444	///
445	/// To get an absolute URL from an attribute that may be a relative URL,
446	/// prefix the key with `abs:`.
447	///
448	/// # Examples
449	/// ```ignore
450	/// use aidoku::imports::html::Html;
451	/// let html = Html::parse_with_url("<img src=\"/image.jpg\" />", "https://example.com").unwrap();
452	/// let el = html.select_first("img").unwrap();
453	/// assert_eq!(
454	///     el.attr("abs:src"),
455	///     Some("https://example.com/image.jpg".into())
456	/// );
457	/// ```
458	pub fn attr<T: AsRef<str>>(&self, attr_name: T) -> Option<String> {
459		let attr_name = attr_name.as_ref();
460		let rid = unsafe { attr(self.0.rid, attr_name.as_ptr(), attr_name.len()) };
461		if HtmlError::from(rid).is_some() {
462			return None;
463		}
464		read_string_and_destroy(rid)
465	}
466
467	/// Get the normalized, combined text of this element and its children.
468	///
469	/// Whitespace is normalized and trimmed.
470	///
471	/// Note that this method returns text that would be presented to a reader.
472	/// The contents of data nodes (e.g. `<script>` tags) are not considered text,
473	/// and instead, [Element::html] or [Element::data] can be used for them.
474	///
475	/// # Examples
476	/// ```ignore
477	/// use aidoku::imports::html::Html;
478	/// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
479	/// let el = html.select_first("p").unwrap();
480	/// assert_eq!(el.text(), Some("Hello there now!".into()));
481	/// ```
482	pub fn text(&self) -> Option<String> {
483		let rid = unsafe { text(self.0.rid) };
484		if HtmlError::from(rid).is_some() {
485			return None;
486		}
487		read_string_and_destroy(rid)
488	}
489
490	/// Get the text of this element and its children.
491	///
492	/// Whitespace is *not* normalized and trimmed.
493	///
494	/// Notices from [Element::text] apply.
495	///
496	/// # Examples
497	/// ```ignore
498	/// use aidoku::imports::html::Html;
499	/// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
500	/// let el = html.select_first("p").unwrap();
501	/// assert_eq!(el.untrimmed_text(), Some("Hello there now! ".into()));
502	/// ```
503	pub fn untrimmed_text(&self) -> Option<String> {
504		let rid = unsafe { untrimmed_text(self.0.rid) };
505		if HtmlError::from(rid).is_some() {
506			return None;
507		}
508		read_string_and_destroy(rid)
509	}
510
511	/// Get the element's inner HTML.
512	///
513	/// # Examples
514	/// ```ignore
515	/// use aidoku::imports::html::Html;
516	/// let html = Html::parse("<div><p></p></div>").unwrap();
517	/// let div = html.select_first("div").unwrap();
518	/// assert_eq!(div.html(), Some("<p></p>".into()));
519	/// ```
520	pub fn html(&self) -> Option<String> {
521		let rid = unsafe { html(self.0.rid) };
522		if HtmlError::from(rid).is_some() {
523			return None;
524		}
525		read_string_and_destroy(rid)
526	}
527
528	/// Get the element's outer HTML.
529	///
530	/// # Examples
531	/// ```ignore
532	/// use aidoku::imports::html::Html;
533	/// let html = Html::parse("<div><p></p></div>").unwrap();
534	/// let div = html.select_first("div").unwrap();
535	/// assert_eq!(div.outer_html(), Some("<div><p></p></div>".into()));
536	/// ```
537	pub fn outer_html(&self) -> Option<String> {
538		self.0.outer_html()
539	}
540
541	/// Remove this element from the DOM tree.
542	pub fn remove(self) {
543		_ = unsafe { remove(self.0.rid) };
544	}
545
546	/// Get the element's parent element, returning `None` if there isn't one.
547	pub fn parent(&self) -> Option<Element> {
548		let rid = unsafe { parent(self.0.rid) };
549		if HtmlError::from(rid).is_some() {
550			return None;
551		}
552		Some(unsafe { Element::from(rid) })
553	}
554
555	/// Get the elements's child nodes.
556	pub fn child_nodes(&self) -> Vec<Node> {
557		self.0.child_nodes()
558	}
559
560	/// Get the element's children elements.
561	pub fn children(&self) -> ElementList {
562		let rid = unsafe { children(self.0.rid) };
563		unsafe { ElementList::from(rid) }
564	}
565
566	/// Get the sibling elements of the element.
567	pub fn siblings(&self) -> ElementList {
568		let rid = unsafe { siblings(self.0.rid) };
569		unsafe { ElementList::from(rid) }
570	}
571
572	/// Get the next sibling element of the element, returning `None` if there isn't one.
573	pub fn next(&self) -> Option<Element> {
574		let rid = unsafe { next(self.0.rid) };
575		if HtmlError::from(rid).is_some() {
576			return None;
577		}
578		Some(unsafe { Element::from(rid) })
579	}
580
581	/// Get the previous sibling element of the element, returning `None` if there isn't one.
582	pub fn prev(&self) -> Option<Element> {
583		let rid = unsafe { previous(self.0.rid) };
584		if HtmlError::from(rid).is_some() {
585			return None;
586		}
587		Some(unsafe { Element::from(rid) })
588	}
589
590	/// Set the element's text content, clearing any existing content.
591	pub fn set_text<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
592		let text = text.as_ref();
593		let result = unsafe { set_text(self.0.rid, text.as_ptr(), text.len()) };
594
595		if let Some(error) = HtmlError::from(result) {
596			Err(error)
597		} else {
598			Ok(())
599		}
600	}
601
602	/// Set the element's inner HTML, clearing the existing HTML.
603	pub fn set_html<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
604		let text = text.as_ref();
605		let result = unsafe { set_html(self.0.rid, text.as_ptr(), text.len()) };
606
607		if let Some(error) = HtmlError::from(result) {
608			Err(error)
609		} else {
610			Ok(())
611		}
612	}
613
614	/// Prepend inner HTML into this element.
615	///
616	/// The given HTML will be parsed, and each node prepended to the start
617	/// of the element's children.
618	pub fn prepend<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
619		let text = text.as_ref();
620		let result = unsafe { prepend(self.0.rid, text.as_ptr(), text.len()) };
621
622		if let Some(error) = HtmlError::from(result) {
623			Err(error)
624		} else {
625			Ok(())
626		}
627	}
628
629	/// Append inner HTML into this element.
630	///
631	/// The given HTML will be parsed, and each node appended to the end
632	/// of the element's children.
633	pub fn append<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
634		let text = text.as_ref();
635		let result = unsafe { append(self.0.rid, text.as_ptr(), text.len()) };
636
637		if let Some(error) = HtmlError::from(result) {
638			Err(error)
639		} else {
640			Ok(())
641		}
642	}
643
644	/// Get the base URI of this Element.
645	pub fn base_uri(&self) -> Option<String> {
646		let rid = unsafe { base_uri(self.0.rid) };
647		if HtmlError::from(rid).is_some() {
648			return None;
649		}
650		read_string_and_destroy(rid)
651	}
652
653	/// Gets the (normalized) text owned by this element.
654	pub fn own_text(&self) -> Option<String> {
655		let rid = unsafe { own_text(self.0.rid) };
656		if HtmlError::from(rid).is_some() {
657			return None;
658		}
659		read_string_and_destroy(rid)
660	}
661
662	/// Get the combined data (e.g. the inside of a `<script>` tag) of this element.
663	///
664	/// Note that data is NOT the text of the element. Use [Element::text]
665	/// to get the text that would be visible to a user, and [Element::data]
666	/// for the contents of scripts, comments, CSS styles, etc.
667	pub fn data(&self) -> Option<String> {
668		let rid = unsafe { data(self.0.rid) };
669		if HtmlError::from(rid).is_some() {
670			return None;
671		}
672		read_string_and_destroy(rid)
673	}
674
675	/// Get the `id` attribute of this element.
676	pub fn id(&self) -> Option<String> {
677		let rid = unsafe { id(self.0.rid) };
678		if HtmlError::from(rid).is_some() {
679			return None;
680		}
681		read_string_and_destroy(rid)
682	}
683
684	/// Get the name of the tag for this element.
685	///
686	/// This will always be the lowercased version. For example, `<DIV>` and
687	/// `<div>` would both return `div`.
688	pub fn tag_name(&self) -> Option<String> {
689		let rid = unsafe { tag_name(self.0.rid) };
690		if HtmlError::from(rid).is_some() {
691			return None;
692		}
693		read_string_and_destroy(rid)
694	}
695
696	/// Get the literal value of this node's `class` attribute.
697	///
698	/// For example, on `<div class="header gray">` this would return `header gray`.
699	pub fn class_name(&self) -> Option<String> {
700		let rid = unsafe { class_name(self.0.rid) };
701		if HtmlError::from(rid).is_some() {
702			return None;
703		}
704		read_string_and_destroy(rid)
705	}
706
707	/// Test if this element has a class. Case insensitive.
708	pub fn has_class<T: AsRef<str>>(&self, class_name: T) -> bool {
709		let class_name = class_name.as_ref();
710		unsafe { has_class(self.0.rid, class_name.as_ptr(), class_name.len()) }
711	}
712
713	/// Add a class name to this element's class attribute.
714	pub fn add_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
715		let class_name = class_name.as_ref();
716		let result = unsafe { add_class(self.0.rid, class_name.as_ptr(), class_name.len()) };
717
718		if let Some(error) = HtmlError::from(result) {
719			Err(error)
720		} else {
721			Ok(())
722		}
723	}
724
725	/// Remove a class name from this element's class attribute.
726	pub fn remove_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
727		let class_name = class_name.as_ref();
728		let result = unsafe { remove_class(self.0.rid, class_name.as_ptr(), class_name.len()) };
729
730		if let Some(error) = HtmlError::from(result) {
731			Err(error)
732		} else {
733			Ok(())
734		}
735	}
736
737	/// Test if this element has an attribute. Case insensitive.
738	pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
739		self.0.has_attr(attr_name)
740	}
741
742	/// Set an attribute value on this element.
743	///
744	/// If this element already has an attribute with the key, its value is updated;
745	/// otherwise, a new attribute is added.
746	pub fn set_attr<K: AsRef<str>, V: AsRef<str>>(
747		&mut self,
748		key: K,
749		value: V,
750	) -> Result<(), HtmlError> {
751		self.0.set_attr(key, value)
752	}
753
754	/// Remove an attribute from this element.
755	pub fn remove_attr<T: AsRef<str>>(&mut self, attr: T) -> Result<(), HtmlError> {
756		self.0.remove_attr(attr)
757	}
758}
759
760impl Display for Element {
761	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
762		self.0.fmt(f)
763	}
764}
765
766impl From<Element> for Node {
767	fn from(value: Element) -> Self {
768		value.0
769	}
770}
771
772impl TryFrom<Node> for Element {
773	type Error = HtmlError;
774
775	fn try_from(value: Node) -> Result<Self, HtmlError> {
776		let kind = value.kind();
777		match kind {
778			Kind::Element => Ok(Self(value)),
779			Kind::Document => Ok(Self(value)),
780			_ => Err(HtmlError::InvalidDescriptor),
781		}
782	}
783}
784
785/// A complete HTML document.
786pub struct Document(pub(crate) Element);
787
788impl Document {
789	/// Get an instance from a [Rid].
790	pub(crate) unsafe fn from(rid: Rid) -> Self {
791		Self(unsafe { Element::from(rid) })
792	}
793
794	/// Find elements that match the given CSS (or JQuery) selector.
795	///
796	/// <details>
797	///     <summary>Supported selectors</summary>
798	///
799	/// | Pattern                 | Matches                                                                                              | Example                                                           |
800	/// |-------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
801	/// | `*`                     | any element                                                                                          | `*`                                                               |
802	/// | `tag`                   | elements with the given tag name                                                                     | `div`                                                             |
803	/// | <code>*\|E</code>       | elements of type E in any namespace (including non-namespaced)                                       | <code>*\|name</code> finds `<fb:name>` and `<name>` elements      |
804	/// | <code>ns\|E</code>      | elements of type E in the namespace ns                                                               | <code>fb\|name</code> finds `<fb:name>` elements                  |
805	/// | `#id`                   | elements with attribute ID of "id"                                                                   | `div#wrap`, `#logo`                                               |
806	/// | `.class`                | elements with a class name of "class"                                                                | `div.left`, `.result`                                             |
807	/// | `[attr]`                | elements with an attribute named "attr" (with any value)                                             | `a[href]`, `[title]`                                              |
808	/// | `[^attrPrefix]`         | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | `[^data-]`, `div[^data-]`                                         |
809	/// | `[attr=val]`            | elements with an attribute named "attr", and value equal to "val"                                    | `img[width=500]`, `a[rel=nofollow]`                               |
810	/// | `[attr="val"]`          | elements with an attribute named "attr", and value equal to "val"                                    | `span[hello="Cleveland"][goodbye="Columbus"]`, `a[rel="nofollow"]`|
811	/// | `[attr^=valPrefix]`     | elements with an attribute named "attr", and value starting with "valPrefix"                         | `a[href^=http:]`                                                  |
812	/// | `[attr$=valSuffix]`     | elements with an attribute named "attr", and value ending with "valSuffix"                           | `img[src$=.png]`                                                  |
813	/// | `[attr*=valContaining]` | elements with an attribute named "attr", and value containing "valContaining"                        | `a[href*=/search/]`                                               |
814	/// | `[attr~=regex]`         | elements with an attribute named "attr", and value matching the regular expression                   | `img[src~=(?i)\\.(png\|jpe?g)]`                                   |
815	/// |                         | The above may be combined in any order                                                               | `div.header[title]`                                               |
816	///
817	/// ## Combinators
818	/// | Pattern   | Matches                                         | Example                     |
819	/// |-----------|-------------------------------------------------|-----------------------------|
820	/// | `E F`     | an F element descended from an E element        | `div a`, `.logo h1`         |
821	/// | `E > F`   | an F direct child of E                          | `ol > li`                   |
822	/// | `E + F`   | an F element immediately preceded by sibling E  | `li + li`, `div.head + div` |
823	/// | `E ~ F`   | an F element preceded by sibling E              | `h1 ~ p`                    |
824	/// | `E, F, G` | all matching elements E, F, or G                | `a[href], div, h3`          |
825	///
826	/// ## Pseudo selectors
827	/// | Pattern              | Matches                                                                                                                                                   | Example                                                                                                                                                      |
828	/// |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
829	/// | `:lt(n)`             | elements whose sibling index is less than n                                                                                                               | `td:lt(3)` finds the first 3 cells of each row                                                                                                               |
830	/// | `:gt(n)`             | elements whose sibling index is greater than n                                                                                                            | `td:gt(1)` finds cells after skipping the first two                                                                                                          |
831	/// | `:eq(n)`             | elements whose sibling index is equal to n                                                                                                                | `td:eq(0)` finds the first cell of each row                                                                                                                  |
832	/// | `:has(selector)`     | elements that contains at least one element matching the selector                                                                                         | `div:has(p)` finds divs that contain p elements; `div:has(> a)` selects div elements that have at least one direct child a element.                          |
833	/// | `:not(selector)`     | elements that do not match the selector.                                                                                                                  | `div:not(.logo)` finds all divs that do not have the "logo" class; `div:not(:has(div))` finds divs that do not contain divs.                                 |
834	/// | `:contains(text)`    | elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants.           | `p:contains(SwiftSoup)` finds p elements containing the text "SwiftSoup"; `p:contains(hello \(there\))` finds p elements containing the text "Hello (There)" |
835	/// | `:matches(regex)`    | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants.                        | `td:matches(\\d+)` finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively.                           |
836	/// | `:containsOwn(text)` | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | `p:containsOwn(SwiftSoup)` finds p elements with own text "SwiftSoup".                                                                                       |
837	/// | `:matchesOwn(regex)` | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants.                  | `td:matchesOwn(\\d+)` finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively.            |
838	///
839	/// ## Structural pseudo-selectors
840	/// | Pattern                   | Matches                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | Example                                                |
841	/// |---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
842	/// | `:root`                   | The element that is the root of the document. In HTML, this is the html element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |                                                        |                                                                                                                                                                                                 |
843	/// | `:nth-child(an+b)`        | elements that have an+b-1 siblings before it in the document tree, for any positive integer or zero value of n, and has a parent element. For values of a and b greater than zero, this effectively divides the element's children into groups of a elements (the last group taking the remainder), and selecting the bth element of each group. For example, this allows the selectors to address every other row in a table, and could be used to alternate the color of paragraph text in a cycle of four. The a and b values must be integers (positive, negative, or zero). The index of the first child of an element is 1. |                                                        |
844	/// | `:nth-last-child(an+b)`   | elements that have an+b-1 siblings after it in the document tree. Otherwise like `:nth-child()`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | `tr:nth-last-child(-n+2)` the last two rows of a table |
845	/// | `:nth-of-type(an+b)`      | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name before it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                    | `img:nth-of-type(2n+1)`                                |
846	/// | `:nth-last-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name after it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                     | `img:nth-last-of-type(2n+1)`                           |
847	/// | `:first-child`            | elements that are the first child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | `div > p:first-child`                                  |
848	/// | `:last-child`             | elements that are the last child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | `ol > li:last-child`                                   |
849	/// | `:first-of-type`          | elements that are the first sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | `dl dt:first-of-type`                                  |
850	/// | `:last-of-type`           | elements that are the last sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | `tr > td:last-of-type`                                 |
851	/// | `:only-child`             | elements that have a parent element and whose parent element hasve no other element children                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |                                                        |
852	/// | `:only-of-type`           |  an element that has a parent element and whose parent element has no other element children with the same expanded element name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |                                                        |
853	/// | `:empty`                  | elements that have no children at all                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |                                                        |
854	/// </details>
855	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
856		self.0.select(css_query)
857	}
858
859	/// Find the first element that matches the given CSS (or JQuery) selector.
860	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
861		self.0.select_first(css_query)
862	}
863}
864
865impl From<Document> for Element {
866	fn from(value: Document) -> Self {
867		value.0
868	}
869}
870
871impl TryFrom<Node> for Document {
872	type Error = HtmlError;
873
874	fn try_from(value: Node) -> Result<Self, HtmlError> {
875		let kind = value.kind();
876		match kind {
877			Kind::Document => Ok(Self(Element(value))),
878			_ => Err(HtmlError::InvalidDescriptor),
879		}
880	}
881}
882
883impl TryFrom<Element> for Document {
884	type Error = HtmlError;
885
886	fn try_from(value: Element) -> Result<Self, HtmlError> {
887		let kind = value.kind();
888		match kind {
889			Kind::Document => Ok(Self(value)),
890			_ => Err(HtmlError::InvalidDescriptor),
891		}
892	}
893}
894
895/// A collection of HTML elements.
896pub struct ElementList {
897	rid: Rid,
898	lower_bound: usize,
899	upper_bound: usize,
900	size: usize,
901}
902
903impl ElementList {
904	/// Get an instance from a [Rid].
905	unsafe fn from(rid: Rid) -> Self {
906		let size = unsafe { size(rid) as usize };
907		Self {
908			rid,
909			lower_bound: 0,
910			upper_bound: size.wrapping_sub(1),
911			size,
912		}
913	}
914
915	/// Find elements that match the given CSS (or JQuery) selector.
916	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
917		let query = css_query.as_ref();
918		let rid = unsafe { select(self.rid, query.as_ptr(), query.len()) };
919		if HtmlError::from(rid).is_some() {
920			return None;
921		}
922		Some(unsafe { ElementList::from(rid) })
923	}
924
925	/// Find the first element that matches the given CSS (or JQuery) selector.
926	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
927		let query = css_query.as_ref();
928		let rid = unsafe { select_first(self.rid, query.as_ptr(), query.len()) };
929		if HtmlError::from(rid).is_some() {
930			return None;
931		}
932		Some(unsafe { Element::from(rid) })
933	}
934
935	/// Get the normalized, combined text of these elements and their children.
936	///
937	/// See [Element::text].
938	pub fn text(&self) -> Option<String> {
939		let rid = unsafe { text(self.rid) };
940		if HtmlError::from(rid).is_some() {
941			return None;
942		}
943		read_string_and_destroy(rid)
944	}
945
946	/// Get the text of these elements and their children.
947	///
948	/// See [Element::untrimmed_text].
949	pub fn untrimmed_text(&self) -> Option<String> {
950		let rid = unsafe { untrimmed_text(self.rid) };
951		if HtmlError::from(rid).is_some() {
952			return None;
953		}
954		read_string_and_destroy(rid)
955	}
956
957	/// Get the combined elements' inner HTML.
958	///
959	/// See [Element::html].
960	pub fn html(&self) -> Option<String> {
961		let rid = unsafe { html(self.rid) };
962		if HtmlError::from(rid).is_some() {
963			return None;
964		}
965		read_string_and_destroy(rid)
966	}
967
968	/// Get the combined elements' outer HTML.
969	///
970	/// See [Element::outer_html].
971	pub fn outer_html(&self) -> Option<String> {
972		let rid = unsafe { outer_html(self.rid) };
973		if HtmlError::from(rid).is_some() {
974			return None;
975		}
976		read_string_and_destroy(rid)
977	}
978
979	/// Remove each element from the DOM.
980	pub fn remove(self) {
981		_ = unsafe { remove(self.rid) };
982	}
983
984	/// Get the first element of this element list.
985	pub fn first(&self) -> Option<Element> {
986		let rid = unsafe { first(self.rid) };
987		if HtmlError::from(rid).is_some() {
988			return None;
989		}
990		Some(unsafe { Element::from(rid) })
991	}
992
993	/// Get the last element of this element list.
994	pub fn last(&self) -> Option<Element> {
995		let rid = unsafe { last(self.rid) };
996		if HtmlError::from(rid).is_some() {
997			return None;
998		}
999		Some(unsafe { Element::from(rid) })
1000	}
1001
1002	/// Get the element at the given index.
1003	pub fn get(&self, index: usize) -> Option<Element> {
1004		let rid = unsafe { html_get(self.rid, index) };
1005		if HtmlError::from(rid).is_some() {
1006			return None;
1007		}
1008		Some(unsafe { Element::from(rid) })
1009	}
1010
1011	/// Get the size of this element list.
1012	pub fn size(&self) -> usize {
1013		self.size
1014	}
1015
1016	/// Check if this element list is empty.
1017	pub fn is_empty(&self) -> bool {
1018		self.size() == 0
1019	}
1020}
1021
1022impl Iterator for ElementList {
1023	type Item = Element;
1024
1025	fn next(&mut self) -> Option<Self::Item> {
1026		if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
1027			return None;
1028		}
1029		let value_ref = self.get(self.lower_bound);
1030		self.lower_bound += 1;
1031		value_ref
1032	}
1033}
1034
1035impl DoubleEndedIterator for ElementList {
1036	fn next_back(&mut self) -> Option<Self::Item> {
1037		if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
1038			return None;
1039		}
1040		let value_ref = self.get(self.upper_bound);
1041		self.upper_bound = self.upper_bound.wrapping_sub(1);
1042		value_ref
1043	}
1044}
1045
1046impl Drop for ElementList {
1047	fn drop(&mut self) {
1048		unsafe { destroy(self.rid) }
1049	}
1050}