aidoku/imports/
html.rs

1//! Module for working with HTML.
2//!
3//! It provides a convenient API for extracting data, using HTML5
4//! DOM methods and CSS selectors.
5//!
6//! The backend of this module is [SwiftSoup](https://github.com/scinfu/SwiftSoup).
7use super::{
8	FFIResult, Rid,
9	std::{destroy, read_string_and_destroy},
10};
11use crate::alloc::String;
12use core::fmt::Display;
13
14#[link(wasm_import_module = "html")]
15unsafe extern "C" {
16	fn parse(
17		html: *const u8,
18		html_len: usize,
19		base_url: *const u8,
20		base_url_len: usize,
21	) -> FFIResult;
22	fn parse_fragment(
23		html: *const u8,
24		html_len: usize,
25		base_url: *const u8,
26		base_url_len: usize,
27	) -> FFIResult;
28	fn escape(text: *const u8, text_len: usize) -> FFIResult;
29	fn unescape(text: *const u8, text_len: usize) -> FFIResult;
30
31	fn select(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
32	fn select_first(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
33	fn attr(rid: Rid, key: *const u8, key_len: usize) -> FFIResult;
34	fn text(rid: Rid) -> FFIResult;
35	fn untrimmed_text(rid: Rid) -> FFIResult;
36	fn html(rid: Rid) -> FFIResult;
37	fn outer_html(rid: Rid) -> FFIResult;
38	fn remove(rid: Rid) -> FFIResult;
39
40	fn set_text(rid: Rid, text: *const u8, text_len: usize) -> FFIResult;
41	fn set_html(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
42	fn prepend(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
43	fn append(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
44	fn parent(rid: Rid) -> FFIResult;
45	fn children(rid: Rid) -> FFIResult;
46	fn siblings(rid: Rid) -> FFIResult;
47	fn next(rid: Rid) -> FFIResult;
48	fn previous(rid: Rid) -> FFIResult;
49	fn base_uri(rid: Rid) -> FFIResult;
50	fn own_text(rid: Rid) -> FFIResult;
51	fn data(rid: Rid) -> FFIResult;
52	fn id(rid: Rid) -> FFIResult;
53	fn tag_name(rid: Rid) -> FFIResult;
54	fn class_name(rid: Rid) -> FFIResult;
55	fn has_class(rid: Rid, class: *const u8, class_len: usize) -> bool;
56	fn add_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
57	fn remove_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
58	fn has_attr(rid: Rid, attr: *const u8, attr_len: usize) -> bool;
59	fn set_attr(
60		rid: Rid,
61		key: *const u8,
62		key_len: usize,
63		value: *const u8,
64		value_len: usize,
65	) -> FFIResult;
66	fn remove_attr(rid: Rid, attr: *const u8, attr_len: usize) -> FFIResult;
67
68	fn first(rid: Rid) -> FFIResult;
69	fn last(rid: Rid) -> FFIResult;
70	#[allow(clashing_extern_declarations)]
71	#[link_name = "get"]
72	fn html_get(rid: Rid, index: usize) -> FFIResult;
73	fn size(rid: Rid) -> FFIResult;
74}
75
76/// Error type for HTML operations.
77#[derive(PartialEq, Eq, Debug, Clone)]
78pub enum HtmlError {
79	InvalidDescriptor,
80	InvalidString,
81	InvalidHtml,
82	InvalidQuery,
83	NoResult,
84	SwiftSoupError,
85}
86
87impl HtmlError {
88	fn from(value: FFIResult) -> Option<Self> {
89		match value {
90			-1 => Some(Self::InvalidDescriptor),
91			-2 => Some(Self::InvalidString),
92			-3 => Some(Self::InvalidHtml),
93			-4 => Some(Self::InvalidQuery),
94			-5 => Some(Self::NoResult),
95			-6 => Some(Self::SwiftSoupError),
96			_ => None,
97		}
98	}
99}
100
101/// Namespace for HTML-related functions.
102#[derive(Debug)]
103pub struct Html;
104
105impl Html {
106	/// Parse HTML into a Document.
107	///
108	/// As there is no base URL specified, absolute URL resolution requires the
109	/// HTML to have a `<base href>` tag.
110	pub fn parse<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
111		let buf = html.as_ref();
112		let rid = unsafe { parse(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
113		if let Some(error) = HtmlError::from(rid) {
114			Err(error)
115		} else {
116			Ok(Document(unsafe { Element::from(rid) }))
117		}
118	}
119
120	/// Parse HTML into a Document, with a base URL.
121	///
122	/// The given `base_url` will be used for any URLs that occurs before a
123	/// `<base href>` tag is defined.
124	pub fn parse_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
125		html: T,
126		base_url: B,
127	) -> Result<Document, HtmlError> {
128		let buf = html.as_ref();
129		let url = base_url.as_ref();
130		let rid = unsafe { parse(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
131		if let Some(error) = HtmlError::from(rid) {
132			Err(error)
133		} else {
134			Ok(Document(unsafe { Element::from(rid) }))
135		}
136	}
137
138	/// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
139	///
140	/// Similar to [Html::parse], relative URLs will not be resolved unless
141	/// there is a `<base href>` tag.
142	pub fn parse_fragment<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
143		let buf = html.as_ref();
144		let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
145		if let Some(error) = HtmlError::from(rid) {
146			Err(error)
147		} else {
148			Ok(Document(unsafe { Element::from(rid) }))
149		}
150	}
151
152	/// Parse a HTML fragment, assuming that it forms the `body` of the HTML, with a base URL.
153	///
154	/// Similar to [Html::parse_with_url], URL resolution occurs for any that appears
155	/// before a `<base href>` tag.
156	pub fn parse_fragment_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
157		html: T,
158		base_url: B,
159	) -> Result<Document, HtmlError> {
160		let buf = html.as_ref();
161		let url = base_url.as_ref();
162		let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
163		if let Some(error) = HtmlError::from(rid) {
164			Err(error)
165		} else {
166			Ok(Document(unsafe { Element::from(rid) }))
167		}
168	}
169
170	/// Escape any HTML-reserved characters to HTML entities.
171	///
172	/// # Examples
173	/// ```ignore
174	/// use aidoku::imports::html::Html;
175	/// assert_eq!(
176	///     Html::escape("Hello &<> Å å π 新 there ¾ © »"),
177	///     "Hello &amp;&lt;&gt; Å å π 新 there ¾ © »",
178	/// );
179	/// ```
180	pub fn escape<T: AsRef<str>>(text: T) -> String {
181		let text = text.as_ref();
182		let rid = unsafe { escape(text.as_ptr(), text.len()) };
183		read_string_and_destroy(rid).unwrap_or_default()
184	}
185
186	/// Unescape any HTML entities to their original characters.
187	///
188	/// # Examples
189	/// ```ignore
190	/// use aidoku::imports::html::Html;
191	/// assert_eq!(
192	///     Html::unescape("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »"),
193	///     Some("Hello &<> Å å π 新 there ¾ © »".into()),
194	/// );
195	/// ```
196	pub fn unescape<T: AsRef<str>>(text: T) -> Option<String> {
197		let text = text.as_ref();
198		let rid = unsafe { unescape(text.as_ptr(), text.len()) };
199		if HtmlError::from(rid).is_some() {
200			return None;
201		}
202		read_string_and_destroy(rid)
203	}
204}
205
/// A complete HTML document.
///
/// Wraps the [Element] that [Document::select] and [Document::select_first]
/// delegate to.
pub struct Document(pub(crate) Element);
208
209impl Document {
210	/// Get an instance from a [Rid].
211	pub(crate) unsafe fn from(rid: Rid) -> Self {
212		Self(unsafe { Element::from(rid) })
213	}
214
215	/// Find elements that match the given CSS (or JQuery) selector.
216	///
217	/// <details>
218	///     <summary>Supported selectors</summary>
219	///
220	/// | Pattern                 | Matches                                                                                              | Example                                                           |
221	/// |-------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
222	/// | `*`                     | any element                                                                                          | `*`                                                               |
223	/// | `tag`                   | elements with the given tag name                                                                     | `div`                                                             |
224	/// | <code>*\|E</code>       | elements of type E in any namespace (including non-namespaced)                                       | <code>*\|name</code> finds `<fb:name>` and `<name>` elements      |
225	/// | <code>ns\|E</code>      | elements of type E in the namespace ns                                                               | <code>fb\|name</code> finds `<fb:name>` elements                  |
226	/// | `#id`                   | elements with attribute ID of "id"                                                                   | `div#wrap`, `#logo`                                               |
227	/// | `.class`                | elements with a class name of "class"                                                                | `div.left`, `.result`                                             |
228	/// | `[attr]`                | elements with an attribute named "attr" (with any value)                                             | `a[href]`, `[title]`                                              |
229	/// | `[^attrPrefix]`         | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | `[^data-]`, `div[^data-]`                                         |
230	/// | `[attr=val]`            | elements with an attribute named "attr", and value equal to "val"                                    | `img[width=500]`, `a[rel=nofollow]`                               |
231	/// | `[attr="val"]`          | elements with an attribute named "attr", and value equal to "val"                                    | `span[hello="Cleveland"][goodbye="Columbus"]`, `a[rel="nofollow"]`|
232	/// | `[attr^=valPrefix]`     | elements with an attribute named "attr", and value starting with "valPrefix"                         | `a[href^=http:]`                                                  |
233	/// | `[attr$=valSuffix]`     | elements with an attribute named "attr", and value ending with "valSuffix"                           | `img[src$=.png]`                                                  |
234	/// | `[attr*=valContaining]` | elements with an attribute named "attr", and value containing "valContaining"                        | `a[href*=/search/]`                                               |
235	/// | `[attr~=regex]`         | elements with an attribute named "attr", and value matching the regular expression                   | `img[src~=(?i)\\.(png\|jpe?g)]`                                   |
236	/// |                         | The above may be combined in any order                                                               | `div.header[title]`                                               |
237	///
238	/// ## Combinators
239	/// | Pattern   | Matches                                         | Example                     |
240	/// |-----------|-------------------------------------------------|-----------------------------|
241	/// | `E F`     | an F element descended from an E element        | `div a`, `.logo h1`         |
242	/// | `E > F`   | an F direct child of E                          | `ol > li`                   |
243	/// | `E + F`   | an F element immediately preceded by sibling E  | `li + li`, `div.head + div` |
244	/// | `E ~ F`   | an F element preceded by sibling E              | `h1 ~ p`                    |
245	/// | `E, F, G` | all matching elements E, F, or G                | `a[href], div, h3`          |
246	///
247	/// ## Pseudo selectors
248	/// | Pattern              | Matches                                                                                                                                                   | Example                                                                                                                                                      |
249	/// |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
250	/// | `:lt(n)`             | elements whose sibling index is less than n                                                                                                               | `td:lt(3)` finds the first 3 cells of each row                                                                                                               |
251	/// | `:gt(n)`             | elements whose sibling index is greater than n                                                                                                            | `td:gt(1)` finds cells after skipping the first two                                                                                                          |
252	/// | `:eq(n)`             | elements whose sibling index is equal to n                                                                                                                | `td:eq(0)` finds the first cell of each row                                                                                                                  |
253	/// | `:has(selector)`     | elements that contains at least one element matching the selector                                                                                         | `div:has(p)` finds divs that contain p elements; `div:has(> a)` selects div elements that have at least one direct child a element.                          |
254	/// | `:not(selector)`     | elements that do not match the selector.                                                                                                                  | `div:not(.logo)` finds all divs that do not have the "logo" class; `div:not(:has(div))` finds divs that do not contain divs.                                 |
255	/// | `:contains(text)`    | elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants.           | `p:contains(SwiftSoup)` finds p elements containing the text "SwiftSoup"; `p:contains(hello \(there\))` finds p elements containing the text "Hello (There)" |
256	/// | `:matches(regex)`    | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants.                        | `td:matches(\\d+)` finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively.                           |
257	/// | `:containsOwn(text)` | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | `p:containsOwn(SwiftSoup)` finds p elements with own text "SwiftSoup".                                                                                       |
258	/// | `:matchesOwn(regex)` | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants.                  | `td:matchesOwn(\\d+)` finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively.            |
259	///
260	/// ## Structural pseudo-selectors
261	/// | Pattern                   | Matches                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | Example                                                |
262	/// |---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
263	/// | `:root`                   | The element that is the root of the document. In HTML, this is the html element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |                                                        |                                                                                                                                                                                                 |
264	/// | `:nth-child(an+b)`        | elements that have an+b-1 siblings before it in the document tree, for any positive integer or zero value of n, and has a parent element. For values of a and b greater than zero, this effectively divides the element's children into groups of a elements (the last group taking the remainder), and selecting the bth element of each group. For example, this allows the selectors to address every other row in a table, and could be used to alternate the color of paragraph text in a cycle of four. The a and b values must be integers (positive, negative, or zero). The index of the first child of an element is 1. |                                                        |
265	/// | `:nth-last-child(an+b)`   | elements that have an+b-1 siblings after it in the document tree. Otherwise like `:nth-child()`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | `tr:nth-last-child(-n+2)` the last two rows of a table |
266	/// | `:nth-of-type(an+b)`      | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name before it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                    | `img:nth-of-type(2n+1)`                                |
267	/// | `:nth-last-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name after it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                     | `img:nth-last-of-type(2n+1)`                           |
268	/// | `:first-child`            | elements that are the first child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | `div > p:first-child`                                  |
269	/// | `:last-child`             | elements that are the last child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | `ol > li:last-child`                                   |
270	/// | `:first-of-type`          | elements that are the first sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | `dl dt:first-of-type`                                  |
271	/// | `:last-of-type`           | elements that are the last sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | `tr > td:last-of-type`                                 |
272	/// | `:only-child`             | elements that have a parent element and whose parent element hasve no other element children                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |                                                        |
273	/// | `:only-of-type`           |  an element that has a parent element and whose parent element has no other element children with the same expanded element name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |                                                        |
274	/// | `:empty`                  | elements that have no children at all                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |                                                        |
275	/// </details>
276	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
277		self.0.select(css_query)
278	}
279
280	/// Find the first element that matches the given CSS (or JQuery) selector.
281	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
282		self.0.select_first(css_query)
283	}
284}
285
/// A single HTML element.
///
/// Holds only the [Rid] that is passed to the host for every operation on the
/// element.
pub struct Element {
	/// Descriptor handed to the host's `html` import functions.
	rid: Rid,
}
290
291impl Element {
292	/// Get an instance from a [Rid].
293	unsafe fn from(rid: Rid) -> Self {
294		Self { rid }
295	}
296
297	/// Find elements that match the given CSS (or JQuery) selector.
298	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
299		let query = css_query.as_ref();
300		let rid = unsafe { select(self.rid, query.as_ptr(), query.len()) };
301		if HtmlError::from(rid).is_some() {
302			return None;
303		}
304		Some(unsafe { ElementList::from(rid) })
305	}
306
307	/// Find the first element that matches the given CSS (or JQuery) selector.
308	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
309		let query = css_query.as_ref();
310		let rid = unsafe { select_first(self.rid, query.as_ptr(), query.len()) };
311		if HtmlError::from(rid).is_some() {
312			return None;
313		}
314		Some(unsafe { Element::from(rid) })
315	}
316
317	/// Get an attribute value by its key.
318	///
319	/// To get an absolute URL from an attribute that may be a relative URL,
320	/// prefix the key with `abs:`.
321	///
322	/// # Examples
323	/// ```ignore
324	/// use aidoku::imports::html::Html;
325	/// let html = Html::parse_with_url("<img src=\"/image.jpg\" />", "https://example.com").unwrap();
326	/// let el = html.select_first("img").unwrap();
327	/// assert_eq!(
328	///     el.attr("abs:src"),
329	///     Some("https://example.com/image.jpg".into())
330	/// );
331	/// ```
332	pub fn attr<T: AsRef<str>>(&self, attr_name: T) -> Option<String> {
333		let attr_name = attr_name.as_ref();
334		let rid = unsafe { attr(self.rid, attr_name.as_ptr(), attr_name.len()) };
335		if HtmlError::from(rid).is_some() {
336			return None;
337		}
338		read_string_and_destroy(rid)
339	}
340
341	/// Get the normalized, combined text of this element and its children.
342	///
343	/// Whitespace is normalized and trimmed.
344	///
345	/// Note that this method returns text that would be presented to a reader.
346	/// The contents of data nodes (e.g. `<script>` tags) are not considered text,
347	/// and instead, [Element::html] or [Element::data] can be used for them.
348	///
349	/// # Examples
350	/// ```ignore
351	/// use aidoku::imports::html::Html;
352	/// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
353	/// let el = html.select_first("p").unwrap();
354	/// assert_eq!(el.text(), Some("Hello there now!".into()));
355	/// ```
356	pub fn text(&self) -> Option<String> {
357		let rid = unsafe { text(self.rid) };
358		if HtmlError::from(rid).is_some() {
359			return None;
360		}
361		read_string_and_destroy(rid)
362	}
363
364	/// Get the text of this element and its children.
365	///
366	/// Whitespace is *not* normalized and trimmed.
367	///
368	/// Notices from [Element::text] apply.
369	///
370	/// # Examples
371	/// ```ignore
372	/// use aidoku::imports::html::Html;
373	/// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
374	/// let el = html.select_first("p").unwrap();
375	/// assert_eq!(el.untrimmed_text(), Some("Hello there now! ".into()));
376	/// ```
377	pub fn untrimmed_text(&self) -> Option<String> {
378		let rid = unsafe { untrimmed_text(self.rid) };
379		if HtmlError::from(rid).is_some() {
380			return None;
381		}
382		read_string_and_destroy(rid)
383	}
384
385	/// Get the element's inner HTML.
386	///
387	/// # Examples
388	/// ```ignore
389	/// use aidoku::imports::html::Html;
390	/// let html = Html::parse("<div><p></p></div>").unwrap();
391	/// let div = html.select_first("div").unwrap();
392	/// assert_eq!(div.html(), Some("<p></p>".into()));
393	/// ```
394	pub fn html(&self) -> Option<String> {
395		let rid = unsafe { html(self.rid) };
396		if HtmlError::from(rid).is_some() {
397			return None;
398		}
399		read_string_and_destroy(rid)
400	}
401
402	/// Get the element's outer HTML.
403	///
404	/// # Examples
405	/// ```ignore
406	/// use aidoku::imports::html::Html;
407	/// let html = Html::parse("<div><p></p></div>").unwrap();
408	/// let div = html.select_first("div").unwrap();
409	/// assert_eq!(div.outer_html(), Some("<div><p></p></div>".into()));
410	/// ```
411	pub fn outer_html(&self) -> Option<String> {
412		let rid = unsafe { outer_html(self.rid) };
413		if HtmlError::from(rid).is_some() {
414			return None;
415		}
416		read_string_and_destroy(rid)
417	}
418
419	/// Remove this element from the DOM tree.
420	pub fn remove(self) {
421		_ = unsafe { remove(self.rid) };
422	}
423
424	/// Get the element's parent element, returning `None` if there isn't one.
425	pub fn parent(&self) -> Option<Element> {
426		let rid = unsafe { parent(self.rid) };
427		if HtmlError::from(rid).is_some() {
428			return None;
429		}
430		Some(unsafe { Element::from(rid) })
431	}
432
433	/// Get the element's children elements.
434	pub fn children(&self) -> ElementList {
435		let rid = unsafe { children(self.rid) };
436		unsafe { ElementList::from(rid) }
437	}
438
439	/// Get the sibling elements of the element.
440	pub fn siblings(&self) -> ElementList {
441		let rid = unsafe { siblings(self.rid) };
442		unsafe { ElementList::from(rid) }
443	}
444
445	/// Get the next sibling of the element, returning `None` if there isn't one.
446	pub fn next(&self) -> Option<Element> {
447		let rid = unsafe { next(self.rid) };
448		if HtmlError::from(rid).is_some() {
449			return None;
450		}
451		Some(unsafe { Element::from(rid) })
452	}
453
454	/// Get the previous sibling of the element, returning `None` if there isn't one.
455	pub fn prev(&self) -> Option<Element> {
456		let rid = unsafe { previous(self.rid) };
457		if HtmlError::from(rid).is_some() {
458			return None;
459		}
460		Some(unsafe { Element::from(rid) })
461	}
462
463	/// Set the element's text content, clearing any existing content.
464	pub fn set_text<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
465		let text = text.as_ref();
466		let result = unsafe { set_text(self.rid, text.as_ptr(), text.len()) };
467
468		if let Some(error) = HtmlError::from(result) {
469			Err(error)
470		} else {
471			Ok(())
472		}
473	}
474
475	/// Set the element's inner HTML, clearing the existing HTML.
476	pub fn set_html<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
477		let text = text.as_ref();
478		let result = unsafe { set_html(self.rid, text.as_ptr(), text.len()) };
479
480		if let Some(error) = HtmlError::from(result) {
481			Err(error)
482		} else {
483			Ok(())
484		}
485	}
486
487	/// Prepend inner HTML into this element.
488	///
489	/// The given HTML will be parsed, and each node prepended to the start
490	/// of the element's children.
491	pub fn prepend<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
492		let text = text.as_ref();
493		let result = unsafe { prepend(self.rid, text.as_ptr(), text.len()) };
494
495		if let Some(error) = HtmlError::from(result) {
496			Err(error)
497		} else {
498			Ok(())
499		}
500	}
501
502	/// Append inner HTML into this element.
503	///
504	/// The given HTML will be parsed, and each node appended to the end
505	/// of the element's children.
506	pub fn append<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
507		let text = text.as_ref();
508		let result = unsafe { append(self.rid, text.as_ptr(), text.len()) };
509
510		if let Some(error) = HtmlError::from(result) {
511			Err(error)
512		} else {
513			Ok(())
514		}
515	}
516
517	/// Get the base URI of this Element.
518	pub fn base_uri(&self) -> Option<String> {
519		let rid = unsafe { base_uri(self.rid) };
520		if HtmlError::from(rid).is_some() {
521			return None;
522		}
523		read_string_and_destroy(rid)
524	}
525
526	/// Gets the (normalized) text owned by this element.
527	pub fn own_text(&self) -> Option<String> {
528		let rid = unsafe { own_text(self.rid) };
529		if HtmlError::from(rid).is_some() {
530			return None;
531		}
532		read_string_and_destroy(rid)
533	}
534
535	/// Get the combined data (e.g. the inside of a `<script>` tag) of this element.
536	///
537	/// Note that data is NOT the text of the element. Use [Element::text]
538	/// to get the text that would be visible to a user, and [Element::data]
539	/// for the contents of scripts, comments, CSS styles, etc.
540	pub fn data(&self) -> Option<String> {
541		let rid = unsafe { data(self.rid) };
542		if HtmlError::from(rid).is_some() {
543			return None;
544		}
545		read_string_and_destroy(rid)
546	}
547
548	/// Get the `id` attribute of this element.
549	pub fn id(&self) -> Option<String> {
550		let rid = unsafe { id(self.rid) };
551		if HtmlError::from(rid).is_some() {
552			return None;
553		}
554		read_string_and_destroy(rid)
555	}
556
557	/// Get the name of the tag for this element.
558	///
559	/// This will always be the lowercased version. For example, `<DIV>` and
560	/// `<div>` would both return `div`.
561	pub fn tag_name(&self) -> Option<String> {
562		let rid = unsafe { tag_name(self.rid) };
563		if HtmlError::from(rid).is_some() {
564			return None;
565		}
566		read_string_and_destroy(rid)
567	}
568
569	/// Get the literal value of this node's `class` attribute.
570	///
571	/// For example, on `<div class="header gray">` this would return `header gray`.
572	pub fn class_name(&self) -> Option<String> {
573		let rid = unsafe { class_name(self.rid) };
574		if HtmlError::from(rid).is_some() {
575			return None;
576		}
577		read_string_and_destroy(rid)
578	}
579
580	/// Test if this element has a class. Case insensitive.
581	pub fn has_class<T: AsRef<str>>(&self, class_name: T) -> bool {
582		let class_name = class_name.as_ref();
583		unsafe { has_class(self.rid, class_name.as_ptr(), class_name.len()) }
584	}
585
586	/// Add a class name to this element's class attribute.
587	pub fn add_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
588		let class_name = class_name.as_ref();
589		let result = unsafe { add_class(self.rid, class_name.as_ptr(), class_name.len()) };
590
591		if let Some(error) = HtmlError::from(result) {
592			Err(error)
593		} else {
594			Ok(())
595		}
596	}
597
598	/// Remove a class name from this element's class attribute.
599	pub fn remove_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
600		let class_name = class_name.as_ref();
601		let result = unsafe { remove_class(self.rid, class_name.as_ptr(), class_name.len()) };
602
603		if let Some(error) = HtmlError::from(result) {
604			Err(error)
605		} else {
606			Ok(())
607		}
608	}
609
610	/// Test if this element has an attribute. Case insensitive.
611	pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
612		let attr_name = attr_name.as_ref();
613		unsafe { has_attr(self.rid, attr_name.as_ptr(), attr_name.len()) }
614	}
615
616	/// Set an attribute value on this element.
617	///
618	/// If this element already has an attribute with the key, its value is updated;
619	/// otherwise, a new attribute is added.
620	pub fn set_attr<K: AsRef<str>, V: AsRef<str>>(
621		&mut self,
622		key: K,
623		value: V,
624	) -> Result<(), HtmlError> {
625		let key = key.as_ref();
626		let value = value.as_ref();
627		let result = unsafe {
628			set_attr(
629				self.rid,
630				key.as_ptr(),
631				key.len(),
632				value.as_ptr(),
633				value.len(),
634			)
635		};
636
637		if let Some(error) = HtmlError::from(result) {
638			Err(error)
639		} else {
640			Ok(())
641		}
642	}
643
644	/// Remove an attribute from this element.
645	pub fn remove_attr<T: AsRef<str>>(&mut self, attr: T) -> Result<(), HtmlError> {
646		let attr = attr.as_ref();
647		let result = unsafe { remove_attr(self.rid, attr.as_ptr(), attr.len()) };
648
649		if let Some(error) = HtmlError::from(result) {
650			Err(error)
651		} else {
652			Ok(())
653		}
654	}
655}
656
657impl Drop for Element {
658	fn drop(&mut self) {
659		unsafe { destroy(self.rid) }
660	}
661}
662
663impl Display for Element {
664	/// Returns the outer HTML of the node.
665	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
666		write!(f, "{}", self.outer_html().unwrap_or_default())
667	}
668}
669
670/// A collection of HTML elements.
671pub struct ElementList {
672	rid: Rid,
673	lower_bound: usize,
674	upper_bound: usize,
675	size: usize,
676}
677
678impl ElementList {
679	/// Get an instance from a [Rid].
680	unsafe fn from(rid: Rid) -> Self {
681		let size = unsafe { size(rid) as usize };
682		Self {
683			rid,
684			lower_bound: 0,
685			upper_bound: size.wrapping_sub(1),
686			size,
687		}
688	}
689
690	/// Find elements that match the given CSS (or JQuery) selector.
691	pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
692		let query = css_query.as_ref();
693		let rid = unsafe { select(self.rid, query.as_ptr(), query.len()) };
694		if HtmlError::from(rid).is_some() {
695			return None;
696		}
697		Some(unsafe { ElementList::from(rid) })
698	}
699
700	/// Find the first element that matches the given CSS (or JQuery) selector.
701	pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
702		let query = css_query.as_ref();
703		let rid = unsafe { select_first(self.rid, query.as_ptr(), query.len()) };
704		if HtmlError::from(rid).is_some() {
705			return None;
706		}
707		Some(unsafe { Element::from(rid) })
708	}
709
710	/// Get the normalized, combined text of these elements and their children.
711	///
712	/// See [Element::text].
713	pub fn text(&self) -> Option<String> {
714		let rid = unsafe { text(self.rid) };
715		if HtmlError::from(rid).is_some() {
716			return None;
717		}
718		read_string_and_destroy(rid)
719	}
720
721	/// Get the text of these elements and their children.
722	///
723	/// See [Element::untrimmed_text].
724	pub fn untrimmed_text(&self) -> Option<String> {
725		let rid = unsafe { untrimmed_text(self.rid) };
726		if HtmlError::from(rid).is_some() {
727			return None;
728		}
729		read_string_and_destroy(rid)
730	}
731
732	/// Get the combined elements' inner HTML.
733	///
734	/// See [Element::html].
735	pub fn html(&self) -> Option<String> {
736		let rid = unsafe { html(self.rid) };
737		if HtmlError::from(rid).is_some() {
738			return None;
739		}
740		read_string_and_destroy(rid)
741	}
742
743	/// Get the combined elements' outer HTML.
744	///
745	/// See [Element::outer_html].
746	pub fn outer_html(&self) -> Option<String> {
747		let rid = unsafe { outer_html(self.rid) };
748		if HtmlError::from(rid).is_some() {
749			return None;
750		}
751		read_string_and_destroy(rid)
752	}
753
754	/// Remove each element from the DOM.
755	pub fn remove(self) {
756		_ = unsafe { remove(self.rid) };
757	}
758
759	/// Get the first element of this element list.
760	pub fn first(&self) -> Option<Element> {
761		let rid = unsafe { first(self.rid) };
762		if HtmlError::from(rid).is_some() {
763			return None;
764		}
765		Some(unsafe { Element::from(rid) })
766	}
767
768	/// Get the last element of this element list.
769	pub fn last(&self) -> Option<Element> {
770		let rid = unsafe { last(self.rid) };
771		if HtmlError::from(rid).is_some() {
772			return None;
773		}
774		Some(unsafe { Element::from(rid) })
775	}
776
777	/// Get the element at the given index.
778	pub fn get(&self, index: usize) -> Option<Element> {
779		let rid = unsafe { html_get(self.rid, index) };
780		if HtmlError::from(rid).is_some() {
781			return None;
782		}
783		Some(unsafe { Element::from(rid) })
784	}
785
786	/// Get the size of this element list.
787	pub fn size(&self) -> usize {
788		self.size
789	}
790
791	/// Check if this element list is empty.
792	pub fn is_empty(&self) -> bool {
793		self.size() == 0
794	}
795}
796
797impl Iterator for ElementList {
798	type Item = Element;
799
800	fn next(&mut self) -> Option<Self::Item> {
801		if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
802			return None;
803		}
804		let value_ref = self.get(self.lower_bound);
805		self.lower_bound += 1;
806		value_ref
807	}
808}
809
810impl DoubleEndedIterator for ElementList {
811	fn next_back(&mut self) -> Option<Self::Item> {
812		if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
813			return None;
814		}
815		let value_ref = self.get(self.upper_bound);
816		self.upper_bound = self.upper_bound.wrapping_sub(1);
817		value_ref
818	}
819}
820
821impl Drop for ElementList {
822	fn drop(&mut self) {
823		unsafe { destroy(self.rid) }
824	}
825}
aidoku/imports/html.rs

aidoku/imports/
html.rs