aidoku/imports/html.rs
1//! Module for working with HTML.
2//!
3//! It provides a convenient API for extracting data, using HTML5
4//! DOM methods and CSS selectors.
5//!
6//! The backend of this module is [SwiftSoup](https://github.com/scinfu/SwiftSoup).
7use super::{
8 FFIResult, Rid,
9 std::{destroy, read_string_and_destroy},
10};
11use crate::alloc::String;
12use core::fmt::Display;
13
// Host functions provided by the `html` WebAssembly import module.
//
// Strings are handed to the host as a pointer/length pair. Most functions
// return an `FFIResult`; the wrappers in this file treat the codes listed in
// `HtmlError::from` as failures and any other value as a result to consume.
#[link(wasm_import_module = "html")]
unsafe extern "C" {
    // Parsing and entity handling.
    fn parse(
        html: *const u8,
        html_len: usize,
        base_url: *const u8,
        base_url_len: usize,
    ) -> FFIResult;
    fn parse_fragment(
        html: *const u8,
        html_len: usize,
        base_url: *const u8,
        base_url_len: usize,
    ) -> FFIResult;
    fn escape(text: *const u8, text_len: usize) -> FFIResult;
    fn unescape(text: *const u8, text_len: usize) -> FFIResult;

    // Queries and content accessors. These are called with the descriptor of
    // either a single element or an element list.
    fn select(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
    fn select_first(rid: Rid, query: *const u8, query_len: usize) -> FFIResult;
    fn attr(rid: Rid, key: *const u8, key_len: usize) -> FFIResult;
    fn text(rid: Rid) -> FFIResult;
    fn untrimmed_text(rid: Rid) -> FFIResult;
    fn html(rid: Rid) -> FFIResult;
    fn outer_html(rid: Rid) -> FFIResult;
    fn remove(rid: Rid) -> FFIResult;

    // Element mutation, traversal and attribute/class helpers.
    fn set_text(rid: Rid, text: *const u8, text_len: usize) -> FFIResult;
    fn set_html(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
    fn prepend(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
    fn append(rid: Rid, html: *const u8, html_len: usize) -> FFIResult;
    fn parent(rid: Rid) -> FFIResult;
    fn children(rid: Rid) -> FFIResult;
    fn siblings(rid: Rid) -> FFIResult;
    fn next(rid: Rid) -> FFIResult;
    fn previous(rid: Rid) -> FFIResult;
    fn base_uri(rid: Rid) -> FFIResult;
    fn own_text(rid: Rid) -> FFIResult;
    fn data(rid: Rid) -> FFIResult;
    fn id(rid: Rid) -> FFIResult;
    fn tag_name(rid: Rid) -> FFIResult;
    fn class_name(rid: Rid) -> FFIResult;
    // Returns a plain `bool`, so no error code can be reported through it.
    fn has_class(rid: Rid, class: *const u8, class_len: usize) -> bool;
    fn add_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
    fn remove_class(rid: Rid, class: *const u8, class_len: usize) -> FFIResult;
    // Returns a plain `bool`, so no error code can be reported through it.
    fn has_attr(rid: Rid, attr: *const u8, attr_len: usize) -> bool;
    fn set_attr(
        rid: Rid,
        key: *const u8,
        key_len: usize,
        value: *const u8,
        value_len: usize,
    ) -> FFIResult;
    fn remove_attr(rid: Rid, attr: *const u8, attr_len: usize) -> FFIResult;

    // Element list accessors.
    fn first(rid: Rid) -> FFIResult;
    fn last(rid: Rid) -> FFIResult;
    // Linked against the host symbol `get` but named `html_get` on the Rust side.
    // NOTE(review): the lint allowance suggests another extern block declares
    // a `get` import with a different signature — confirm.
    #[allow(clashing_extern_declarations)]
    #[link_name = "get"]
    fn html_get(rid: Rid, index: usize) -> FFIResult;
    fn size(rid: Rid) -> FFIResult;
}
75
/// Error type for HTML operations.
///
/// Each variant corresponds to one negative result code returned by the host,
/// `-1` through `-6` in declaration order.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum HtmlError {
    /// Reported by the host as result code `-1`.
    InvalidDescriptor,
    /// Reported by the host as result code `-2`.
    InvalidString,
    /// Reported by the host as result code `-3`.
    InvalidHtml,
    /// Reported by the host as result code `-4`.
    InvalidQuery,
    /// Reported by the host as result code `-5`.
    NoResult,
    /// Reported by the host as result code `-6`.
    SwiftSoupError,
}
86
87impl HtmlError {
88 fn from(value: FFIResult) -> Option<Self> {
89 match value {
90 -1 => Some(Self::InvalidDescriptor),
91 -2 => Some(Self::InvalidString),
92 -3 => Some(Self::InvalidHtml),
93 -4 => Some(Self::InvalidQuery),
94 -5 => Some(Self::NoResult),
95 -6 => Some(Self::SwiftSoupError),
96 _ => None,
97 }
98 }
99}
100
/// Namespace for HTML-related functions.
///
/// This is a unit struct with no state: parsing and entity helpers are exposed
/// as associated functions such as [`Html::parse`] and [`Html::escape`].
#[derive(Debug)]
pub struct Html;
104
105impl Html {
106 /// Parse HTML into a Document.
107 ///
108 /// As there is no base URL specified, absolute URL resolution requires the
109 /// HTML to have a `<base href>` tag.
110 pub fn parse<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
111 let buf = html.as_ref();
112 let rid = unsafe { parse(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
113 if let Some(error) = HtmlError::from(rid) {
114 Err(error)
115 } else {
116 Ok(Document(unsafe { Element::from(rid) }))
117 }
118 }
119
120 /// Parse HTML into a Document, with a base URL.
121 ///
122 /// The given `base_url` will be used for any URLs that occurs before a
123 /// `<base href>` tag is defined.
124 pub fn parse_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
125 html: T,
126 base_url: B,
127 ) -> Result<Document, HtmlError> {
128 let buf = html.as_ref();
129 let url = base_url.as_ref();
130 let rid = unsafe { parse(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
131 if let Some(error) = HtmlError::from(rid) {
132 Err(error)
133 } else {
134 Ok(Document(unsafe { Element::from(rid) }))
135 }
136 }
137
138 /// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
139 ///
140 /// Similar to [Html::parse], relative URLs will not be resolved unless
141 /// there is a `<base href>` tag.
142 pub fn parse_fragment<T: AsRef<[u8]>>(html: T) -> Result<Document, HtmlError> {
143 let buf = html.as_ref();
144 let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), "".as_ptr(), 0) };
145 if let Some(error) = HtmlError::from(rid) {
146 Err(error)
147 } else {
148 Ok(Document(unsafe { Element::from(rid) }))
149 }
150 }
151
152 /// Parse a HTML fragment, assuming that it forms the `body` of the HTML, with a base URL.
153 ///
154 /// Similar to [Html::parse_with_url], URL resolution occurs for any that appears
155 /// before a `<base href>` tag.
156 pub fn parse_fragment_with_url<T: AsRef<[u8]>, B: AsRef<str>>(
157 html: T,
158 base_url: B,
159 ) -> Result<Document, HtmlError> {
160 let buf = html.as_ref();
161 let url = base_url.as_ref();
162 let rid = unsafe { parse_fragment(buf.as_ptr(), buf.len(), url.as_ptr(), url.len()) };
163 if let Some(error) = HtmlError::from(rid) {
164 Err(error)
165 } else {
166 Ok(Document(unsafe { Element::from(rid) }))
167 }
168 }
169
170 /// Escape any HTML-reserved characters to HTML entities.
171 ///
172 /// # Examples
173 /// ```ignore
174 /// use aidoku::imports::html::Html;
175 /// assert_eq!(
176 /// Html::escape("Hello &<> Å å π 新 there ¾ © »"),
177 /// "Hello &<> Å å π 新 there ¾ © »",
178 /// );
179 /// ```
180 pub fn escape<T: AsRef<str>>(text: T) -> String {
181 let text = text.as_ref();
182 let rid = unsafe { escape(text.as_ptr(), text.len()) };
183 read_string_and_destroy(rid).unwrap_or_default()
184 }
185
186 /// Unescape any HTML entities to their original characters.
187 ///
188 /// # Examples
189 /// ```ignore
190 /// use aidoku::imports::html::Html;
191 /// assert_eq!(
192 /// Html::unescape("Hello &<> Å å π 新 there ¾ © »"),
193 /// Some("Hello &<> Å å π 新 there ¾ © »".into()),
194 /// );
195 /// ```
196 pub fn unescape<T: AsRef<str>>(text: T) -> Option<String> {
197 let text = text.as_ref();
198 let rid = unsafe { unescape(text.as_ptr(), text.len()) };
199 if HtmlError::from(rid).is_some() {
200 return None;
201 }
202 read_string_and_destroy(rid)
203 }
204}
205
/// A complete HTML document.
///
/// A thin wrapper around the [`Element`] handle produced by the parse
/// functions; selector queries are forwarded to that element.
pub struct Document(pub(crate) Element);
208
209impl Document {
210 /// Get an instance from a [Rid].
211 pub(crate) unsafe fn from(rid: Rid) -> Self {
212 Self(unsafe { Element::from(rid) })
213 }
214
215 /// Find elements that match the given CSS (or JQuery) selector.
216 ///
217 /// <details>
218 /// <summary>Supported selectors</summary>
219 ///
220 /// | Pattern | Matches | Example |
221 /// |-------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
222 /// | `*` | any element | `*` |
223 /// | `tag` | elements with the given tag name | `div` |
224 /// | <code>*\|E</code> | elements of type E in any namespace (including non-namespaced) | <code>*\|name</code> finds `<fb:name>` and `<name>` elements |
225 /// | <code>ns\|E</code> | elements of type E in the namespace ns | <code>fb\|name</code> finds `<fb:name>` elements |
226 /// | `#id` | elements with attribute ID of "id" | `div#wrap`, `#logo` |
227 /// | `.class` | elements with a class name of "class" | `div.left`, `.result` |
228 /// | `[attr]` | elements with an attribute named "attr" (with any value) | `a[href]`, `[title]` |
229 /// | `[^attrPrefix]` | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | `[^data-]`, `div[^data-]` |
230 /// | `[attr=val]` | elements with an attribute named "attr", and value equal to "val" | `img[width=500]`, `a[rel=nofollow]` |
231 /// | `[attr="val"]` | elements with an attribute named "attr", and value equal to "val" | `span[hello="Cleveland"][goodbye="Columbus"]`, `a[rel="nofollow"]`|
232 /// | `[attr^=valPrefix]` | elements with an attribute named "attr", and value starting with "valPrefix" | `a[href^=http:]` |
233 /// | `[attr$=valSuffix]` | elements with an attribute named "attr", and value ending with "valSuffix" | `img[src$=.png]` |
234 /// | `[attr*=valContaining]` | elements with an attribute named "attr", and value containing "valContaining" | `a[href*=/search/]` |
235 /// | `[attr~=regex]` | elements with an attribute named "attr", and value matching the regular expression | `img[src~=(?i)\\.(png\|jpe?g)]` |
236 /// | | The above may be combined in any order | `div.header[title]` |
237 ///
238 /// ## Combinators
239 /// | Pattern | Matches | Example |
240 /// |-----------|-------------------------------------------------|-----------------------------|
241 /// | `E F` | an F element descended from an E element | `div a`, `.logo h1` |
242 /// | `E > F` | an F direct child of E | `ol > li` |
243 /// | `E + F` | an F element immediately preceded by sibling E | `li + li`, `div.head + div` |
244 /// | `E ~ F` | an F element preceded by sibling E | `h1 ~ p` |
245 /// | `E, F, G` | all matching elements E, F, or G | `a[href], div, h3` |
246 ///
247 /// ## Pseudo selectors
248 /// | Pattern | Matches | Example |
249 /// |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
250 /// | `:lt(n)` | elements whose sibling index is less than n | `td:lt(3)` finds the first 3 cells of each row |
251 /// | `:gt(n)` | elements whose sibling index is greater than n | `td:gt(1)` finds cells after skipping the first two |
252 /// | `:eq(n)` | elements whose sibling index is equal to n | `td:eq(0)` finds the first cell of each row |
253 /// | `:has(selector)` | elements that contains at least one element matching the selector | `div:has(p)` finds divs that contain p elements; `div:has(> a)` selects div elements that have at least one direct child a element. |
254 /// | `:not(selector)` | elements that do not match the selector. | `div:not(.logo)` finds all divs that do not have the "logo" class; `div:not(:has(div))` finds divs that do not contain divs. |
255 /// | `:contains(text)` | elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants. | `p:contains(SwiftSoup)` finds p elements containing the text "SwiftSoup"; `p:contains(hello \(there\))` finds p elements containing the text "Hello (There)" |
256 /// | `:matches(regex)` | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants. | `td:matches(\\d+)` finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively. |
257 /// | `:containsOwn(text)` | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | `p:containsOwn(SwiftSoup)` finds p elements with own text "SwiftSoup". |
258 /// | `:matchesOwn(regex)` | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants. | `td:matchesOwn(\\d+)` finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively. |
259 ///
260 /// ## Structural pseudo-selectors
261 /// | Pattern | Matches | Example |
262 /// |---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
263 /// | `:root` | The element that is the root of the document. In HTML, this is the html element | | |
264 /// | `:nth-child(an+b)` | elements that have an+b-1 siblings before it in the document tree, for any positive integer or zero value of n, and has a parent element. For values of a and b greater than zero, this effectively divides the element's children into groups of a elements (the last group taking the remainder), and selecting the bth element of each group. For example, this allows the selectors to address every other row in a table, and could be used to alternate the color of paragraph text in a cycle of four. The a and b values must be integers (positive, negative, or zero). The index of the first child of an element is 1. | |
265 /// | `:nth-last-child(an+b)` | elements that have an+b-1 siblings after it in the document tree. Otherwise like `:nth-child()` | `tr:nth-last-child(-n+2)` the last two rows of a table |
266 /// | `:nth-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name before it in the document tree, for any zero or positive integer value of n, and has a parent element | `img:nth-of-type(2n+1)` |
267 /// | `:nth-last-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name after it in the document tree, for any zero or positive integer value of n, and has a parent element | `img:nth-last-of-type(2n+1)` |
268 /// | `:first-child` | elements that are the first child of some other element. | `div > p:first-child` |
269 /// | `:last-child` | elements that are the last child of some other element. | `ol > li:last-child` |
270 /// | `:first-of-type` | elements that are the first sibling of its type in the list of children of its parent element | `dl dt:first-of-type` |
271 /// | `:last-of-type` | elements that are the last sibling of its type in the list of children of its parent element | `tr > td:last-of-type` |
272 /// | `:only-child` | elements that have a parent element and whose parent element hasve no other element children | |
273 /// | `:only-of-type` | an element that has a parent element and whose parent element has no other element children with the same expanded element name | |
274 /// | `:empty` | elements that have no children at all | |
275 /// </details>
276 pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
277 self.0.select(css_query)
278 }
279
280 /// Find the first element that matches the given CSS (or JQuery) selector.
281 pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
282 self.0.select_first(css_query)
283 }
284}
285
/// A single HTML element.
///
/// Holds the descriptor that is passed to every host call made on behalf of
/// this element. The descriptor is handed to `destroy` when the value is
/// dropped.
pub struct Element {
    // Descriptor identifying this element in host calls.
    rid: Rid,
}
290
291impl Element {
292 /// Get an instance from a [Rid].
293 unsafe fn from(rid: Rid) -> Self {
294 Self { rid }
295 }
296
297 /// Find elements that match the given CSS (or JQuery) selector.
298 pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
299 let query = css_query.as_ref();
300 let rid = unsafe { select(self.rid, query.as_ptr(), query.len()) };
301 if HtmlError::from(rid).is_some() {
302 return None;
303 }
304 Some(unsafe { ElementList::from(rid) })
305 }
306
307 /// Find the first element that matches the given CSS (or JQuery) selector.
308 pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
309 let query = css_query.as_ref();
310 let rid = unsafe { select_first(self.rid, query.as_ptr(), query.len()) };
311 if HtmlError::from(rid).is_some() {
312 return None;
313 }
314 Some(unsafe { Element::from(rid) })
315 }
316
317 /// Get an attribute value by its key.
318 ///
319 /// To get an absolute URL from an attribute that may be a relative URL,
320 /// prefix the key with `abs:`.
321 ///
322 /// # Examples
323 /// ```ignore
324 /// use aidoku::imports::html::Html;
325 /// let html = Html::parse_with_url("<img src=\"/image.jpg\" />", "https://example.com").unwrap();
326 /// let el = html.select_first("img").unwrap();
327 /// assert_eq!(
328 /// el.attr("abs:src"),
329 /// Some("https://example.com/image.jpg".into())
330 /// );
331 /// ```
332 pub fn attr<T: AsRef<str>>(&self, attr_name: T) -> Option<String> {
333 let attr_name = attr_name.as_ref();
334 let rid = unsafe { attr(self.rid, attr_name.as_ptr(), attr_name.len()) };
335 if HtmlError::from(rid).is_some() {
336 return None;
337 }
338 read_string_and_destroy(rid)
339 }
340
341 /// Get the normalized, combined text of this element and its children.
342 ///
343 /// Whitespace is normalized and trimmed.
344 ///
345 /// Note that this method returns text that would be presented to a reader.
346 /// The contents of data nodes (e.g. `<script>` tags) are not considered text,
347 /// and instead, [Element::html] or [Element::data] can be used for them.
348 ///
349 /// # Examples
350 /// ```ignore
351 /// use aidoku::imports::html::Html;
352 /// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
353 /// let el = html.select_first("p").unwrap();
354 /// assert_eq!(el.text(), Some("Hello there now!".into()));
355 /// ```
356 pub fn text(&self) -> Option<String> {
357 let rid = unsafe { text(self.rid) };
358 if HtmlError::from(rid).is_some() {
359 return None;
360 }
361 read_string_and_destroy(rid)
362 }
363
364 /// Get the text of this element and its children.
365 ///
366 /// Whitespace is *not* normalized and trimmed.
367 ///
368 /// Notices from [Element::text] apply.
369 ///
370 /// # Examples
371 /// ```ignore
372 /// use aidoku::imports::html::Html;
373 /// let html = Html::parse("<p>Hello <b>there</b> now! </p>").unwrap();
374 /// let el = html.select_first("p").unwrap();
375 /// assert_eq!(el.untrimmed_text(), Some("Hello there now! ".into()));
376 /// ```
377 pub fn untrimmed_text(&self) -> Option<String> {
378 let rid = unsafe { untrimmed_text(self.rid) };
379 if HtmlError::from(rid).is_some() {
380 return None;
381 }
382 read_string_and_destroy(rid)
383 }
384
385 /// Get the element's inner HTML.
386 ///
387 /// # Examples
388 /// ```ignore
389 /// use aidoku::imports::html::Html;
390 /// let html = Html::parse("<div><p></p></div>").unwrap();
391 /// let div = html.select_first("div").unwrap();
392 /// assert_eq!(div.html(), Some("<p></p>".into()));
393 /// ```
394 pub fn html(&self) -> Option<String> {
395 let rid = unsafe { html(self.rid) };
396 if HtmlError::from(rid).is_some() {
397 return None;
398 }
399 read_string_and_destroy(rid)
400 }
401
402 /// Get the element's outer HTML.
403 ///
404 /// # Examples
405 /// ```ignore
406 /// use aidoku::imports::html::Html;
407 /// let html = Html::parse("<div><p></p></div>").unwrap();
408 /// let div = html.select_first("div").unwrap();
409 /// assert_eq!(div.outer_html(), Some("<div><p></p></div>".into()));
410 /// ```
411 pub fn outer_html(&self) -> Option<String> {
412 let rid = unsafe { outer_html(self.rid) };
413 if HtmlError::from(rid).is_some() {
414 return None;
415 }
416 read_string_and_destroy(rid)
417 }
418
419 /// Remove this element from the DOM tree.
420 pub fn remove(self) {
421 _ = unsafe { remove(self.rid) };
422 }
423
424 /// Get the element's parent element, returning `None` if there isn't one.
425 pub fn parent(&self) -> Option<Element> {
426 let rid = unsafe { parent(self.rid) };
427 if HtmlError::from(rid).is_some() {
428 return None;
429 }
430 Some(unsafe { Element::from(rid) })
431 }
432
433 /// Get the element's children elements.
434 pub fn children(&self) -> ElementList {
435 let rid = unsafe { children(self.rid) };
436 unsafe { ElementList::from(rid) }
437 }
438
439 /// Get the sibling elements of the element.
440 pub fn siblings(&self) -> ElementList {
441 let rid = unsafe { siblings(self.rid) };
442 unsafe { ElementList::from(rid) }
443 }
444
445 /// Get the next sibling of the element, returning `None` if there isn't one.
446 pub fn next(&self) -> Option<Element> {
447 let rid = unsafe { next(self.rid) };
448 if HtmlError::from(rid).is_some() {
449 return None;
450 }
451 Some(unsafe { Element::from(rid) })
452 }
453
454 /// Get the previous sibling of the element, returning `None` if there isn't one.
455 pub fn prev(&self) -> Option<Element> {
456 let rid = unsafe { previous(self.rid) };
457 if HtmlError::from(rid).is_some() {
458 return None;
459 }
460 Some(unsafe { Element::from(rid) })
461 }
462
463 /// Set the element's text content, clearing any existing content.
464 pub fn set_text<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
465 let text = text.as_ref();
466 let result = unsafe { set_text(self.rid, text.as_ptr(), text.len()) };
467
468 if let Some(error) = HtmlError::from(result) {
469 Err(error)
470 } else {
471 Ok(())
472 }
473 }
474
475 /// Set the element's inner HTML, clearing the existing HTML.
476 pub fn set_html<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
477 let text = text.as_ref();
478 let result = unsafe { set_html(self.rid, text.as_ptr(), text.len()) };
479
480 if let Some(error) = HtmlError::from(result) {
481 Err(error)
482 } else {
483 Ok(())
484 }
485 }
486
487 /// Prepend inner HTML into this element.
488 ///
489 /// The given HTML will be parsed, and each node prepended to the start
490 /// of the element's children.
491 pub fn prepend<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
492 let text = text.as_ref();
493 let result = unsafe { prepend(self.rid, text.as_ptr(), text.len()) };
494
495 if let Some(error) = HtmlError::from(result) {
496 Err(error)
497 } else {
498 Ok(())
499 }
500 }
501
502 /// Append inner HTML into this element.
503 ///
504 /// The given HTML will be parsed, and each node appended to the end
505 /// of the element's children.
506 pub fn append<T: AsRef<str>>(&mut self, text: T) -> Result<(), HtmlError> {
507 let text = text.as_ref();
508 let result = unsafe { append(self.rid, text.as_ptr(), text.len()) };
509
510 if let Some(error) = HtmlError::from(result) {
511 Err(error)
512 } else {
513 Ok(())
514 }
515 }
516
517 /// Get the base URI of this Element.
518 pub fn base_uri(&self) -> Option<String> {
519 let rid = unsafe { base_uri(self.rid) };
520 if HtmlError::from(rid).is_some() {
521 return None;
522 }
523 read_string_and_destroy(rid)
524 }
525
526 /// Gets the (normalized) text owned by this element.
527 pub fn own_text(&self) -> Option<String> {
528 let rid = unsafe { own_text(self.rid) };
529 if HtmlError::from(rid).is_some() {
530 return None;
531 }
532 read_string_and_destroy(rid)
533 }
534
535 /// Get the combined data (e.g. the inside of a `<script>` tag) of this element.
536 ///
537 /// Note that data is NOT the text of the element. Use [Element::text]
538 /// to get the text that would be visible to a user, and [Element::data]
539 /// for the contents of scripts, comments, CSS styles, etc.
540 pub fn data(&self) -> Option<String> {
541 let rid = unsafe { data(self.rid) };
542 if HtmlError::from(rid).is_some() {
543 return None;
544 }
545 read_string_and_destroy(rid)
546 }
547
548 /// Get the `id` attribute of this element.
549 pub fn id(&self) -> Option<String> {
550 let rid = unsafe { id(self.rid) };
551 if HtmlError::from(rid).is_some() {
552 return None;
553 }
554 read_string_and_destroy(rid)
555 }
556
557 /// Get the name of the tag for this element.
558 ///
559 /// This will always be the lowercased version. For example, `<DIV>` and
560 /// `<div>` would both return `div`.
561 pub fn tag_name(&self) -> Option<String> {
562 let rid = unsafe { tag_name(self.rid) };
563 if HtmlError::from(rid).is_some() {
564 return None;
565 }
566 read_string_and_destroy(rid)
567 }
568
569 /// Get the literal value of this node's `class` attribute.
570 ///
571 /// For example, on `<div class="header gray">` this would return `header gray`.
572 pub fn class_name(&self) -> Option<String> {
573 let rid = unsafe { class_name(self.rid) };
574 if HtmlError::from(rid).is_some() {
575 return None;
576 }
577 read_string_and_destroy(rid)
578 }
579
580 /// Test if this element has a class. Case insensitive.
581 pub fn has_class<T: AsRef<str>>(&self, class_name: T) -> bool {
582 let class_name = class_name.as_ref();
583 unsafe { has_class(self.rid, class_name.as_ptr(), class_name.len()) }
584 }
585
586 /// Add a class name to this element's class attribute.
587 pub fn add_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
588 let class_name = class_name.as_ref();
589 let result = unsafe { add_class(self.rid, class_name.as_ptr(), class_name.len()) };
590
591 if let Some(error) = HtmlError::from(result) {
592 Err(error)
593 } else {
594 Ok(())
595 }
596 }
597
598 /// Remove a class name from this element's class attribute.
599 pub fn remove_class<T: AsRef<str>>(&mut self, class_name: T) -> Result<(), HtmlError> {
600 let class_name = class_name.as_ref();
601 let result = unsafe { remove_class(self.rid, class_name.as_ptr(), class_name.len()) };
602
603 if let Some(error) = HtmlError::from(result) {
604 Err(error)
605 } else {
606 Ok(())
607 }
608 }
609
610 /// Test if this element has an attribute. Case insensitive.
611 pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
612 let attr_name = attr_name.as_ref();
613 unsafe { has_attr(self.rid, attr_name.as_ptr(), attr_name.len()) }
614 }
615
616 /// Set an attribute value on this element.
617 ///
618 /// If this element already has an attribute with the key, its value is updated;
619 /// otherwise, a new attribute is added.
620 pub fn set_attr<K: AsRef<str>, V: AsRef<str>>(
621 &mut self,
622 key: K,
623 value: V,
624 ) -> Result<(), HtmlError> {
625 let key = key.as_ref();
626 let value = value.as_ref();
627 let result = unsafe {
628 set_attr(
629 self.rid,
630 key.as_ptr(),
631 key.len(),
632 value.as_ptr(),
633 value.len(),
634 )
635 };
636
637 if let Some(error) = HtmlError::from(result) {
638 Err(error)
639 } else {
640 Ok(())
641 }
642 }
643
644 /// Remove an attribute from this element.
645 pub fn remove_attr<T: AsRef<str>>(&mut self, attr: T) -> Result<(), HtmlError> {
646 let attr = attr.as_ref();
647 let result = unsafe { remove_attr(self.rid, attr.as_ptr(), attr.len()) };
648
649 if let Some(error) = HtmlError::from(result) {
650 Err(error)
651 } else {
652 Ok(())
653 }
654 }
655}
656
impl Drop for Element {
    /// Pass this element's descriptor to `destroy` when the value goes out of
    /// scope.
    fn drop(&mut self) {
        // NOTE(review): presumably `destroy` releases the host resource behind
        // `rid`, which would make each `Element` the sole owner of its
        // descriptor — confirm against `super::std::destroy`.
        unsafe { destroy(self.rid) }
    }
}
662
663impl Display for Element {
664 /// Returns the outer HTML of the node.
665 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
666 write!(f, "{}", self.outer_html().unwrap_or_default())
667 }
668}
669
/// A collection of HTML elements.
///
/// Besides the host descriptor, the list carries the cursor state used by its
/// `Iterator` and `DoubleEndedIterator` implementations.
pub struct ElementList {
    // Descriptor identifying this list in host calls.
    rid: Rid,
    // Index of the next element yielded from the front.
    lower_bound: usize,
    // Index of the next element yielded from the back; `usize::MAX` marks a
    // list that is empty or has been fully consumed from the back.
    upper_bound: usize,
    // Element count reported by the host when the list was created.
    size: usize,
}
677
678impl ElementList {
679 /// Get an instance from a [Rid].
680 unsafe fn from(rid: Rid) -> Self {
681 let size = unsafe { size(rid) as usize };
682 Self {
683 rid,
684 lower_bound: 0,
685 upper_bound: size.wrapping_sub(1),
686 size,
687 }
688 }
689
690 /// Find elements that match the given CSS (or JQuery) selector.
691 pub fn select<T: AsRef<str>>(&self, css_query: T) -> Option<ElementList> {
692 let query = css_query.as_ref();
693 let rid = unsafe { select(self.rid, query.as_ptr(), query.len()) };
694 if HtmlError::from(rid).is_some() {
695 return None;
696 }
697 Some(unsafe { ElementList::from(rid) })
698 }
699
700 /// Find the first element that matches the given CSS (or JQuery) selector.
701 pub fn select_first<T: AsRef<str>>(&self, css_query: T) -> Option<Element> {
702 let query = css_query.as_ref();
703 let rid = unsafe { select_first(self.rid, query.as_ptr(), query.len()) };
704 if HtmlError::from(rid).is_some() {
705 return None;
706 }
707 Some(unsafe { Element::from(rid) })
708 }
709
710 /// Get the normalized, combined text of these elements and their children.
711 ///
712 /// See [Element::text].
713 pub fn text(&self) -> Option<String> {
714 let rid = unsafe { text(self.rid) };
715 if HtmlError::from(rid).is_some() {
716 return None;
717 }
718 read_string_and_destroy(rid)
719 }
720
721 /// Get the text of these elements and their children.
722 ///
723 /// See [Element::untrimmed_text].
724 pub fn untrimmed_text(&self) -> Option<String> {
725 let rid = unsafe { untrimmed_text(self.rid) };
726 if HtmlError::from(rid).is_some() {
727 return None;
728 }
729 read_string_and_destroy(rid)
730 }
731
732 /// Get the combined elements' inner HTML.
733 ///
734 /// See [Element::html].
735 pub fn html(&self) -> Option<String> {
736 let rid = unsafe { html(self.rid) };
737 if HtmlError::from(rid).is_some() {
738 return None;
739 }
740 read_string_and_destroy(rid)
741 }
742
743 /// Get the combined elements' outer HTML.
744 ///
745 /// See [Element::outer_html].
746 pub fn outer_html(&self) -> Option<String> {
747 let rid = unsafe { outer_html(self.rid) };
748 if HtmlError::from(rid).is_some() {
749 return None;
750 }
751 read_string_and_destroy(rid)
752 }
753
754 /// Remove each element from the DOM.
755 pub fn remove(self) {
756 _ = unsafe { remove(self.rid) };
757 }
758
759 /// Get the first element of this element list.
760 pub fn first(&self) -> Option<Element> {
761 let rid = unsafe { first(self.rid) };
762 if HtmlError::from(rid).is_some() {
763 return None;
764 }
765 Some(unsafe { Element::from(rid) })
766 }
767
768 /// Get the last element of this element list.
769 pub fn last(&self) -> Option<Element> {
770 let rid = unsafe { last(self.rid) };
771 if HtmlError::from(rid).is_some() {
772 return None;
773 }
774 Some(unsafe { Element::from(rid) })
775 }
776
777 /// Get the element at the given index.
778 pub fn get(&self, index: usize) -> Option<Element> {
779 let rid = unsafe { html_get(self.rid, index) };
780 if HtmlError::from(rid).is_some() {
781 return None;
782 }
783 Some(unsafe { Element::from(rid) })
784 }
785
786 /// Get the size of this element list.
787 pub fn size(&self) -> usize {
788 self.size
789 }
790
791 /// Check if this element list is empty.
792 pub fn is_empty(&self) -> bool {
793 self.size() == 0
794 }
795}
796
797impl Iterator for ElementList {
798 type Item = Element;
799
800 fn next(&mut self) -> Option<Self::Item> {
801 if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
802 return None;
803 }
804 let value_ref = self.get(self.lower_bound);
805 self.lower_bound += 1;
806 value_ref
807 }
808}
809
810impl DoubleEndedIterator for ElementList {
811 fn next_back(&mut self) -> Option<Self::Item> {
812 if self.lower_bound > self.upper_bound || self.upper_bound == usize::MAX {
813 return None;
814 }
815 let value_ref = self.get(self.upper_bound);
816 self.upper_bound = self.upper_bound.wrapping_sub(1);
817 value_ref
818 }
819}
820
impl Drop for ElementList {
    /// Pass this list's descriptor to `destroy` when the value goes out of
    /// scope.
    fn drop(&mut self) {
        // NOTE(review): presumably `destroy` releases the host resource behind
        // `rid` — confirm against `super::std::destroy`.
        unsafe { destroy(self.rid) }
    }
}
825}