diff --git a/html5test/src/main.rs b/html5test/src/main.rs
index f1d9048..d088f63 100644
--- a/html5test/src/main.rs
+++ b/html5test/src/main.rs
@@ -1,161 +1,21 @@
use html5ever::parse_document;
-use html5ever::tendril::StrTendril;
+// use html5ever::tendril::StrTendril;
use html5ever::tendril::TendrilSink;
-use markup5ever::interface::Attribute;
+// use markup5ever::interface::Attribute;
use markup5ever_rcdom::Handle;
use markup5ever_rcdom::NodeData;
use markup5ever_rcdom::RcDom;
use std::borrow::Borrow;
-use std::cell::RefCell;
+// use std::cell::RefCell;
use std::default::Default;
-// This go_children/walk is stupid, but I shot myself in the foot by adding
-// things after the children, link on links.
-//
-// So, I'm rethinking this, and I'll, basically redo the same thing Tendril is
-// doing by building a tree of elements.
-//
-// So, say, if we have
-//
-//
-// Text text<a href="link">the link</a>other text, <i>for italics</i>,
-// <code>is for code</code>
-//
-//
-// That would build
-// root --------------------------\
-// / | \ Code()
-// Text(Text text) | ------ Italic() |
-// Link(link) | Text(is for code)
-// | Text(for italics)
-// Text(the link)
-//
-// Tree things to do, then:
-// 1. Walk the DOM tree and build the text tree.
-// 2. Tree elements could return if the DOM three should continue processing or
-// ignore incoming children (that would cut the "invisible span" processing,
-// for example).
-// 3. Build a walker for the new tree, to produce the final text. And, on that,
-// we could work on the text wrap, 'cause there are elements that can't be
-// wrapped (for example, Links)
-
-/// Nodes in the text tree
-#[derive(Debug)]
-enum NodeType {
- /// The root element; produces nothing, but has the base content.
- Root,
- /// A text block. Contains the text itself.
- Text(String),
- /// A line break
- LineBreak,
- // /// A link to somewhere. Contains the link.
- // Link(String),
- // /// Italics
- // Italic,
- // /// Code block
- // Code,
- // /// A block with an ellipsis at the end
- // Ellipsis,
-}
-
-#[derive(Debug)]
-struct Node {
- r#type: NodeType,
- children: Vec<Node>,
-}
-
-impl Node {
- /// Build the root node
- fn root() -> Self {
- Self {
- r#type: NodeType::Root,
- children: Vec::new(),
- }
- }
-
- /// Build a text node
- fn text(text: &str) -> Self {
- Self {
- r#type: NodeType::Text(text.into()),
- children: Vec::new(),
- }
- }
-
- /// Build a linebreak node
- fn line_break() -> Self {
- Self {
- r#type: NodeType::LineBreak,
- children: Vec::new(),
+/// Keep walking: call `walk` on every child of `$source`, passing `$target` along.
+macro_rules! keep_going {
+ ($source:ident, $target:ident) => {
+ for child in $source.children.borrow().iter() {
+ walk(child.borrow(), $target);
}
- }
-
- // /// Build a link node
- // fn link(href: &str) -> Self {
- // Self {
- // r#type: NodeType::Link(href.into()),
- // children: Vec::new(),
- // }
- // }
-
- // /// Build a ellipsis node
- // fn ellipsis() -> Self {
- // Self {
- // r#type: NodeType::Ellipsis,
- // children: Vec::new(),
- // }
- // }
-
- /// Add a child node to this node
- fn add_child(&mut self, node: Node) {
- self.children.push(node);
- }
-}
-
-// Handle functions can return a three state result:
-// 1. Do not process the children of the current Handle
-// 2. Process the children and add to the same parent
-// 3. Use the new Node as parent for future children.
-
-/// Result of the handling functions
-enum HandleResult {
- /// Stop processing, don't continue generating nodes
- Stop,
- /// Follow the children, but don't add any nodes in the current level
- Follow,
- // /// Produce a new node, but don't attach any children to it
- // AddAndStay(Node),
- /// Assume a new parent node
- AddAndAdopt(Node),
-}
-
-/// Handle a simple block of text
-fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> HandleResult {
- let text = contents.borrow().to_string();
- node.add_child(Node::text(&text));
- HandleResult::Stop
-}
-
-/// Handle an incoming line break
-fn handle_line_break() -> HandleResult {
- let line_break = Node::line_break();
- HandleResult::AddAndAdopt(line_break)
-}
-
-/// Process the span content
-fn handle_span(attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
- let attrs = attrs.borrow();
- let classes_attr = attrs
- .iter()
- .find(|attr| attr.name.local.to_string() == "class");
- match classes_attr {
- Some(classes) => {
- if classes.value.contains("invisible") {
- HandleResult::Stop
- } else {
- HandleResult::Follow
- }
- }
- None => HandleResult::Follow,
- }
+ };
}
// fn handle_anchor(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
@@ -180,10 +40,13 @@ fn handle_span(attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
// }
// }
-fn walk(input: &Handle, parent: &mut Node) {
- // println!(">>> {:?}", input.data);
- let element = match input.data {
- NodeData::Text { ref contents } => handle_text(parent, contents),
+fn walk(input: &Handle, result: &mut String) {
+ match input.data {
+ NodeData::Text { ref contents } => {
+ let text = contents.borrow().to_string();
+ result.push_str(&text);
+ keep_going!(input, result);
+ }
NodeData::Element {
ref name,
ref attrs,
@@ -191,46 +54,66 @@ fn walk(input: &Handle, parent: &mut Node) {
} => {
let tag = name.local.to_string();
match tag.as_ref() {
- "html" | "head" | "body" => HandleResult::Follow,
- "p" => handle_line_break(),
- "span" => handle_span(attrs),
- // "a" => handle_anchor(parent, attrs),
- _ => HandleResult::Stop,
+ "html" | "head" | "body" => keep_going!(input, result),
+ "p" => {
+ keep_going!(input, result);
+ result.push_str("\n");
+ }
+ "span" => {
+ let attrs = attrs.borrow();
+ let classes_attr = attrs
+ .iter()
+ .find(|attr| attr.name.local.to_string() == "class");
+ match classes_attr {
+ Some(classes) => {
+ if !classes.value.contains("invisible") {
+ keep_going!(input, result);
+ }
+ }
+ None => keep_going!(input, result),
+ }
+ }
+ "a" => {
+ let attrs = attrs.borrow();
+ let rels = attrs
+ .iter()
+ .find(|attr| attr.name.local.to_string() == "rel");
+ let hrefs = attrs
+ .iter()
+ .find(|attr| attr.name.local.to_string() == "href");
+ println!("Rels: {:?}, Hrefs: {:?}", rels, hrefs);
+ match (rels, hrefs) {
+ (Some(rel), Some(href)) => {
+ if !rel.value.to_string().contains("tag") {
+ result.push_str("[[");
+ result.push_str(&href.value);
+ result.push_str("][");
+ keep_going!(input, result);
+ result.push_str("]]");
+ } else {
+ keep_going!(input, result);
+ }
+ }
+ _ => keep_going!(input, result),
+ }
+ }
+ _ => {}
}
}
- _ => HandleResult::Follow, // if we can't deal with it, just keep going
- };
-
- match element {
- HandleResult::Stop => {}
- HandleResult::Follow => {
- for child in input.children.borrow().iter() {
- walk(child.borrow(), parent);
- }
- }
- // HandleResult::AddAndStay(new_node) => {
- // parent.add_child(new_node);
- // for child in input.children.borrow().iter() {
- // walk(child.borrow(), parent);
- // }
- // }
- HandleResult::AddAndAdopt(mut new_node) => {
- for child in input.children.borrow().iter() {
- walk(child.borrow(), &mut new_node);
- }
- parent.add_child(new_node);
+ _ => {
+ keep_going!(input, result);
}
- }
+ };
}
-fn build_nodes(text: &str) {
+fn build_nodes(source: &str) {
let dom = parse_document(RcDom::default(), Default::default())
.from_utf8()
- .read_from(&mut text.as_bytes())
+ .read_from(&mut source.as_bytes())
.unwrap();
- let mut tree = Node::root();
- walk(&dom.document, &mut tree);
- println!("Tree: {:?}", tree);
+ let mut result = String::new();
+ walk(&dom.document, &mut result);
+ println!("Result: {:?}", result);
}
fn main() {
@@ -242,6 +125,11 @@ fn main() {
);
build_nodes(&example_2);
+ let example_3 = String::from(
+ r#"@This is a mention and #this is a tag
"#,
+ );
+ build_nodes(&example_3);
+
// let example_1 = String::from(
// r#"Today I finally moved with my contact and calendar management into the terminal with #vdirsyncer #khal and #khard.
Thank you @hund for your great post: https://hund.tty1.se/2020/08/12/how-to-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html
#carddav #caldav #terminal
"#,
// );