use html5ever::parse_document; // use html5ever::tendril::StrTendril; use html5ever::tendril::TendrilSink; // use markup5ever::interface::Attribute; use markup5ever_rcdom::Handle; use markup5ever_rcdom::NodeData; use markup5ever_rcdom::RcDom; use std::borrow::Borrow; // use std::cell::RefCell; use std::default::Default; /// Simplify the process of keep walking through the results macro_rules! keep_going { ($source:ident, $target:ident) => { for child in $source.children.borrow().iter() { walk(child.borrow(), $target); } }; } // fn handle_anchor(node: &mut Node, attrs: &RefCell>) -> HandleResult { // let attrs = attrs.borrow(); // let rels = attrs // .iter() // .find(|attr| attr.name.local.to_string() == "rel"); // let hrefs = attrs // .iter() // .find(|attr| attr.name.local.to_string() == "href"); // match (rels, hrefs) { // (Some(rel), Some(href)) => { // if !rel.value.to_string().contains("tag") { // let new_node = Node::link(&href.value); // node.add_child(new_node); // HandleResult::NewNode(new_node) // } else { // HandleResult::Keep // } // } // _ => HandleResult::Stop, // } // } fn walk(input: &Handle, result: &mut String) { match input.data { NodeData::Text { ref contents } => { let text = contents.borrow().to_string(); result.push_str(&text); keep_going!(input, result); } NodeData::Element { ref name, ref attrs, .. } => { let tag = name.local.to_string(); match tag.as_ref() { "html" | "head" | "body" => keep_going!(input, result), "p" => { keep_going!(input, result); result.push_str("\n"); } "span" => { let attrs = attrs.borrow(); let classes_attr = attrs .iter() .find(|attr| attr.name.local.to_string() == "class"); match classes_attr { Some(classes) => { if !classes.value.contains("invisible") { keep_going!(input, result); } } None => keep_going!(input, result), } } "a" => { let attrs = attrs.borrow(); let rels = attrs .iter() .find(|attr| attr.name.local.to_string() == "rel"); let hrefs = attrs .iter() .find(|attr| attr.name.local.to_string() == "href"); println!("Rels: {:?}, Hrefs: {:?}", rels, hrefs); match (rels, hrefs) { (Some(rel), Some(href)) => { if !rel.value.to_string().contains("tag") { result.push_str("[["); result.push_str(&href.value); result.push_str("]["); keep_going!(input, result); result.push_str("]]"); } else { keep_going!(input, result); } } _ => keep_going!(input, result), } } _ => {} } } _ => { keep_going!(input, result); } }; } fn build_nodes(source: &str) { let dom = parse_document(RcDom::default(), Default::default()) .from_utf8() .read_from(&mut source.as_bytes()) .unwrap(); let mut result = String::new(); walk(&dom.document, &mut result); println!("Result: {:?}", result); } fn main() { let example_1 = String::from(r#"

A simple text component

"#); build_nodes(&example_1); let example_2 = String::from( r#"

but this is

"#, ); build_nodes(&example_2); let example_3 = String::from( r#"

@This is a mention and #this is a tag

"#, ); build_nodes(&example_3); // let example_1 = String::from( // r#"

Today I finally moved with my contact and calendar management into the terminal with #vdirsyncer #khal and #khard.

Thank you @hund for your great post: hund.tty1.se/2020/08/12/how-to

#carddav #caldav #terminal

"#, // ); // println!("Source: {}", &example_1); // println!("---------------------------------"); // let dom = parse_document(RcDom::default(), Default::default()) // .from_utf8() // .read_from(&mut example_1.as_bytes()) // .unwrap(); // let mut tree = Node::root(); // walk(&dom.document, &mut tree); }