|
|
|
@ -8,9 +8,6 @@ use markup5ever_rcdom::RcDom;
|
|
|
|
|
use std::borrow::Borrow; |
|
|
|
|
use std::cell::RefCell; |
|
|
|
|
use std::default::Default; |
|
|
|
|
use textwrap::fill; |
|
|
|
|
use textwrap::NoHyphenation; |
|
|
|
|
use textwrap::Options; |
|
|
|
|
|
|
|
|
|
// This go_children/walk is stupid, but I shot myself in the foot by adding
|
|
|
|
|
// things after the children, like on links.
|
|
|
|
@ -42,21 +39,25 @@ use textwrap::Options;
|
|
|
|
|
// wrapped (for example, Links)
|
|
|
|
|
|
|
|
|
|
/// Nodes in the text tree
///
/// Each variant names the kind of content a `Node` carries. Variants with a
/// payload own their data as a `String`.
#[derive(Debug)]
enum NodeType {
    /// The root element; produces nothing, but has the base content.
    Root,
    /// A text block. Contains the text itself.
    Text(String),
    /// A link to somewhere. Contains the link.
    Link(String),
    /// Italics
    Italic,
    /// Code block
    Code,
    /// A line break
    LineBreak,
    // Planned, not implemented yet:
    // /// A block with an ellipsis at the end
    // Ellipsis,
}
|
|
|
|
|
|
|
|
|
#[derive(Debug)] |
|
|
|
|
struct Node { |
|
|
|
|
r#type: NodeType, |
|
|
|
|
children: Vec<Node>, |
|
|
|
@ -75,14 +76,6 @@ impl Node {
|
|
|
|
|
fn text(text: &str) -> Self { |
|
|
|
|
Self { |
|
|
|
|
r#type: NodeType::Text(text.into()), |
|
|
|
|
children: Vec::new(), // XXX text nodes will never have children
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Build a link node
|
|
|
|
|
fn link(href: &str) -> Self { |
|
|
|
|
Self { |
|
|
|
|
r#type: NodeType::Link(href.into()), |
|
|
|
|
children: Vec::new(), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -91,53 +84,105 @@ impl Node {
|
|
|
|
|
fn line_break() -> Self { |
|
|
|
|
Self { |
|
|
|
|
r#type: NodeType::LineBreak, |
|
|
|
|
children: Vec::new(), // XXX linebreaks will never have children
|
|
|
|
|
children: Vec::new(), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// /// Build a link node
|
|
|
|
|
// fn link(href: &str) -> Self {
|
|
|
|
|
// Self {
|
|
|
|
|
// r#type: NodeType::Link(href.into()),
|
|
|
|
|
// children: Vec::new(),
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// /// Build an ellipsis node
|
|
|
|
|
// fn ellipsis() -> Self {
|
|
|
|
|
// Self {
|
|
|
|
|
// r#type: NodeType::Ellipsis,
|
|
|
|
|
// children: Vec::new(),
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
/// Add a child node to this node
|
|
|
|
|
fn add_child(&mut self, node: Node) { |
|
|
|
|
self.children.push(node); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> bool { |
|
|
|
|
// Handle functions can return a three state result:
|
|
|
|
|
// 1. Do not process the children of the current Handle
|
|
|
|
|
// 2. Process the children and add to the same parent
|
|
|
|
|
// 3. Use the new Node as parent for future children.
|
|
|
|
|
|
|
|
|
|
/// Result of the handling functions
|
|
|
|
|
enum HandleResult { |
|
|
|
|
/// Stop processing, don't continue generating nodes
|
|
|
|
|
Stop, |
|
|
|
|
/// Follow the children, but don't add any nodes in the current level
|
|
|
|
|
Follow, |
|
|
|
|
// /// Produce a new node, but don't attach any children to it
|
|
|
|
|
// AddAndStay(Node),
|
|
|
|
|
/// Assume a new parent node
|
|
|
|
|
AddAndAdopt(Node), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Handle a simple block of text
|
|
|
|
|
fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> HandleResult { |
|
|
|
|
let text = contents.borrow().to_string(); |
|
|
|
|
node.add_child(Node::text(&text)); |
|
|
|
|
true |
|
|
|
|
HandleResult::Stop |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn handle_line_break(node: &mut Node) -> bool { |
|
|
|
|
node.add_child(Node::line_break()); |
|
|
|
|
true |
|
|
|
|
/// Handle an incoming line break
|
|
|
|
|
fn handle_line_break() -> HandleResult { |
|
|
|
|
let line_break = Node::line_break(); |
|
|
|
|
HandleResult::AddAndAdopt(line_break) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn handle_span(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> bool { |
|
|
|
|
/// Process the span content
|
|
|
|
|
fn handle_span(attrs: &RefCell<Vec<Attribute>>) -> HandleResult { |
|
|
|
|
let attrs = attrs.borrow(); |
|
|
|
|
let classes = attrs |
|
|
|
|
let classes_attr = attrs |
|
|
|
|
.iter() |
|
|
|
|
.find(|attr| attr.name.local.to_string() == "class"); |
|
|
|
|
if let Some(class) = classes { |
|
|
|
|
let classes = class.value.to_string(); |
|
|
|
|
// just keep going if not invisible
|
|
|
|
|
!classes.contains("invisible") |
|
|
|
|
|
|
|
|
|
// if !classes.contains("invisible") {
|
|
|
|
|
// true
|
|
|
|
|
// if classes.contains("ellipsis") {
|
|
|
|
|
// result.push_str("...");
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
match classes_attr { |
|
|
|
|
Some(classes) => { |
|
|
|
|
if classes.value.contains("invisible") { |
|
|
|
|
HandleResult::Stop |
|
|
|
|
} else { |
|
|
|
|
// with no classes, we consider the element visible and just keep
|
|
|
|
|
// processing the list.
|
|
|
|
|
true |
|
|
|
|
HandleResult::Follow |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
None => HandleResult::Follow, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// fn handle_anchor(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
|
|
|
|
|
// let attrs = attrs.borrow();
|
|
|
|
|
// let rels = attrs
|
|
|
|
|
// .iter()
|
|
|
|
|
// .find(|attr| attr.name.local.to_string() == "rel");
|
|
|
|
|
// let hrefs = attrs
|
|
|
|
|
// .iter()
|
|
|
|
|
// .find(|attr| attr.name.local.to_string() == "href");
|
|
|
|
|
// match (rels, hrefs) {
|
|
|
|
|
// (Some(rel), Some(href)) => {
|
|
|
|
|
// if !rel.value.to_string().contains("tag") {
|
|
|
|
|
// let new_node = Node::link(&href.value);
|
|
|
|
|
// node.add_child(new_node);
|
|
|
|
|
// HandleResult::NewNode(new_node)
|
|
|
|
|
// } else {
|
|
|
|
|
// HandleResult::Keep
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// _ => HandleResult::Stop,
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
fn walk(input: &Handle, parent: &mut Node) { |
|
|
|
|
println!(">>> {:?}", input.data); |
|
|
|
|
let process_children = match input.data { |
|
|
|
|
// println!(">>> {:?}", input.data);
|
|
|
|
|
let element = match input.data { |
|
|
|
|
NodeData::Text { ref contents } => handle_text(parent, contents), |
|
|
|
|
NodeData::Element { |
|
|
|
|
ref name, |
|
|
|
@ -145,67 +190,68 @@ fn walk(input: &Handle, parent: &mut Node) {
|
|
|
|
|
.. |
|
|
|
|
} => { |
|
|
|
|
let tag = name.local.to_string(); |
|
|
|
|
println!("Tag: {:?}", tag); |
|
|
|
|
match tag.as_ref() { |
|
|
|
|
"html" | "head" | "body" => true, // just keep going
|
|
|
|
|
"p" => handle_line_break(parent), |
|
|
|
|
"span" => handle_span(parent, attrs), |
|
|
|
|
"a" => { |
|
|
|
|
println!("\tAnchor"); |
|
|
|
|
if let NodeData::Element { ref attrs, .. } = input.data { |
|
|
|
|
let attrs = attrs.borrow(); |
|
|
|
|
let rels = attrs |
|
|
|
|
.iter() |
|
|
|
|
.find(|attr| attr.name.local.to_string() == "rel"); |
|
|
|
|
let hrefs = attrs |
|
|
|
|
.iter() |
|
|
|
|
.find(|attr| attr.name.local.to_string() == "href"); |
|
|
|
|
match (rels, hrefs) { |
|
|
|
|
(Some(rel), Some(href)) => { |
|
|
|
|
if !rel.value.to_string().contains("tag") { |
|
|
|
|
result.push_str("[["); |
|
|
|
|
result.push_str(&href.value.to_string()); |
|
|
|
|
result.push_str("]["); |
|
|
|
|
go_children(input, result); |
|
|
|
|
result.push_str("]]"); |
|
|
|
|
} else { |
|
|
|
|
go_children(input, result); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
_ => {} |
|
|
|
|
} |
|
|
|
|
"html" | "head" | "body" => HandleResult::Follow, |
|
|
|
|
"p" => handle_line_break(), |
|
|
|
|
"span" => handle_span(attrs), |
|
|
|
|
// "a" => handle_anchor(parent, attrs),
|
|
|
|
|
_ => HandleResult::Stop, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
_ => false, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
_ => true, // if we can't deal with it, just keep going
|
|
|
|
|
_ => HandleResult::Follow, // if we can't deal with it, just keep going
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
if process_children { |
|
|
|
|
match element { |
|
|
|
|
HandleResult::Stop => {} |
|
|
|
|
HandleResult::Follow => { |
|
|
|
|
for child in input.children.borrow().iter() { |
|
|
|
|
walk(child.borrow(), parent); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// HandleResult::AddAndStay(new_node) => {
|
|
|
|
|
// parent.add_child(new_node);
|
|
|
|
|
// for child in input.children.borrow().iter() {
|
|
|
|
|
// walk(child.borrow(), parent);
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
HandleResult::AddAndAdopt(mut new_node) => { |
|
|
|
|
for child in input.children.borrow().iter() { |
|
|
|
|
walk(child.borrow(), &mut new_node); |
|
|
|
|
} |
|
|
|
|
parent.add_child(new_node); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn main() { |
|
|
|
|
let source = String::from( |
|
|
|
|
r#"<p>Today I finally moved with my contact and calendar management into the terminal with <a href="https://fosstodon.org/tags/vdirsyncer" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>vdirsyncer</span></a> <a href="https://fosstodon.org/tags/khal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khal</span></a> and <a href="https://fosstodon.org/tags/khard" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khard</span></a>.</p><p>Thank you <span class="h-card"><a href="https://fosstodon.org/@hund" class="u-url mention" rel="nofollow noopener noreferrer" target="_blank">@<span>hund</span></a></span> for your great post: <a href="https://hund.tty1.se/2020/08/12/how-to-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="ellipsis">hund.tty1.se/2020/08/12/how-to</span><span class="invisible">-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html</span></a></p><p><a href="https://fosstodon.org/tags/carddav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>carddav</span></a> <a href="https://fosstodon.org/tags/caldav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>caldav</span></a> <a href="https://fosstodon.org/tags/terminal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>terminal</span></a></p>"#, |
|
|
|
|
); |
|
|
|
|
println!("Source: {}", &source); |
|
|
|
|
println!("---------------------------------"); |
|
|
|
|
|
|
|
|
|
fn build_nodes(text: &str) { |
|
|
|
|
let dom = parse_document(RcDom::default(), Default::default()) |
|
|
|
|
.from_utf8() |
|
|
|
|
.read_from(&mut source.as_bytes()) |
|
|
|
|
.read_from(&mut text.as_bytes()) |
|
|
|
|
.unwrap(); |
|
|
|
|
let mut tree = Node::root(); |
|
|
|
|
walk(&dom.document, &mut result); |
|
|
|
|
println!("---------------------------------"); |
|
|
|
|
let options = Options::new(70) |
|
|
|
|
.initial_indent(" ") |
|
|
|
|
.subsequent_indent(" ") |
|
|
|
|
.splitter(NoHyphenation); |
|
|
|
|
println!("{}", fill(&result.trim(), &options)); |
|
|
|
|
walk(&dom.document, &mut tree); |
|
|
|
|
println!("Tree: {:?}", tree); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn main() { |
|
|
|
|
let example_1 = String::from(r#"<p>A simple text component</p>"#); |
|
|
|
|
build_nodes(&example_1); |
|
|
|
|
|
|
|
|
|
let example_2 = String::from( |
|
|
|
|
r#"<p><span class="invisible">THis is not visible</span><span class="ellipsis">but this is</span></p>"#, |
|
|
|
|
); |
|
|
|
|
build_nodes(&example_2); |
|
|
|
|
|
|
|
|
|
// let example_1 = String::from(
|
|
|
|
|
// r#"<p>Today I finally moved with my contact and calendar management into the terminal with <a href="https://fosstodon.org/tags/vdirsyncer" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>vdirsyncer</span></a> <a href="https://fosstodon.org/tags/khal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khal</span></a> and <a href="https://fosstodon.org/tags/khard" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khard</span></a>.</p><p>Thank you <span class="h-card"><a href="https://fosstodon.org/@hund" class="u-url mention" rel="nofollow noopener noreferrer" target="_blank">@<span>hund</span></a></span> for your great post: <a href="https://hund.tty1.se/2020/08/12/how-to-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="ellipsis">hund.tty1.se/2020/08/12/how-to</span><span class="invisible">-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html</span></a></p><p><a href="https://fosstodon.org/tags/carddav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>carddav</span></a> <a href="https://fosstodon.org/tags/caldav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>caldav</span></a> <a href="https://fosstodon.org/tags/terminal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>terminal</span></a></p>"#,
|
|
|
|
|
// );
|
|
|
|
|
// println!("Source: {}", &example_1);
|
|
|
|
|
// println!("---------------------------------");
|
|
|
|
|
|
|
|
|
|
// let dom = parse_document(RcDom::default(), Default::default())
|
|
|
|
|
// .from_utf8()
|
|
|
|
|
// .read_from(&mut example_1.as_bytes())
|
|
|
|
|
// .unwrap();
|
|
|
|
|
// let mut tree = Node::root();
|
|
|
|
|
// walk(&dom.document, &mut tree);
|
|
|
|
|
} |
|
|
|
|