Browse Source

Somewhat doing some progress

master
Julio Biason 4 years ago
parent
commit
ea6874c702
  1. 1
      html5test/Cargo.lock
  2. 1
      html5test/Cargo.toml
  3. 224
      html5test/src/main.rs

1
html5test/Cargo.lock generated

@ -46,6 +46,7 @@ name = "html5test"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"html5ever", "html5ever",
"markup5ever",
"markup5ever_rcdom", "markup5ever_rcdom",
"textwrap", "textwrap",
] ]

1
html5test/Cargo.toml

@ -9,4 +9,5 @@ edition = "2018"
[dependencies] [dependencies]
html5ever = "0.25" html5ever = "0.25"
markup5ever_rcdom = "0.1" markup5ever_rcdom = "0.1"
markup5ever = "0.10"
textwrap = "0.13" textwrap = "0.13"

224
html5test/src/main.rs

@ -8,9 +8,6 @@ use markup5ever_rcdom::RcDom;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::cell::RefCell; use std::cell::RefCell;
use std::default::Default; use std::default::Default;
use textwrap::fill;
use textwrap::NoHyphenation;
use textwrap::Options;
// This go_children/walk is stupid, but I shot myself in the foot by adding // This go_children/walk is stupid, but I shot myself in the foot by adding
// things after the children, like on links. // things after the children, like on links.
@ -42,21 +39,25 @@ use textwrap::Options;
// wrapped (for example, Links) // wrapped (for example, Links)
/// Nodes in the text tree /// Nodes in the text tree
#[derive(Debug)]
enum NodeType { enum NodeType {
/// The root element; produces nothing, but has the base content. /// The root element; produces nothing, but has the base content.
Root, Root,
/// A text block. Contains the text itself. /// A text block. Contains the text itself.
Text(String), Text(String),
/// A link to somewhere. Contains the link.
Link(String),
/// Italics
Italic,
/// Code block
Code,
/// A line break /// A line break
LineBreak, LineBreak,
// /// A link to somewhere. Contains the link.
// Link(String),
// /// Italics
// Italic,
// /// Code block
// Code,
// /// A block with an ellipsis at the end
// Ellipsis,
} }
#[derive(Debug)]
struct Node { struct Node {
r#type: NodeType, r#type: NodeType,
children: Vec<Node>, children: Vec<Node>,
@ -75,14 +76,6 @@ impl Node {
fn text(text: &str) -> Self { fn text(text: &str) -> Self {
Self { Self {
r#type: NodeType::Text(text.into()), r#type: NodeType::Text(text.into()),
children: Vec::new(), // XXX text nodes will never have children
}
}
/// Build a link node
fn link(href: &str) -> Self {
Self {
r#type: NodeType::Link(href.into()),
children: Vec::new(), children: Vec::new(),
} }
} }
@ -91,53 +84,105 @@ impl Node {
fn line_break() -> Self { fn line_break() -> Self {
Self { Self {
r#type: NodeType::LineBreak, r#type: NodeType::LineBreak,
children: Vec::new(), // XXX linebreaks will never have children children: Vec::new(),
} }
} }
// /// Build a link node
// fn link(href: &str) -> Self {
// Self {
// r#type: NodeType::Link(href.into()),
// children: Vec::new(),
// }
// }
// /// Build an ellipsis node
// fn ellipsis() -> Self {
// Self {
// r#type: NodeType::Ellipsis,
// children: Vec::new(),
// }
// }
/// Add a child node to this node /// Add a child node to this node
fn add_child(&mut self, node: Node) { fn add_child(&mut self, node: Node) {
self.children.push(node); self.children.push(node);
} }
} }
fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> bool { // Handle functions can return a three state result:
// 1. Do not process the children of the current Handle
// 2. Process the children and add to the same parent
// 3. Use the new Node as parent for future children.
/// Result of the handling functions
enum HandleResult {
/// Stop processing, don't continue generating nodes
Stop,
/// Follow the children, but don't add any nodes in the current level
Follow,
// /// Produce a new node, but don't attach any children to it
// AddAndStay(Node),
/// Assume a new parent node
AddAndAdopt(Node),
}
/// Handle a simple block of text
fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> HandleResult {
let text = contents.borrow().to_string(); let text = contents.borrow().to_string();
node.add_child(Node::text(&text)); node.add_child(Node::text(&text));
true HandleResult::Stop
} }
fn handle_line_break(node: &mut Node) -> bool { /// Handle an incoming line break
node.add_child(Node::line_break()); fn handle_line_break() -> HandleResult {
true let line_break = Node::line_break();
HandleResult::AddAndAdopt(line_break)
} }
fn handle_span(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> bool { /// Process the span content
fn handle_span(attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
let attrs = attrs.borrow(); let attrs = attrs.borrow();
let classes = attrs let classes_attr = attrs
.iter() .iter()
.find(|attr| attr.name.local.to_string() == "class"); .find(|attr| attr.name.local.to_string() == "class");
if let Some(class) = classes { match classes_attr {
let classes = class.value.to_string(); Some(classes) => {
// just keep going if not invisible if classes.value.contains("invisible") {
!classes.contains("invisible") HandleResult::Stop
// if !classes.contains("invisible") {
// true
// if classes.contains("ellipsis") {
// result.push_str("...");
// }
// }
} else { } else {
// with no classes, we consider the element visible and just keep HandleResult::Follow
// processing the list. }
true }
None => HandleResult::Follow,
} }
} }
// fn handle_anchor(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> HandleResult {
// let attrs = attrs.borrow();
// let rels = attrs
// .iter()
// .find(|attr| attr.name.local.to_string() == "rel");
// let hrefs = attrs
// .iter()
// .find(|attr| attr.name.local.to_string() == "href");
// match (rels, hrefs) {
// (Some(rel), Some(href)) => {
// if !rel.value.to_string().contains("tag") {
// let new_node = Node::link(&href.value);
// node.add_child(new_node);
// HandleResult::NewNode(new_node)
// } else {
// HandleResult::Keep
// }
// }
// _ => HandleResult::Stop,
// }
// }
fn walk(input: &Handle, parent: &mut Node) { fn walk(input: &Handle, parent: &mut Node) {
println!(">>> {:?}", input.data); // println!(">>> {:?}", input.data);
let process_children = match input.data { let element = match input.data {
NodeData::Text { ref contents } => handle_text(parent, contents), NodeData::Text { ref contents } => handle_text(parent, contents),
NodeData::Element { NodeData::Element {
ref name, ref name,
@ -145,67 +190,68 @@ fn walk(input: &Handle, parent: &mut Node) {
.. ..
} => { } => {
let tag = name.local.to_string(); let tag = name.local.to_string();
println!("Tag: {:?}", tag);
match tag.as_ref() { match tag.as_ref() {
"html" | "head" | "body" => true, // just keep going "html" | "head" | "body" => HandleResult::Follow,
"p" => handle_line_break(parent), "p" => handle_line_break(),
"span" => handle_span(parent, attrs), "span" => handle_span(attrs),
"a" => { // "a" => handle_anchor(parent, attrs),
println!("\tAnchor"); _ => HandleResult::Stop,
if let NodeData::Element { ref attrs, .. } = input.data {
let attrs = attrs.borrow();
let rels = attrs
.iter()
.find(|attr| attr.name.local.to_string() == "rel");
let hrefs = attrs
.iter()
.find(|attr| attr.name.local.to_string() == "href");
match (rels, hrefs) {
(Some(rel), Some(href)) => {
if !rel.value.to_string().contains("tag") {
result.push_str("[[");
result.push_str(&href.value.to_string());
result.push_str("][");
go_children(input, result);
result.push_str("]]");
} else {
go_children(input, result);
}
}
_ => {}
}
} }
} }
_ => false, _ => HandleResult::Follow, // if we can't deal with it, just keep going
}
}
_ => true, // if we can't deal with it, just keep going
}; };
if process_children { match element {
HandleResult::Stop => {}
HandleResult::Follow => {
for child in input.children.borrow().iter() { for child in input.children.borrow().iter() {
walk(child.borrow(), parent); walk(child.borrow(), parent);
} }
} }
// HandleResult::AddAndStay(new_node) => {
// parent.add_child(new_node);
// for child in input.children.borrow().iter() {
// walk(child.borrow(), parent);
// }
// }
HandleResult::AddAndAdopt(mut new_node) => {
for child in input.children.borrow().iter() {
walk(child.borrow(), &mut new_node);
}
parent.add_child(new_node);
}
}
} }
fn main() { fn build_nodes(text: &str) {
let source = String::from(
r#"<p>Today I finally moved with my contact and calendar management into the terminal with <a href="https://fosstodon.org/tags/vdirsyncer" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>vdirsyncer</span></a> <a href="https://fosstodon.org/tags/khal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khal</span></a> and <a href="https://fosstodon.org/tags/khard" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khard</span></a>.</p><p>Thank you <span class="h-card"><a href="https://fosstodon.org/@hund" class="u-url mention" rel="nofollow noopener noreferrer" target="_blank">@<span>hund</span></a></span> for your great post: <a href="https://hund.tty1.se/2020/08/12/how-to-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="ellipsis">hund.tty1.se/2020/08/12/how-to</span><span class="invisible">-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html</span></a></p><p><a href="https://fosstodon.org/tags/carddav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>carddav</span></a> <a href="https://fosstodon.org/tags/caldav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>caldav</span></a> <a href="https://fosstodon.org/tags/terminal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>terminal</span></a></p>"#,
);
println!("Source: {}", &source);
println!("---------------------------------");
let dom = parse_document(RcDom::default(), Default::default()) let dom = parse_document(RcDom::default(), Default::default())
.from_utf8() .from_utf8()
.read_from(&mut source.as_bytes()) .read_from(&mut text.as_bytes())
.unwrap(); .unwrap();
let mut tree = Node::root(); let mut tree = Node::root();
walk(&dom.document, &mut result); walk(&dom.document, &mut tree);
println!("---------------------------------"); println!("Tree: {:?}", tree);
let options = Options::new(70) }
.initial_indent(" ")
.subsequent_indent(" ") fn main() {
.splitter(NoHyphenation); let example_1 = String::from(r#"<p>A simple text component</p>"#);
println!("{}", fill(&result.trim(), &options)); build_nodes(&example_1);
let example_2 = String::from(
r#"<p><span class="invisible">THis is not visible</span><span class="ellipsis">but this is</span></p>"#,
);
build_nodes(&example_2);
// let example_1 = String::from(
// r#"<p>Today I finally moved with my contact and calendar management into the terminal with <a href="https://fosstodon.org/tags/vdirsyncer" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>vdirsyncer</span></a> <a href="https://fosstodon.org/tags/khal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khal</span></a> and <a href="https://fosstodon.org/tags/khard" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>khard</span></a>.</p><p>Thank you <span class="h-card"><a href="https://fosstodon.org/@hund" class="u-url mention" rel="nofollow noopener noreferrer" target="_blank">@<span>hund</span></a></span> for your great post: <a href="https://hund.tty1.se/2020/08/12/how-to-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="ellipsis">hund.tty1.se/2020/08/12/how-to</span><span class="invisible">-sync-and-manage-your-caldav-and-carddav-via-the-terminal.html</span></a></p><p><a href="https://fosstodon.org/tags/carddav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>carddav</span></a> <a href="https://fosstodon.org/tags/caldav" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>caldav</span></a> <a href="https://fosstodon.org/tags/terminal" class="mention hashtag" rel="tag nofollow noopener noreferrer" target="_blank">#<span>terminal</span></a></p>"#,
// );
// println!("Source: {}", &example_1);
// println!("---------------------------------");
// let dom = parse_document(RcDom::default(), Default::default())
// .from_utf8()
// .read_from(&mut example_1.as_bytes())
// .unwrap();
// let mut tree = Node::root();
// walk(&dom.document, &mut tree);
} }

Loading…
Cancel
Save