Browse Source

WIP: Build a simpler element tree

With the simpler tree, it seems it will be easier to generate the org,
without the chance of exploding the call stack.
master
Julio Biason 4 years ago
parent
commit
f6bb83026f
  1. 151
      html5test/src/main.rs

151
html5test/src/main.rs

@ -1,9 +1,12 @@
use html5ever::parse_document; use html5ever::parse_document;
use html5ever::tendril::StrTendril;
use html5ever::tendril::TendrilSink; use html5ever::tendril::TendrilSink;
use markup5ever::interface::Attribute;
use markup5ever_rcdom::Handle; use markup5ever_rcdom::Handle;
use markup5ever_rcdom::NodeData; use markup5ever_rcdom::NodeData;
use markup5ever_rcdom::RcDom; use markup5ever_rcdom::RcDom;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::cell::RefCell;
use std::default::Default; use std::default::Default;
use textwrap::fill; use textwrap::fill;
use textwrap::NoHyphenation; use textwrap::NoHyphenation;
@ -22,10 +25,12 @@ use textwrap::Options;
// </p> // </p>
// //
// That would build // That would build
// root // root --------------------------\
// / | |\Code(is for code) // / | \ Code()
// Text(Text text) | Italic(for italics) // Text(Text text) | ------ Italic() |
// Link(the link, link) // Link(link) | Text(is for code)
// | Text(for italics)
// Text(the link)
// //
// Tree things to do, then: // Tree things to do, then:
// 1. Walk the DOM tree and build the text tree. // 1. Walk the DOM tree and build the text tree.
@ -36,53 +41,115 @@ use textwrap::Options;
// we could work on the text wrap, 'cause there are elements that can't be // we could work on the text wrap, 'cause there are elements that can't be
// wrapped (for example, Links) // wrapped (for example, Links)
fn go_children(input: &Handle, result: &mut String) { /// Nodes in the text tree
for child in input.children.borrow().iter() { enum NodeType {
walk(child.borrow(), result); /// The root element; produces nothing, but has the base content.
Root,
/// A text block. Contains the text itself.
Text(String),
/// A link to somewhere. Contains the link.
Link(String),
/// Italics
Italic,
/// Code block
Code,
/// A line break
LineBreak,
} }
struct Node {
r#type: NodeType,
children: Vec<Node>,
} }
fn walk(input: &Handle, result: &mut String) { impl Node {
println!(">>> {:?}", input.data); /// Build the root node
match input.data { fn root() -> Self {
NodeData::Text { ref contents } => { Self {
let text = contents.borrow().to_string(); r#type: NodeType::Root,
println!("Text: {:?}", text); children: Vec::new(),
result.push_str(&text);
} }
NodeData::Element { ref name, .. } => {
let tag = name.local.to_string();
println!("Tag: {:?}", tag);
match tag.as_ref() {
"html" | "head" | "body" => {
println!("\tIgnored tag");
go_children(input, result);
} }
"p" => {
println!("\tParagraph"); /// Build a text node
result.push_str("\n\n"); fn text(text: &str) -> Self {
go_children(input, result); Self {
r#type: NodeType::Text(text.into()),
children: Vec::new(), // XXX text nodes will never have children
} }
"span" => { }
println!("\tSpan");
if let NodeData::Element { ref attrs, .. } = input.data { /// Build a link node
fn link(href: &str) -> Self {
Self {
r#type: NodeType::Link(href.into()),
children: Vec::new(),
}
}
/// Build a linebreak node
fn line_break() -> Self {
Self {
r#type: NodeType::LineBreak,
children: Vec::new(), // XXX linebreaks will never have children
}
}
/// Add a child node to this node
fn add_child(&mut self, node: Node) {
self.children.push(node);
}
}
fn handle_text(node: &mut Node, contents: &RefCell<StrTendril>) -> bool {
let text = contents.borrow().to_string();
node.add_child(Node::text(&text));
true
}
fn handle_line_break(node: &mut Node) -> bool {
node.add_child(Node::line_break());
true
}
fn handle_span(node: &mut Node, attrs: &RefCell<Vec<Attribute>>) -> bool {
let attrs = attrs.borrow(); let attrs = attrs.borrow();
let classes = attrs let classes = attrs
.iter() .iter()
.find(|attr| attr.name.local.to_string() == "class"); .find(|attr| attr.name.local.to_string() == "class");
if let Some(class) = classes { if let Some(class) = classes {
let classes = class.value.to_string(); let classes = class.value.to_string();
if !classes.contains("invisible") { // just keep going if not invisible
go_children(input, result); // bollocks! !classes.contains("invisible")
if classes.contains("ellipsis") {
result.push_str("..."); // if !classes.contains("invisible") {
} // true
} // if classes.contains("ellipsis") {
// result.push_str("...");
// }
// }
} else { } else {
go_children(input, result); // with no classes, we consider the element visible and just keep
} // processing the list.
true
} }
} }
fn walk(input: &Handle, parent: &mut Node) {
println!(">>> {:?}", input.data);
let process_children = match input.data {
NodeData::Text { ref contents } => handle_text(parent, contents),
NodeData::Element {
ref name,
ref attrs,
..
} => {
let tag = name.local.to_string();
println!("Tag: {:?}", tag);
match tag.as_ref() {
"html" | "head" | "body" => true, // just keep going
"p" => handle_line_break(parent),
"span" => handle_span(parent, attrs),
"a" => { "a" => {
println!("\tAnchor"); println!("\tAnchor");
if let NodeData::Element { ref attrs, .. } = input.data { if let NodeData::Element { ref attrs, .. } = input.data {
@ -109,11 +176,15 @@ fn walk(input: &Handle, result: &mut String) {
} }
} }
} }
_ => {} _ => false,
} }
} }
_ => { _ => true, // if we can't deal with it, just keep going
go_children(input, result); };
if process_children {
for child in input.children.borrow().iter() {
walk(child.borrow(), parent);
} }
} }
} }
@ -129,7 +200,7 @@ fn main() {
.from_utf8() .from_utf8()
.read_from(&mut source.as_bytes()) .read_from(&mut source.as_bytes())
.unwrap(); .unwrap();
let mut result = String::new(); let mut tree = Node::root();
walk(&dom.document, &mut result); walk(&dom.document, &mut result);
println!("---------------------------------"); println!("---------------------------------");
let options = Options::new(70) let options = Options::new(70)

Loading…
Cancel
Save