From 5a93c3ba84ef7a9d929d48a3799d427f77ecc4f6 Mon Sep 17 00:00:00 2001 From: Julio Biason Date: Tue, 4 May 2021 13:41:36 -0300 Subject: [PATCH] Processing HTML to Org now, looks pretty decent --- src/storage/org.rs | 127 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 13 deletions(-) diff --git a/src/storage/org.rs b/src/storage/org.rs index 3205ffe..5655267 100644 --- a/src/storage/org.rs +++ b/src/storage/org.rs @@ -16,26 +16,116 @@ along with this program. If not, see . */ -use chrono::prelude::*; +use std::borrow::Borrow; +use std::default::Default; use std::fs::OpenOptions; use std::io::prelude::*; use std::path::Path; use std::path::PathBuf; +use chrono::prelude::*; +use html5ever::parse_document; +use html5ever::tendril::TendrilSink; +use markup5ever_rcdom::Handle; +use markup5ever_rcdom::NodeData; +use markup5ever_rcdom::RcDom; + use crate::config::OrgConfig; use crate::storage::data::Data; use crate::storage::storage::Storage; +/// Definition for the Org storage pub struct Org { + /// The path where the file will be stored file: PathBuf, + /// The date being processed, needed for the header if it is a new file date: String, } +/// Simple macro to recursively walk through html5ever nodes +macro_rules! keep_going { + ($source:ident, $target:ident) => { + for child in $source.children.borrow().iter() { + walk(child.borrow(), $target); + } + }; +} + +/// Walk through the html5ever nodes, producing the required string in Org +/// format. +fn walk(input: &Handle, result: &mut String) { + match input.data { + NodeData::Text { ref contents } => { + let text = contents.borrow().to_string(); + result.push_str(&text); + keep_going!(input, result); + } + NodeData::Element { + ref name, + ref attrs, + .. 
+ } => { + let tag = name.local.to_string(); + match tag.as_ref() { + "html" | "head" | "body" => keep_going!(input, result), + "p" | "br" => { + keep_going!(input, result); + result.push_str("\n "); + } + "span" => { + let attrs = attrs.borrow(); + let classes_attr = attrs + .iter() + .find(|attr| attr.name.local.to_string() == "class"); + match classes_attr { + Some(classes) => { + if classes.value.contains("ellipsis") { + keep_going!(input, result); + result.push_str("..."); + } else if !classes.value.contains("invisible") { + keep_going!(input, result); + } + } + None => keep_going!(input, result), + } + } + "a" => { + let attrs = attrs.borrow(); + let rels = attrs + .iter() + .find(|attr| attr.name.local.to_string() == "rel"); + let hrefs = attrs + .iter() + .find(|attr| attr.name.local.to_string() == "href"); + match (rels, hrefs) { + (Some(rel), Some(href)) => { + if !rel.value.to_string().contains("tag") { + result.push_str("[["); + result.push_str(&href.value); + result.push_str("]["); + keep_going!(input, result); + result.push_str("]]"); + } else { + keep_going!(input, result); + } + } + _ => keep_going!(input, result), + } + } + _ => {} + } + } + _ => { + keep_going!(input, result); + } + }; +} + impl Org { pub(crate) fn new_from_config(config: &OrgConfig) -> Org { let now = Utc::now(); - let filename = format!("{}{}{}.org", now.year(), now.month(), now.day()); - let date = format!("{}-{}-{}", now.year(), now.month(), now.day()); + let filename = format!("{:>04}{:>02}{:>02}.org", now.year(), now.month(), now.day()); + let date = format!("{:>04}-{:>02}-{:>02}", now.year(), now.month(), now.day()); let full_path = Path::new(&config.location).join(&filename); log::debug!("Org file: {}", full_path.to_string_lossy()); @@ -44,6 +134,23 @@ impl Org { date, } } + + /// Creates the title (entry) for the record + fn title(record: &Data) -> String { + return format!("* {user}/{id}", user = record.account, id = record.id); + } + + /// Creates the body of the 
Org content from the incoming data + fn body(record: &Data) -> String { + let dom = parse_document(RcDom::default(), Default::default()) + .from_utf8() + .read_from(&mut record.text.as_bytes()) + .unwrap(); + let mut result = String::new(); + result.push_str(" "); // initial indentation + walk(&dom.document, &mut result); + result + } } impl Storage for Org { @@ -70,15 +177,9 @@ impl Storage for Org { }) .unwrap() }); - fp.write_all( - format!( - "* {user}/{id}\n {message}\n", - user = record.account, - id = record.id, - message = record.text, - ) - .as_bytes(), - ) - .unwrap(); + fp.write_all(Org::title(record).as_bytes()).unwrap(); + fp.write_all("\n".as_bytes()).unwrap(); + fp.write_all(Org::body(record).as_bytes()).unwrap(); + fp.write_all("\n".as_bytes()).unwrap(); } }