diff --git a/Cargo.lock b/Cargo.lock index 014818b..9b48992 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,12 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "bitflags" version = "1.3.2" @@ -86,7 +92,7 @@ name = "docx-rs" version = "0.4.7" source = "git+https://github.com/kettei-sproutty/docx-rs?branch=main#6441f26f6935a6fedba11e20d53876d4ddddfcad" dependencies = [ - "base64", + "base64 0.13.1", "image", "serde", "serde_json", @@ -99,8 +105,10 @@ dependencies = [ name = "docx-to-html" version = "0.5.0" dependencies = [ + "base64 0.21.7", "console_error_panic_hook", "docx-rs", + "serde", "wasm-bindgen", "wasm-bindgen-test", ] diff --git a/Cargo.toml b/Cargo.toml index 01a146a..0ebbb61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ wasm-bindgen = "0.2.84" # code size when deploying. 
console_error_panic_hook = { version = "0.1.7", optional = true } docx-rs = { git = "https://github.com/kettei-sproutty/docx-rs", branch = "main" } +serde = { version = "1.0.195", features = ["derive"] } +base64 = "0.21.7" [dev-dependencies] wasm-bindgen-test = "0.3.34" diff --git a/src/element.rs b/src/element.rs index c5387ef..d1fa1ed 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,6 +1,7 @@ #![allow(dead_code)] use std::fmt::{Display, Formatter, Result}; +#[derive(PartialEq, Eq)] pub enum ElementTag { H1, H2, @@ -111,6 +112,7 @@ pub enum ElementChildren { } pub struct Element { + pub id: Option, pub tag: ElementTag, pub styles: Vec, pub classes: Vec, @@ -138,6 +140,7 @@ impl Element { impl Default for Element { fn default() -> Self { Element { + id: None, tag: ElementTag::P, children: vec![], styles: vec![], diff --git a/src/image.rs b/src/image.rs new file mode 100644 index 0000000..96cb1ee --- /dev/null +++ b/src/image.rs @@ -0,0 +1,27 @@ +pub struct HtmlImage { + pub id: String, + pub src: String, + pub size: (u32, u32), +} + +impl HtmlImage { + pub fn to_string(&self) -> String { + format!( + "", + self.id, + self.src, + format!("{}px", self.size.0 / 10000), + format!("{}px", self.size.1 / 10000), + ) + } +} + +impl Default for HtmlImage { + fn default() -> Self { + HtmlImage { + id: String::new(), + src: String::new(), + size: (0, 0), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index afa89ed..8df5c7a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,14 @@ mod element; +mod image; mod options; mod parser; mod state; mod utils; +use base64::{engine::general_purpose, Engine as _}; use docx_rs::read_docx; -use state::CONTAINER; +use image::HtmlImage; +use state::{CONTAINER, IMAGES}; use wasm_bindgen::prelude::*; #[wasm_bindgen] @@ -18,11 +21,24 @@ pub fn convert(file: &[u8]) -> String { utils::set_panic_hook(); unsafe { CONTAINER.children.clear(); + IMAGES.clear() } let document = read_docx(file).unwrap(); - // let images = &document.images; - // 
alert(format!("images: {:?}", images).as_str()); + + let images = &document.images; + images.iter().for_each(|img| { + let (id, _file_type, image, _png) = img; + + let src = general_purpose::STANDARD.encode(&image.0); + let image = HtmlImage { + id: id.to_string(), + src: format!("data:image/png;base64,{}", src), + ..Default::default() + }; + + unsafe { IMAGES.push(image) } + }); document .document diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 75f0e49..304852c 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,6 +1,9 @@ use docx_rs::{Paragraph, ParagraphChild, ParagraphProperty}; -use crate::element::{Element, ElementChildren, ElementTag}; +use crate::{ + alert, + element::{Element, ElementChildren, ElementTag}, +}; use super::{ hyperlink::analyze_hyperlink, @@ -21,9 +24,19 @@ pub fn analyze_paragraph(paragraph: &Paragraph) -> ElementChildren { let mut element = Element::default(); let tag = &paragraph.property.style.as_ref(); - if tag.is_some() { - let tag = &tag.unwrap().val; - element.tag = ElementTag::from_style(tag) + if let Some(tag) = tag { + if tag.val.eq("ListParagraph") { + let numbering_property = &paragraph.property.numbering_property.as_ref(); + if let Some(property) = numbering_property { + let id = &property.id.as_ref().unwrap().id; + match id { + _ => element.tag = ElementTag::Ul, + } + } + } else { + let tag = &tag.val; + element.tag = ElementTag::from_style(tag); + } } let mut run_property = analyze_run_properties(&paragraph.property.run_property); @@ -34,6 +47,14 @@ pub fn analyze_paragraph(paragraph: &Paragraph) -> ElementChildren { paragraph.children.iter().for_each(|child| match child { ParagraphChild::Run(run) => { let mut children = analyze_run(run); + if element.tag == ElementTag::Ul || element.tag == ElementTag::Ol { + children.iter_mut().for_each(|child| { + if let ElementChildren::Element(child) = child { + alert(&format!("child: {}", child.to_string())); + child.tag = ElementTag::Li; + } + }); + } 
element.children.append(&mut children); } ParagraphChild::Hyperlink(hyperlink) => { diff --git a/src/parser/run.rs b/src/parser/run.rs index c03a7b0..56e7c44 100644 --- a/src/parser/run.rs +++ b/src/parser/run.rs @@ -1,6 +1,10 @@ -use docx_rs::{Run, RunChild, RunProperty, Text}; +use docx_rs::{Drawing, DrawingData, Run, RunChild, RunProperty, Text}; -use crate::element::{ElementChildren, ElementTag}; +use crate::{ + element::{ElementChildren, ElementTag}, + image::HtmlImage, + state::IMAGES, +}; pub struct RunElement { pub tags: Vec, @@ -100,6 +104,32 @@ fn analyze_run_text(text: &Text) -> Option<ElementChildren> { Some(ElementChildren::Text(text.text.to_string())) } +fn analyze_run_image(image: &Drawing) -> Option<ElementChildren> { + match &image.data { + Some(DrawingData::Pic(pic)) => unsafe { + let image = IMAGES.iter().find(|picture| picture.id.eq(&pic.id)); + if image.is_none() { + return None; + } + + let image = image.unwrap(); + if image.src.is_empty() { + return None; + } + + let img = HtmlImage { + id: image.id.clone(), + src: image.src.clone(), + size: pic.size, + }; + + return Some(ElementChildren::Text(img.to_string())); + }, + Some(DrawingData::TextBox(_)) => None, + None => None, + } +} + pub fn analyze_run(run: &Run) -> Vec<ElementChildren> { let mut element = analyze_run_properties(&run.run_property); @@ -108,6 +138,7 @@ pub fn analyze_run(run: &Run) -> Vec<ElementChildren> { .iter() .filter_map(|child| match child { RunChild::Text(text) => analyze_run_text(text), + RunChild::Drawing(image) => analyze_run_image(image), _ => None, }) .collect(); diff --git a/src/state.rs b/src/state.rs index 1135b09..83d0c32 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,9 +1,11 @@ use crate::{ element::{Element, ElementTag}, + image::HtmlImage, options::Options, }; pub static mut CONTAINER: Element = Element { + id: None, tag: ElementTag::Div, children: vec![], styles: vec![], @@ -12,3 +14,5 @@ pub static mut CONTAINER: Element = Element { #[allow(dead_code)] pub static mut OPTIONS: Options = Options { style_map: vec![] }; + 
+pub static mut IMAGES: Vec<HtmlImage> = Vec::new();