Skip to content

Commit

Permalink
feat: parse run properties
Browse files Browse the repository at this point in the history
  • Loading branch information
kettei-sproutty committed Jan 16, 2024
1 parent 8da5e17 commit 4852864
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 34 deletions.
Binary file modified example/bun.lockb
Binary file not shown.
18 changes: 13 additions & 5 deletions example/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,24 @@
</div>
</form>

<p class="mt-10 text-center text-sm text-gray-500">
<a
<p
class="mt-10 font-mono text-center text-sm dark:text-white flex items-center justify-center text-black space-x-4"
>
<a href="https://github.com/scuderia-fe"
>Made with ❤️ by Scuderia FE</a
>
<span> - </span
><a
href="https://github.com/scuderia-fe/docx-to-html"
class="font-semibold leading-6 text-indigo-600 hover:text-indigo-500"
class="font-semibold text-indigo-600 hover:text-indigo-500"
>Repository</a
>
</p>
<p class="mt-10 text-center print:hidden dark:text-white text-black">
<p
class="mt-10 text-center font-mono print:hidden dark:text-white text-black"
>
Converted in:
<span class="font-bold underline" id="performance">__</span>
<span class="font-bold underline" id="performance">0ms</span>
</p>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
console.log("Hello via Bun!");
17 changes: 10 additions & 7 deletions src/element.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
#![allow(dead_code)]
use std::fmt::{Display, Formatter, Result};

pub static mut CONTAINER: Element = Element {
tag: ElementTag::Div,
children: vec![],
styles: vec![],
classes: vec![],
};

pub enum ElementTag {
H1,
H2,
Expand All @@ -22,6 +15,7 @@ pub enum ElementTag {
Strong,
A,
Img,
U,
Ul,
Ol,
Li,
Expand All @@ -30,6 +24,7 @@ pub enum ElementTag {
Td,
Th,
Thead,
I,
Tbody,
Tfoot,
Blockquote,
Expand All @@ -39,6 +34,8 @@ pub enum ElementTag {
Br,
Sub,
Sup,
S,
Mark,
}

impl ElementTag {
Expand All @@ -56,6 +53,8 @@ impl ElementTag {
"FootnoteReference" => ElementTag::Sup,
"Hyperlink" => ElementTag::A,
"Strong" => ElementTag::Strong,
"Bold" => ElementTag::Strong,
"BoldCS" => ElementTag::Strong,
_ => ElementTag::P,
}
}
Expand Down Expand Up @@ -85,13 +84,17 @@ impl ElementTag {
ElementTag::Thead => "thead",
ElementTag::Tbody => "tbody",
ElementTag::Tfoot => "tfoot",
ElementTag::I => "i",
ElementTag::Blockquote => "blockquote",
ElementTag::Pre => "pre",
ElementTag::Code => "code",
ElementTag::Hr => "hr",
ElementTag::Br => "br",
ElementTag::Sub => "sub",
ElementTag::Sup => "sup",
ElementTag::S => "s",
ElementTag::U => "u",
ElementTag::Mark => "mark",
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod element;
mod options;
mod parser;
mod state;
mod utils;

use docx_rs::read_docx;
use element::CONTAINER;
use state::CONTAINER;
use wasm_bindgen::prelude::*;

#[wasm_bindgen]
Expand All @@ -28,5 +30,5 @@ pub fn convert(file: &[u8]) -> String {
.iter()
.for_each(parser::parse_child);

unsafe { element::CONTAINER.to_string() }
unsafe { CONTAINER.to_string() }
}
9 changes: 9 additions & 0 deletions src/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/// Options controlling the conversion.
///
/// `Default` is derived: the default value has an empty `style_map`, exactly as the
/// previous hand-written implementation produced.
///
/// NOTE(review): nothing visible in this change reads `style_map`; presumably each
/// pair maps a document style name to an HTML target — confirm once it is consumed.
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// Pairs of strings describing style mappings; empty by default.
    pub style_map: Vec<(String, String)>,
}
2 changes: 1 addition & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod table;

use docx_rs::DocumentChild;

use crate::element::CONTAINER;
use crate::state::CONTAINER;

pub fn parse_child(child: &DocumentChild) {
let children = match child {
Expand Down
37 changes: 30 additions & 7 deletions src/parser/paragraph.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,36 @@
use docx_rs::{Paragraph, ParagraphChild};
use docx_rs::{Paragraph, ParagraphChild, ParagraphProperty};

use crate::element::{Element, ElementChildren, ElementTag};

use super::{hyperlink::analyze_hyperlink, run::analyze_run};
use super::{
hyperlink::analyze_hyperlink,
run::{analyze_run, analyze_run_properties},
};

/// Collects the inline CSS declarations implied by a paragraph's properties.
///
/// Only the alignment is inspected. The document's justification value is translated
/// into a CSS `text-align` keyword: values CSS does not recognise (`both`,
/// `justified`, `distribute`) become `justify`; every other value is passed through
/// unchanged.
///
/// Returns one `property: value` string per declaration, or an empty vector when the
/// paragraph has no alignment set.
pub fn get_paragraph_properties(properties: &ParagraphProperty) -> Vec<String> {
    let mut props = Vec::new();

    if let Some(alignment) = &properties.alignment {
        // Stringify first so the mapping works on the same text the original emitted.
        let raw = alignment.val.to_string();
        let text_align = match raw.as_str() {
            // Word's spellings of justified text are not valid CSS keywords.
            "both" | "justified" | "distribute" => "justify",
            other => other,
        };
        props.push(format!("text-align: {}", text_align));
    }

    props
}

pub fn analyze_paragraph(paragraph: &Paragraph) -> ElementChildren {
let mut element = Element::default();

let styles = &paragraph.property.style.as_ref();
if styles.is_some() {
let styles = &styles.unwrap().val;
element.tag = ElementTag::from_style(styles)
let tag = &paragraph.property.style.as_ref();
if tag.is_some() {
let tag = &tag.unwrap().val;
element.tag = ElementTag::from_style(tag)
}

let mut run_property = analyze_run_properties(&paragraph.property.run_property);

let properties = get_paragraph_properties(&paragraph.property);
element.styles = properties;

paragraph.children.iter().for_each(|child| match child {
ParagraphChild::Run(run) => {
let mut children = analyze_run(run);
Expand All @@ -27,5 +45,10 @@ pub fn analyze_paragraph(paragraph: &Paragraph) -> ElementChildren {
_ => (),
});

ElementChildren::Element(element)
if run_property.tags.len().eq(&0) {
return ElementChildren::Element(element);
}

run_property.text = element.to_string();
return ElementChildren::Text(run_property.to_string());
}
95 changes: 83 additions & 12 deletions src/parser/run.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,73 @@
use docx_rs::{Run, RunChild, Text};
use docx_rs::{Run, RunChild, RunProperty, Text};

use crate::element::{Element, ElementChildren, ElementTag};
use crate::element::{ElementChildren, ElementTag};

/// A piece of text together with the inline HTML tags that wrap it.
pub struct RunElement {
    /// Tags wrapping the text, outermost first.
    pub tags: Vec<ElementTag>,
    /// Content placed inside the innermost tag; written verbatim, without escaping.
    pub text: String,
}

/// Renders the element as HTML: one opening tag per entry of `tags` in order, then
/// `text`, then the matching closing tags in reverse order.
///
/// This is a `Display` impl rather than an inherent `to_string` method, so existing
/// `to_string()` calls keep working through the blanket `ToString` implementation
/// while the output is written straight into the formatter instead of allocating a
/// temporary `String` per tag.
impl std::fmt::Display for RunElement {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for tag in &self.tags {
            write!(f, "<{}>", tag.to_string())?;
        }

        f.write_str(&self.text)?;

        // Close in reverse so the tags nest properly.
        for tag in self.tags.iter().rev() {
            write!(f, "</{}>", tag.to_string())?;
        }

        Ok(())
    }
}

/// Builds the tag stack for a run from its formatting properties.
///
/// The stack always starts with a `Span`. If the run names a style, the tag derived
/// from that style comes next, followed by one tag for each of bold, italic,
/// underline, strike and highlight that is present (in that order). The returned
/// element's `text` is empty; callers fill it in.
pub fn analyze_run_properties(run_properties: &RunProperty) -> RunElement {
    let mut tags = vec![ElementTag::Span];

    if let Some(style) = run_properties.style.as_ref() {
        tags.push(ElementTag::from_style(&style.val));
    }

    // Presence of the property is what counts; its value is not inspected.
    let toggles = vec![
        (run_properties.bold.is_some(), ElementTag::Strong),
        (run_properties.italic.is_some(), ElementTag::I),
        (run_properties.underline.is_some(), ElementTag::U),
        (run_properties.strike.is_some(), ElementTag::S),
        (run_properties.highlight.is_some(), ElementTag::Mark),
    ];
    tags.extend(
        toggles
            .into_iter()
            .filter(|(enabled, _)| *enabled)
            .map(|(_, tag)| tag),
    );

    // TODO: superscript and subscript — map a `vert_align` value of "superscript" to
    // `ElementTag::Sup` and "subscript" to `ElementTag::Sub`.

    RunElement {
        tags,
        text: String::new(),
    }
}

fn analyze_run_text(text: &Text) -> Option<ElementChildren> {
if text.text.is_empty() {
Expand All @@ -11,6 +78,8 @@ fn analyze_run_text(text: &Text) -> Option<ElementChildren> {
}

pub fn analyze_run(run: &Run) -> Vec<ElementChildren> {
let mut element = analyze_run_properties(&run.run_property);

let children = run
.children
.iter()
Expand All @@ -20,15 +89,17 @@ pub fn analyze_run(run: &Run) -> Vec<ElementChildren> {
})
.collect();

let style = &run.run_property.style.clone().unwrap_or_default();
if !style.val.is_empty() && style.val != "Normal" {
let element = Element {
tag: ElementTag::from_style(&style.val),
..Element::default()
};

vec![ElementChildren::Element(element)]
} else {
children
if element.tags.len() == 0 {
return children;
}

element.text = children
.iter()
.map(|child| match child {
ElementChildren::Text(text) => text,
_ => "",
})
.collect();

vec![ElementChildren::Text(element.to_string())]
}
14 changes: 14 additions & 0 deletions src/state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use crate::{
element::{Element, ElementTag},
options::Options,
};

/// Global root element for the converted output: a `Div` that starts with no
/// children, styles or classes.
///
/// Declared `static mut`, so every read or write must happen inside an `unsafe` block.
/// NOTE(review): nothing visible here resets it between conversions or guards against
/// concurrent access — confirm that callers account for both.
pub static mut CONTAINER: Element = Element {
tag: ElementTag::Div,
children: vec![],
styles: vec![],
classes: vec![],
};

/// Global conversion options, initialised with an empty style map.
///
/// Declared `static mut`, so access requires an `unsafe` block.
// `allow(dead_code)`: no code visible alongside this definition reads the static yet.
#[allow(dead_code)]
pub static mut OPTIONS: Options = Options { style_map: vec![] };

0 comments on commit 4852864

Please sign in to comment.