Skip to content

Commit

Permalink
Merge pull request #52 from lfoppiano/enable-text-content-2
Browse files Browse the repository at this point in the history
Enable text content
  • Loading branch information
lfoppiano authored Jun 22, 2024
2 parents 5526238 + fc2025b commit c3ef337
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 48 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ In the following table the list of parameters that can be provided to the `pdf_v
| pages_vertical_spacing | The vertical space (in pixels) between each page of the PDF. Defaults to 2 pixels. |
| annotation_outline_size | Size of the outline around each annotation in pixels. Defaults to 1 pixel. |
| rendering | Type of rendering: `unwrap` (default), `legacy_iframe`, or `legacy_embed`. The default value, `unwrap` shows the PDF document using pdf.js, and supports the visualisation of annotations. Other values are `legacy_iframe` and `legacy_embed` which use the legacy approach of injecting the document into an `<embed>` or `<iframe>`. They allow viewing the PDF using the viewer of the browser that contains additional features we are still working to implement in this component. **IMPORTANT**: :warning: The "legacy" methods **work only with Firefox**, and **do not support annotations**. :warning:|
| pages_to_render | Filter the rendering to a specific set of pages. By default, all pages are rendered. |
| pages_to_render | Filter the rendering to a specific set of pages. By default, all pages are rendered. |
| render_text | Enable a layer of text on top of the PDF document, so that the text can be selected and copied. Defaults to `False`. **NOTE**: to avoid breaking existing deployments, this is optional for now, also considering that having many annotations might interfere with copy-paste. |


### Annotation format
Expand Down
16 changes: 10 additions & 6 deletions streamlit_pdf_viewer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@
)


def pdf_viewer(input: Union[str, Path, bytes],
def pdf_viewer(input: Union[str, Path, bytes],
width: int = None,
height: int = None,
key=None,
annotations: list = (),
pages_vertical_spacing: int = 2,
annotation_outline_size: int = 1,
rendering: str = RENDERING_UNWRAP,
pages_to_render: List[int] = ()
pages_to_render: List[int] = (),
render_text: bool = False
):
"""
pdf_viewer function to display a PDF file in a Streamlit app.
Expand All @@ -50,6 +51,7 @@ def pdf_viewer(input: Union[str, Path, bytes],
"legacy_iframe" and "legacy_embed", which use the legacy approach for showing PDF documents with streamlit.
These methods enable the default pdf viewer of Firefox/Chrome/Edge that contains additional features we are still
working to implement for the "unwrap" method.
:param render_text: Whether to enable selection of text in the PDF viewer. Defaults to False.
The function reads the PDF file (from a file path, URL, or binary data), encodes it in base64,
and uses a Streamlit component to render it in the app. It supports optional annotations and adjustable margins.
Expand All @@ -73,7 +75,7 @@ def pdf_viewer(input: Union[str, Path, bytes],

if rendering == RENDERING_IFRAME or rendering == RENDERING_EMBED:
print(f"{RENDERING_IFRAME} and {RENDERING_EMBED} may not work consistently on all browsers "
f"they might disapper in future releases.")
f"they might disapper in future releases.")
if height is None:
height = "100%"

Expand All @@ -88,7 +90,8 @@ def pdf_viewer(input: Union[str, Path, bytes],
pages_vertical_spacing=pages_vertical_spacing,
annotation_outline_size=annotation_outline_size,
rendering=rendering,
pages_to_render=pages_to_render
pages_to_render=pages_to_render,
render_text=render_text
)
return component_value

Expand All @@ -97,11 +100,12 @@ def pdf_viewer(input: Union[str, Path, bytes],
with open("resources/test.pdf", 'rb') as fo:
binary = fo.read()

with open("resources/annotations.json", 'rb') as fo:
with open("resources/annotations.sample.json", 'rb') as fo:
annotations = json.loads(fo.read())

viewer = pdf_viewer(
binary,
width=800,
annotations=annotations
annotations=annotations,
render_text=True
)
27 changes: 12 additions & 15 deletions streamlit_pdf_viewer/frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,21 @@
},
"dependencies": {
"core-js": "^3.6.5",
"pdfjs-dist": "^3.6.172",
"streamlit-component-lib": "^1.1.1",
"pdfjs-dist": "^4.2.67",
"streamlit-component-lib": "^2.0.0",
"vue": "^3.0.0-0"
},
"devDependencies": {
"@babel/plugin-proposal-class-properties": "^7.18.6",
"@babel/plugin-proposal-private-methods": "^7.18.6",
"@typescript-eslint/eslint-plugin": "^2.33.0",
"@typescript-eslint/parser": "^2.33.0",
"@vue/cli-plugin-babel": "~4.5.0",
"@vue/cli-plugin-eslint": "~4.5.0",
"@vue/cli-plugin-typescript": "^4.5.0",
"@vue/cli-service": "~4.5.0",
"@vue/compiler-sfc": "^3.0.0-0",
"@vue/eslint-config-typescript": "^5.0.2",
"@typescript-eslint/eslint-plugin": "^7.10.0",
"@typescript-eslint/parser": "^7.10.0",
"@vue/cli-plugin-babel": "^5.0.8",
"@vue/cli-plugin-typescript": "^5.0.8",
"@vue/cli-service": "^5.0.8",
"@vue/compiler-sfc": "^3.4.27",
"@vue/eslint-config-typescript": "^13.0.0",
"babel-eslint": "^10.1.0",
"eslint": "^6.7.2",
"eslint-plugin-vue": "^7.0.0-0",
"typescript": "4"
"typescript": "^5.4.5",
"vue-loader": "^17.4.2",
"webpack": "^5.91.0"
}
}
95 changes: 73 additions & 22 deletions streamlit_pdf_viewer/frontend/src/PdfViewer.vue
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
<div id="pdfContainer" :style="pdfContainerStyle">
<div v-if="args.rendering==='unwrap'">
<div id="pdfViewer" :style="pdfViewerStyle">
<!-- <div class="urlsAnnotations"></div>-->
<div id="pdfAnnotations" v-if="args.annotations">
<div v-for="(annotation, index) in filteredAnnotations" :key="index" :style="getPageStyle">
<div :style="getAnnotationStyle(annotation)" :id="`annotation-${index}`"></div>
</div>
<div id="pdfAnnotations" v-if="args.annotations">
<div v-for="(annotation, index) in filteredAnnotations" :key="index" :style="getPageStyle">
<div :style="getAnnotationStyle(annotation)" :id="`annotation-${index}`"></div>
</div>
</div>
</div>
</div>
<div v-else-if="args.rendering==='legacy_embed'">
<embed :src="`data:application/pdf;base64,${args.binary}`" :width="`${args.width}`" :height="`${args.height}`" type="application/pdf"/>
Expand All @@ -24,9 +23,15 @@

<script>
import { onMounted, onUpdated, computed, ref} from "vue";
import "pdfjs-dist/build/pdf.worker.entry";
import "pdfjs-dist/web/pdf_viewer.css";
import "pdfjs-dist/build/pdf.worker.mjs";
import {getDocument} from "pdfjs-dist/build/pdf";
import {Streamlit} from "streamlit-component-lib";
import * as pdfjsLib from "pdfjs-dist";
const CMAP_URL = "pdfjs-dist/cmaps/";
const CMAP_PACKED = true;
const ENABLE_XFA = true;
export default {
props: ["args"],
Expand All @@ -43,12 +48,13 @@ export default {
if (isRenderingAllPages) {
return props.args.annotations;
}
const filteredAnnotations = props.args.annotations.filter(anno => {
return props.args.annotations.filter(anno => {
return props.args.pages_to_render.includes(Number(anno.page))
})
return filteredAnnotations;
});
const renderText = props.args.render_text === true
const pdfContainerStyle = computed(() => ({
width: props.args.width ? `${props.args.width}px` : `${maxWidth.value}px`,
height: props.args.height ? `${props.args.height}px` : 'auto',
Expand Down Expand Up @@ -84,7 +90,8 @@ export default {
height: `${annoObj.height * scale}px`,
outline: `${props.args.annotation_outline_size * scale}px solid`,
outlineColor: annoObj.color,
cursor: 'pointer'
cursor: 'pointer',
'z-index': 10
};
};
Expand All @@ -98,8 +105,6 @@ export default {
const createCanvasForPage = (page, scale, rotation, pageNumber) => {
const viewport = page.getViewport({scale, rotation});
// console.log(`Page viewport size: ${viewport.width}, ${viewport.height}`)
const ratio = window.devicePixelRatio || 1
const canvas = document.createElement("canvas");
Expand All @@ -115,18 +120,53 @@ export default {
};
const renderPage = async (page, canvas) => {
const renderPage = async (page, canvas, viewport) => {
const renderContext = {
canvasContext: canvas.getContext("2d"),
viewport: page.getViewport({
scale: pageScales.value[page._pageIndex],
rotation: page.rotate,
intent: "print",
})
viewport: viewport
};
const renderTask = page.render(renderContext);
await renderTask.promise;
if (renderText) {
const textContent = await page.getTextContent();
const textLayerDiv = document.createElement("div");
textLayerDiv.className = "textLayer"
// textLayerDiv.style.position = "absolute";
// textLayerDiv.style.height = `${viewport.height}px`;
// textLayerDiv.style.width = `${viewport.width}px`;
pdfjsLib.renderTextLayer({
textContentSource: textContent,
container: textLayerDiv,
viewport: viewport,
textDivs: []
})
const pageDiv = document.createElement('div');
pageDiv.className = 'page';
const canvasWrapper = document.createElement('div');
canvasWrapper.className = 'canvasWrapper';
canvasWrapper.appendChild(canvas);
pageDiv.style = "position: relative;";
const pdfViewer = document.getElementById("pdfViewer");
pageDiv.appendChild(canvasWrapper);
pageDiv.appendChild(textLayerDiv);
pdfViewer.appendChild(pageDiv);
} else {
const pageDiv = document.createElement('div');
pageDiv.className = 'page';
const canvasWrapper = document.createElement('div');
canvasWrapper.className = 'canvasWrapper';
canvasWrapper.appendChild(canvas);
pageDiv.style = "position: relative;";
pageDiv.appendChild(canvasWrapper);
pdfViewer.appendChild(pageDiv);
}
};
const renderPdfPages = async (pdf, pdfViewer, pagesToRender = null) => {
Expand All @@ -152,11 +192,18 @@ export default {
pageHeights.value.push(unscaledViewport.height)
if (pagesToRender.includes(pageNumber)) {
const canvas = createCanvasForPage(page, scale, rotation, pageNumber)
pdfViewer?.append(canvas)
// pdfViewer?.append(canvas)
pdfViewer.style.setProperty('--scale-factor', scale);
const viewport = page.getViewport({
scale: pageScales.value[page._pageIndex],
rotation: page.rotate,
intent: "print",
});
const ratio = window.devicePixelRatio || 1
totalHeight.value += canvas.height / ratio
await renderPage(page, canvas)
await renderPage(page, canvas, viewport)
}
}
// Subtract the margin for the last page as it's not needed
Expand All @@ -172,7 +219,12 @@ export default {
const loadPdfs = async (url) => {
try {
const loadingTask = await getDocument(url);
const loadingTask = await getDocument({
"url": url,
"cMapUrl": CMAP_URL,
"cMapPacked": CMAP_PACKED,
"enableXfa": ENABLE_XFA,
});
const pdfViewer = document.getElementById("pdfViewer");
clearExistingCanvases(pdfViewer);
Expand Down Expand Up @@ -216,7 +268,6 @@ export default {
});
onUpdated(() => {
// console.log("onUpdated")
setFrameHeight();
});
Expand Down
12 changes: 8 additions & 4 deletions streamlit_pdf_viewer/frontend/vue.config.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
const {VueLoaderPlugin} = require('vue-loader');
module.exports = {
publicPath: './',
configureWebpack: {
target: 'web',
module: {
rules: [
{
test: /\.js$/,
test: /\.(js|mjs)$/,
include: /node_modules\/pdfjs-dist/,
use: {
loader: 'babel-loader',
options: {
presets: ['@babel/preset-env'],
plugins: ['@babel/plugin-proposal-class-properties', '@babel/plugin-proposal-private-methods']
presets: [['@babel/preset-env', {targets: {esmodules: true}}]],
}
}
}
]
}
},
plugins: [
new VueLoaderPlugin()
],
}
}

0 comments on commit c3ef337

Please sign in to comment.