From 7aea6695cba5a530087525608a6938adb91ff675 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 09:07:31 +0200 Subject: [PATCH 01/10] save annotations and page identifiers to be returned to the streamlit backend --- streamlit_pdf_viewer/__init__.py | 8 ++- .../frontend/src/PdfViewer.vue | 56 ++++++++++++------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/streamlit_pdf_viewer/__init__.py b/streamlit_pdf_viewer/__init__.py index 676d2c8..ad90a65 100644 --- a/streamlit_pdf_viewer/__init__.py +++ b/streamlit_pdf_viewer/__init__.py @@ -138,6 +138,12 @@ def pdf_viewer(input: Union[str, Path, bytes], render_text=True, key="bao" ) + st.markdown(viewer) + st.markdown(type(viewer)) + + annotations_id = viewer['annotations'] + pages_id = viewer['pages'] + with tab2: st.markdown("tab 2") viewer2 = pdf_viewer( @@ -147,4 +153,4 @@ def pdf_viewer(input: Union[str, Path, bytes], render_text=True, key="miao", resolution_boost=4 - ) \ No newline at end of file + ) diff --git a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue index dc8b2ce..879820e 100644 --- a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue +++ b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue @@ -4,7 +4,8 @@
-
+ +
@@ -41,6 +42,8 @@ export default { const maxWidth = ref(0); const pageScales = ref([]); const pageHeights = ref([]); + const loadedPages = ref([]); + const loadedAnnotations = ref([]); const isRenderingAllPages = props.args.pages_to_render.length === 0; @@ -80,9 +83,9 @@ export default { return height; }; - const getAnnotationStyle = (annoObj) => { + const getAnnotationStyle = (annoObj, index) => { const scale = pageScales.value[annoObj.page - 1]; - return { + const obj = { position: 'absolute', left: `${annoObj.x * scale}px`, top: `${calculatePdfsHeight(annoObj.page) + annoObj.y * scale}px`, @@ -93,6 +96,11 @@ export default { cursor: 'pointer', 'z-index': 10 }; + if (index) { + obj['id'] = `annotation-${index}`; + loadedAnnotations.value.push(obj['id']) + } + return obj }; const clearExistingCanvases = (pdfViewer) => { @@ -143,7 +151,7 @@ export default { textDivs: [] }) await textLayer.render() - + const pageDiv = document.createElement('div'); pageDiv.className = 'page'; @@ -194,17 +202,8 @@ export default { rotation: rotation, }) - // console.log(`unscaledViewport ${pageNumber}`) - // console.log(unscaledViewport) - - // console.log("Max width") - // console.log(maxWidth.value) - - // console.log("Height") - // console.log(props.args.height) - if (props.args.height > 0) { - let widthScale = unscaledViewport.width / unscaledViewport.height + const widthScale = unscaledViewport.width / unscaledViewport.height const possibleScaledWidth = widthScale * props.args.height if (maxWidth.value === 0) { maxWidth.value = possibleScaledWidth @@ -220,9 +219,6 @@ export default { if (pagesToRender.includes(pageNumber)) { const canvas = createCanvasForPage(page, scale, rotation, pageNumber, resolutionBoost) - // console.log(`canvas`) - // console.log(canvas) - // pdfViewer?.append(canvas) pdfViewer.style.setProperty('--scale-factor', scale); @@ -232,12 +228,13 @@ export default { intent: "print", }); - // console.log(`Scaled viewport`) - // console.log(viewport) - const ratio 
= (window.devicePixelRatio || 1) * resolutionBoost totalHeight.value += canvas.height / ratio await renderPage(page, canvas, viewport) + if (canvas.id !== undefined) { + console.log(canvas.id) + loadedPages.value.push(canvas.id) + } } } // Subtract the margin for the last page as it's not needed @@ -296,7 +293,24 @@ export default { if (props.args.rendering === "unwrap") { loadPdfs(binaryDataUrl) .then(setFrameHeight) - .then(Streamlit.setComponentReady); + .then(Streamlit.setComponentReady) + .then( + function() { + const pages_ids = new Set() + const annotations_ids = new Set() + + let j = 0 + + for(j=0; j < loadedAnnotations.value.length; j++) { + annotations_ids.add(loadedAnnotations.value[j]); + } + for(j=0; j < loadedPages.value.length; j++) { + pages_ids.add(loadedPages.value[j]); + } + + Streamlit.setComponentValue({"pages": Array.from(pages_ids), "annotations": Array.from(annotations_ids)}) + } + ); } else { setFrameHeight(); Streamlit.setComponentReady(); From 6f0ee0faaa9451e32deeca917c69149b49b25618 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 11:20:21 +0200 Subject: [PATCH 02/10] add scrolling to page or to annotation --- streamlit_pdf_viewer/__init__.py | 78 ++++++++++++++++--- .../frontend/src/PdfViewer.vue | 58 ++++++++------ 2 files changed, 102 insertions(+), 34 deletions(-) diff --git a/streamlit_pdf_viewer/__init__.py b/streamlit_pdf_viewer/__init__.py index ad90a65..deecfa5 100644 --- a/streamlit_pdf_viewer/__init__.py +++ b/streamlit_pdf_viewer/__init__.py @@ -6,7 +6,7 @@ import streamlit.components.v1 as components import json -_RELEASE = True +_RELEASE = False RENDERING_EMBED = "legacy_embed" RENDERING_IFRAME = "legacy_iframe" RENDERING_UNWRAP = "unwrap" @@ -35,7 +35,9 @@ def pdf_viewer(input: Union[str, Path, bytes], rendering: str = RENDERING_UNWRAP, pages_to_render: List[int] = (), render_text: bool = False, - resolution_boost: int = 1 + resolution_boost: int = 1, + scroll_to_page: int = None, + scroll_to_annotation: 
int = None, ): """ pdf_viewer function to display a PDF file in a Streamlit app. @@ -54,6 +56,8 @@ def pdf_viewer(input: Union[str, Path, bytes], working to implement for the "unwrap" method. :param render_text: Whether to enable selection of text in the PDF viewer. Defaults to False. :param resolution_boost: Boost the resolution by a factor from 2 to 10. Defaults to 1. + :param scroll_to_page: Scroll to a specific page in the PDF. The parameter is an integer, which represent the positional value of the page. E.g. 1, will be the first page. Defaults to None. + :param scroll_to_annotation: Scroll to a specific annotation in the PDF. The parameter is an integer, which represent the positional value of the annotation. E.g. 1, will be the first annotation. Defaults to None. The function reads the PDF file (from a file path, URL, or binary data), encodes it in base64, and uses a Streamlit component to render it in the app. It supports optional annotations and adjustable margins. @@ -74,6 +78,16 @@ def pdf_viewer(input: Union[str, Path, bytes], elif resolution_boost > 10: raise ValueError("ratio_boost must be lower than 10") + if scroll_to_page: + if scroll_to_annotation: + raise ValueError("scroll_to_page and scroll_to_annotation cannot be used together") + if scroll_to_page < 1: + scroll_to_page = None + + else: + if scroll_to_annotation < 1: + scroll_to_annotation = None + if type(input) is not bytes: with open(input, 'rb') as fo: binary = fo.read() @@ -99,13 +113,16 @@ def pdf_viewer(input: Union[str, Path, bytes], rendering=rendering, pages_to_render=pages_to_render, render_text=render_text, - resolution_boost=resolution_boost + resolution_boost=resolution_boost, + scroll_to_page=scroll_to_page, + scroll_to_annotation=scroll_to_annotation ) return component_value if not _RELEASE: import streamlit as st + from streamlit import markdown # from glob import glob @@ -117,6 +134,29 @@ def pdf_viewer(input: Union[str, Path, bytes], # with st.container(height=600): # 
pdf_viewer(path, width=800, render_text=True, resolution_boost=values[id]) # + # def scroll_to_page(page): + # st.markdown( + # """ + # function(){ + # document.getElementById(""" + page + """).scrollIntoView({behavior: 'smooth'}) + # }; + # + # function() + # """, unsafe_allow_html=True) + + # print(page) + # st.components.v1.html( + # """ + # + # """ + # ) with open("resources/test.pdf", 'rb') as fo: binary = fo.read() @@ -129,20 +169,34 @@ def pdf_viewer(input: Union[str, Path, bytes], tab1, tab2 = st.tabs(["tab1", "tab2"]) + # st.markdown(""" + # """, unsafe_allow_html=True) + # @st.fragment + # def show_buttons_scrolling(pages_id: List): + # for page in pages_id: + # print(page) + # st.button(f"Page {page}", key=f"page_{page}", on_click=scroll_to_page, args=(page,)) + with tab1: st.markdown("tab 1") - with st.container(height=300): + with st.container(height=400): viewer = pdf_viewer( binary, annotations=annotations, render_text=True, - key="bao" + key="bao", + scroll_to_page=3 ) - st.markdown(viewer) - st.markdown(type(viewer)) - - annotations_id = viewer['annotations'] - pages_id = viewer['pages'] + # st.markdown(viewer) + # st.markdown(type(viewer)) + # if type(viewer) == dict: + # annotations_id = viewer['annotations'] + # pages_id = viewer['pages'] + # show_buttons_scrolling(pages_id) with tab2: st.markdown("tab 2") @@ -152,5 +206,7 @@ def pdf_viewer(input: Union[str, Path, bytes], annotations=annotations, render_text=True, key="miao", - resolution_boost=4 + resolution_boost=4, + scroll_to_annotation=2 ) + diff --git a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue index 879820e..38d7c1c 100644 --- a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue +++ b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue @@ -4,8 +4,7 @@
- -
+
@@ -97,8 +96,7 @@ export default { 'z-index': 10 }; if (index) { - obj['id'] = `annotation-${index}`; - loadedAnnotations.value.push(obj['id']) + loadedAnnotations.value.push(`annotation-${index}`); } return obj }; @@ -232,7 +230,6 @@ export default { totalHeight.value += canvas.height / ratio await renderPage(page, canvas, viewport) if (canvas.id !== undefined) { - console.log(canvas.id) loadedPages.value.push(canvas.id) } } @@ -266,6 +263,36 @@ export default { } }; + const scrollToItem = () => { + if (props.args.scroll_to_page) { + const page = document.getElementById(`canvas_page_${props.args.scroll_to_page}`); + if (page) { + page.scrollIntoView({behavior: "smooth"}); + } + } else if (props.args.scroll_to_annotation) { + const annotation = document.getElementById(`annotation-${props.args.scroll_to_annotation}`); + if (annotation) { + annotation.scrollIntoView({behavior: "smooth", block: "center"}); + } + } + }; + + const collectAndReturnIds = () => { + const pages_ids = new Set() + const annotations_ids = new Set() + + let j + + for (j = 0; j < loadedAnnotations.value.length; j++) { + annotations_ids.add(loadedAnnotations.value[j]); + } + for (j = 0; j < loadedPages.value.length; j++) { + pages_ids.add(loadedPages.value[j]); + } + + Streamlit.setComponentValue({"pages": Array.from(pages_ids), "annotations": Array.from(annotations_ids)}) + } + const setFrameHeight = () => { Streamlit.setFrameHeight(props.args.height || totalHeight.value); @@ -293,24 +320,8 @@ export default { if (props.args.rendering === "unwrap") { loadPdfs(binaryDataUrl) .then(setFrameHeight) - .then(Streamlit.setComponentReady) - .then( - function() { - const pages_ids = new Set() - const annotations_ids = new Set() - - let j = 0 - - for(j=0; j < loadedAnnotations.value.length; j++) { - annotations_ids.add(loadedAnnotations.value[j]); - } - for(j=0; j < loadedPages.value.length; j++) { - pages_ids.add(loadedPages.value[j]); - } - - Streamlit.setComponentValue({"pages": Array.from(pages_ids), 
"annotations": Array.from(annotations_ids)}) - } - ); + .then(collectAndReturnIds) + .then(Streamlit.setComponentReady); } else { setFrameHeight(); Streamlit.setComponentReady(); @@ -319,6 +330,7 @@ export default { onUpdated(() => { setFrameHeight(); + scrollToItem(); }); From 8ea4384aa942d3e10960efa7cfbbf16b8f993b0a Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 11:36:35 +0200 Subject: [PATCH 03/10] release = true --- streamlit_pdf_viewer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamlit_pdf_viewer/__init__.py b/streamlit_pdf_viewer/__init__.py index deecfa5..835d2ae 100644 --- a/streamlit_pdf_viewer/__init__.py +++ b/streamlit_pdf_viewer/__init__.py @@ -6,7 +6,7 @@ import streamlit.components.v1 as components import json -_RELEASE = False +_RELEASE = True RENDERING_EMBED = "legacy_embed" RENDERING_IFRAME = "legacy_iframe" RENDERING_UNWRAP = "unwrap" From 9b46b7ac73abe6d32e990ffbdc015418aa1e0285 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 13:33:54 +0200 Subject: [PATCH 04/10] fix incorrect None parameters handling --- pytest.ini | 4 ++-- streamlit_pdf_viewer/__init__.py | 4 ++-- streamlit_pdf_viewer/frontend/src/PdfViewer.vue | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pytest.ini b/pytest.ini index 4d1d134..9fca5a3 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ # content of pytest.ini [pytest] -; addopts = --headed --browser firefox --browser chromium -addopts = --browser firefox --browser chromium \ No newline at end of file +addopts = --headed --browser firefox --browser chromium +; addopts = --browser firefox --browser chromium \ No newline at end of file diff --git a/streamlit_pdf_viewer/__init__.py b/streamlit_pdf_viewer/__init__.py index 835d2ae..3fce530 100644 --- a/streamlit_pdf_viewer/__init__.py +++ b/streamlit_pdf_viewer/__init__.py @@ -81,11 +81,11 @@ def pdf_viewer(input: Union[str, Path, bytes], if scroll_to_page: if 
scroll_to_annotation: raise ValueError("scroll_to_page and scroll_to_annotation cannot be used together") - if scroll_to_page < 1: + if scroll_to_page and scroll_to_page < 1: scroll_to_page = None else: - if scroll_to_annotation < 1: + if scroll_to_annotation and scroll_to_annotation < 1: scroll_to_annotation = None if type(input) is not bytes: diff --git a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue index 38d7c1c..761da05 100644 --- a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue +++ b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue @@ -330,7 +330,9 @@ export default { onUpdated(() => { setFrameHeight(); - scrollToItem(); + if (props.args.rendering === "unwrap") { + scrollToItem(); + } }); From a9a629570d477d83e6d24e038cf973807cddb2e7 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 13:42:07 +0200 Subject: [PATCH 05/10] don't use the headed browser --- pytest.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytest.ini b/pytest.ini index 9fca5a3..4d1d134 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ # content of pytest.ini [pytest] -addopts = --headed --browser firefox --browser chromium -; addopts = --browser firefox --browser chromium \ No newline at end of file +; addopts = --headed --browser firefox --browser chromium +addopts = --browser firefox --browser chromium \ No newline at end of file From b81e39711f4213b49af875a92898850992ca17e8 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 14:35:40 +0200 Subject: [PATCH 06/10] uniform annotations and pages convention numbers --- streamlit_pdf_viewer/frontend/src/PdfViewer.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue index 761da05..f0a905c 100644 --- a/streamlit_pdf_viewer/frontend/src/PdfViewer.vue +++ b/streamlit_pdf_viewer/frontend/src/PdfViewer.vue @@ -270,7 
+270,7 @@ export default { page.scrollIntoView({behavior: "smooth"}); } } else if (props.args.scroll_to_annotation) { - const annotation = document.getElementById(`annotation-${props.args.scroll_to_annotation}`); + const annotation = document.getElementById(`annotation-${props.args.scroll_to_annotation - 1}`); if (annotation) { annotation.scrollIntoView({behavior: "smooth", block: "center"}); } From ded5878c51cf4e7732e9405dcb25272ce2360d70 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 16 Aug 2024 17:11:41 +0200 Subject: [PATCH 07/10] use explicit ifs --- streamlit_pdf_viewer/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/streamlit_pdf_viewer/__init__.py b/streamlit_pdf_viewer/__init__.py index 3fce530..fc380bc 100644 --- a/streamlit_pdf_viewer/__init__.py +++ b/streamlit_pdf_viewer/__init__.py @@ -78,14 +78,14 @@ def pdf_viewer(input: Union[str, Path, bytes], elif resolution_boost > 10: raise ValueError("ratio_boost must be lower than 10") - if scroll_to_page: - if scroll_to_annotation: + if scroll_to_page is not None: + if scroll_to_annotation is not None: raise ValueError("scroll_to_page and scroll_to_annotation cannot be used together") - if scroll_to_page and scroll_to_page < 1: + if scroll_to_page is not None and scroll_to_page < 1: scroll_to_page = None else: - if scroll_to_annotation and scroll_to_annotation < 1: + if scroll_to_annotation is not None and scroll_to_annotation < 1: scroll_to_annotation = None if type(input) is not bytes: From 65f47cf30d2f695c0d1f41cac2576a36535168ef Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 18 Aug 2024 20:46:08 +0200 Subject: [PATCH 08/10] enable RC --- .bumpversion.toml | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 44c11d2..5405d97 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -4,22 +4,22 @@ commit = "true" tag = "true" tag_name = "v{new_version}" 
-#parse = """(?x) -# (?P<major>0|[1-9]\\d*)\\. -# (?P<minor>0|[1-9]\\d*)\\. -# (?P<patch>0|[1-9]\\d*) -# (?: -# - # dash separator for pre-release section -# (?P<pre_l>[a-zA-Z-]+) # pre-release label -# (?P<pre_n>0|[1-9]\\d*) # pre-release version number -# )? # pre-release section is optional -#""" -# -#serialize = [ -# "{major}.{minor}.{patch}-{pre_l}{pre_n}", -# "{major}.{minor}.{patch}", -#] -# -#[tool.bumpversion.parts.pre_l] -#values = ["dev", "rc", "final"] -#optional_value = "final" \ No newline at end of file +parse = """(?x) + (?P<major>0|[1-9]\\d*)\\. + (?P<minor>0|[1-9]\\d*)\\. + (?P<patch>0|[1-9]\\d*) + (?: + - # dash separator for pre-release section + (?P<pre_l>[a-zA-Z-]+) # pre-release label + (?P<pre_n>0|[1-9]\\d*) # pre-release version number + )? # pre-release section is optional +""" + +serialize = [ + "{major}.{minor}.{patch}-{pre_l}{pre_n}", + "{major}.{minor}.{patch}", +] + +[tool.bumpversion.parts.pre_l] +values = ["dev", "rc", "final"] +optional_value = "final" \ No newline at end of file From 6c8466e66d29e924655b1946aefacfe0c995ec86 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 18 Aug 2024 21:03:32 +0200 Subject: [PATCH 09/10] =?UTF-8?q?Bump=20version:=200.0.17=20=E2=86=92=200.?= =?UTF-8?q?0.18-dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 5405d97..04afb09 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.0.17" +current_version = "0.0.18-dev0" commit = "true" tag = "true" tag_name = "v{new_version}" From f0b1b1900d5f4b9818e6904803558aa7733e130b Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 18 Aug 2024 21:29:47 +0200 Subject: [PATCH 10/10] add documentation --- README.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 399b5ab..f475a88 100644 --- a/README.md +++ b/README.md @@
-34,17 +34,19 @@ pdf_viewer("str, path or bytes") In the following table the list of parameters that can be provided to the `pdf_viewer` function: -| name | description | -|-------------------------|| -| input | The source of the PDF file. Accepts a file path, URL, or binary data. | -| width | Width of the PDF viewer in pixels. It defaults to 700 pixels. | -| height | Height of the PDF viewer in pixels. If not provided, the viewer shows the whole content. | -| annotations | A list of annotations to be overlaid on the PDF. Format is described here. | -| pages_vertical_spacing | The vertical space (in pixels) between each page of the PDF. Defaults to 2 pixels. | -| annotation_outline_size | Size of the outline around each annotation in pixels. Defaults to 1 pixel. | -| rendering | Type of rendering: `unwrap` (default), `legacy_iframe`, or `legacy_embed`. The default value, `unwrap` shows the PDF document using pdf.js, and supports the visualisation of annotations. Other values are `legacy_iframe` and `legacy_embed` which use the legacy approach of injecting the document into an `` or `