From 0e565064e615a3f65c0f5f5c7905114a3a9331a1 Mon Sep 17 00:00:00 2001 From: Patrick Corless Date: Sun, 11 Feb 2024 20:15:01 -0700 Subject: [PATCH] GH-314 redaction fixes --- .../java/org/icepdf/core/pobjects/Form.java | 9 +- .../java/org/icepdf/core/pobjects/Stream.java | 8 +- .../acroform/VariableTextFieldDictionary.java | 6 +- .../pobjects/annotations/AppearanceState.java | 2 +- .../pobjects/annotations/TextAnnotation.java | 2 +- .../parser/content/AbstractContentParser.java | 4 +- .../util/redaction/StringObjectWriter.java | 14 ++- .../ContentStreamRedactorCallback.java | 32 ++++-- examples/pom.xml | 1 + examples/redaction/build.gradle | 13 +++ examples/redaction/pom.xml | 22 ++++ .../examples/redaction/RedactionHeadless.java | 103 ++++++++++++++++++ settings.gradle | 5 +- 13 files changed, 193 insertions(+), 28 deletions(-) create mode 100644 examples/redaction/build.gradle create mode 100644 examples/redaction/pom.xml create mode 100644 examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java index e25364485..142cfeb09 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java +++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java @@ -172,8 +172,7 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto // try and find the form's resources dictionary. 
Resources leafResources = library.getResources(entries, RESOURCES_KEY); // apply parent resource, if the current resources is null - if (leafResources != null) { - } else { + if (leafResources == null) { leafResources = parentResource; } // Build a new content parser for the content streams and apply the @@ -184,11 +183,11 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto if (in != null) { try { logger.log(Level.FINER, () -> "Parsing form " + getPObjectReference()); - shapes = cp.parse(Stream.fromByteArray(in, this.getPObjectReference()), null).getShapes(); + shapes = cp.parse(Stream.fromByteArray(in, this), null).getShapes(); inited = true; } catch (InterruptedException e) { - // the initialization was interrupted so we need to make sure we bubble up the exception - // as we need to let any chained forms know so we can invalidate the page correctly + // the initialization was interrupted so, we need to make sure we bubble up the exception + // as we need to let any chained forms know so, we can invalidate the page correctly shapes = new Shapes(); logger.log(Level.FINE, "Parsing form interrupted parsing Form content stream.", e); throw new InterruptedException(e.getMessage()); diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java index a51fd65d3..e5d017b1b 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java +++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java @@ -342,13 +342,13 @@ protected List getNormalisedFilterNames() { * that aren't specifically stream, but we want to parse some kind of state from the given bytes. 
* * @param contentBytes decompressed bytes to be treated as a stream - * @param reference parent objects reference, can be null + * @param dictionary parent objects to base new stream from * @return mock stream object */ - public static Stream[] fromByteArray(byte[] contentBytes, Reference reference) { - Stream stream = new Stream(new DictionaryEntries(), null); + public static Stream[] fromByteArray(byte[] contentBytes, Dictionary dictionary) { + Stream stream = new Stream(dictionary.getEntries(), null); stream.setRawBytes(contentBytes); - stream.setPObjectReference(reference); + stream.setPObjectReference(dictionary.getPObjectReference()); return new Stream[]{stream}; } diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java index 42af3a8ca..d81cd15ce 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java +++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java @@ -129,8 +129,7 @@ public VariableTextFieldDictionary(Library library, DictionaryEntries entries) { if (resources != null) { try { ContentParser cp = new ContentParser(library, resources); - Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(), - this.getPObjectReference()); + Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(), this); cp.parseTextBlocks(possibleContentStream); GraphicsState gs = cp.getGraphicsState(); if (gs != null) { @@ -172,8 +171,7 @@ public String generateDefaultAppearance(String content, Resources resources) { } ContentParser cp = new ContentParser(library, resources); // usefull parser so we parse the font color. 
- Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(), - this.getPObjectReference()); + Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(), this); cp.parse(possibleContentStream, null); GraphicsState gs = cp.getGraphicsState(); if (gs != null) { diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java index 17737b0e7..66c78764b 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java +++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java @@ -140,7 +140,7 @@ public void setContentStream(byte[] contentBytes) { try { ContentParser cp = new ContentParser(library, resources); shapes = cp.parse( - Stream.fromByteArray(contentBytes, this.getPObjectReference()), + Stream.fromByteArray(contentBytes, this), null).getShapes(); } catch (Exception e) { shapes = new Shapes(); diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java index 36e21d022..8812d1b48 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java +++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java @@ -277,7 +277,7 @@ public void resetAppearanceStream(double dx, double dy, AffineTransform pageTran try { Resources resources = form.getResources(); ContentParser cp = new ContentParser(library, resources); - shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this.getPObjectReference()), + shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this), null).getShapes(); } catch (Exception e) { shapes = new Shapes(); diff --git a/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java 
b/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java index 041f3781c..cb38489d5 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java +++ b/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java @@ -526,7 +526,9 @@ protected static GraphicsState consume_Do(GraphicsState graphicState, Stack glyphTexts) { return true; } - public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList textOperators) throws IOException { - float lastTdOffset = 0; + public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList textOperators, + float lastTdOffset) throws IOException { int operatorCount = 0; for (TextSprite textSprite : textOperators) { ArrayList glyphTexts = textSprite.getGlyphSprites(); @@ -82,9 +82,9 @@ public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList return lastTdOffset; } - public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList textOperators) throws IOException { + public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList textOperators, + float lastTdOffset) throws IOException { int operatorCount = 0; - float lastTdOffset = 0; for (TextSprite textSprite : textOperators) { ArrayList glyphTexts = textSprite.getGlyphSprites(); @@ -129,6 +129,12 @@ public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList private static float writeLastTdOffset(ByteArrayOutputStream contentOutputStream, float lastTdOffset, GlyphText glyphText) throws IOException { float advance = glyphText.getX() + glyphText.getAdvanceX(); + // still not sure how to handle this in a 100% of cases as advance can technically be negative + // but if we have a negative glyph advance we likely have a negative font value and should + // treat this as a positive value when writing the advance. 
+ if (glyphText.getAdvanceX() < 0) { + advance = Math.abs(advance); + } return writeTdOffset(contentOutputStream, advance, lastTdOffset); } diff --git a/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java b/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java index 08efb9bcd..ba0474cbb 100644 --- a/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java +++ b/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java @@ -14,6 +14,7 @@ import org.icepdf.core.util.redaction.InlineImageWriter; import org.icepdf.core.util.redaction.StringObjectWriter; +import java.awt.geom.AffineTransform; import java.awt.geom.GeneralPath; import java.awt.geom.Rectangle2D; import java.io.ByteArrayOutputStream; @@ -45,6 +46,7 @@ public class ContentStreamRedactorCallback { private int lastTextPosition; private float lastTjOffset; private final Library library; + private final AffineTransform transform; private boolean modifiedStream; private final List redactionAnnotations; @@ -52,10 +54,20 @@ public class ContentStreamRedactorCallback { public ContentStreamRedactorCallback(Library library, List redactionAnnotations) { this.redactionAnnotations = redactionAnnotations; this.library = library; + this.transform = new AffineTransform(); } - public ContentStreamRedactorCallback createChildInstance() { - return new ContentStreamRedactorCallback(this.library, this.redactionAnnotations); + private ContentStreamRedactorCallback(Library library, List redactionAnnotations, + AffineTransform transform) { + this.redactionAnnotations = redactionAnnotations; + this.library = library; + // xObject text will have its own transform that must be taken into account when determining + // intersections of the redaction and glyph bounds. 
+ this.transform = transform; + } + + public ContentStreamRedactorCallback createChildInstance(AffineTransform transform) { + return new ContentStreamRedactorCallback(this.library, this.redactionAnnotations, transform); } public void startContentStream(Stream stream) throws IOException { @@ -102,13 +114,18 @@ public void setLastTokenPosition(int position, Integer token) throws IOException (position - lastTokenPosition)); lastTokenPosition = position; } else if (token == T_STAR || token == TD || token == Td) { + // relative operators, so adjust for the redacted content. writeLastTjOffset(); - lastTjOffset = 0; burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition, (position - lastTokenPosition)); + lastTjOffset = 0; lastTokenPosition = position; - } else if (token == BT) { + } else if (token == BT || token == Tm) { + burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition, + (position - lastTokenPosition)); + // hard reset, new coordinate system lastTjOffset = 0; + lastTokenPosition = position; } lastTextPosition = position; } @@ -125,7 +142,7 @@ private void writeLastTjOffset() throws IOException { } private boolean isTextLayoutToken(int token) { - return token == Tj || token == TJ || token == Td || token == TD || token == T_STAR || token == BT; + return token == Tj || token == TJ || token == Td || token == TD || token == Tm || token == T_STAR || token == BT; } /** @@ -136,6 +153,7 @@ private boolean isTextLayoutToken(int token) { public void checkAndRedactText(GlyphText glyphText) { for (RedactionAnnotation annotation : redactionAnnotations) { GeneralPath reactionPaths = annotation.getMarkupPath(); + glyphText.normalizeToUserSpace(transform, null); Rectangle2D glyphBounds = glyphText.getBounds(); if (reactionPaths != null && reactionPaths.contains(glyphBounds)) { logger.finer(() -> "Redacting Text: " + glyphText.getCid() + " " + glyphText.getUnicode()); @@ -183,9 +201,9 @@ public void 
writeRedactedStringObject(ArrayList textOperators, final if (StringObjectWriter.containsRedactions(textOperators)) { // apply redaction if (Operands.TJ == operand) { - lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators); + lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators, lastTjOffset); } else { - lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators); + lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators, lastTjOffset); } modifiedStream = true; } else { diff --git a/examples/pom.xml b/examples/pom.xml index fbb94f42c..92a4010ed 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -23,6 +23,7 @@ loadingEvents printservices search + redaction signatures diff --git a/examples/redaction/build.gradle b/examples/redaction/build.gradle new file mode 100644 index 000000000..02395c829 --- /dev/null +++ b/examples/redaction/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'java' + id 'application' +} + +dependencies { + implementation project(':core:core-awt'), project(':viewer:viewer-awt') +} + +description 'java redaction example' + +mainClassName = "org.icepdf.examples.redaction.RedactionHeadless" +applicationDefaultJvmArgs = ["-Xms64m", "-Xmx1024m"] \ No newline at end of file diff --git a/examples/redaction/pom.xml b/examples/redaction/pom.xml new file mode 100644 index 000000000..bd20238d3 --- /dev/null +++ b/examples/redaction/pom.xml @@ -0,0 +1,22 @@ + + + 4.0.0 + + org.icepdf.examples + examples + 7.2.0-SNAPSHOT + + redaction + pom + ICEpdf :: Examples :: Redaction + + The ICEpdf redaction examples + + + + component + headless + + + diff --git a/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java b/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java new file mode 100644 index 000000000..33de9178d --- /dev/null +++ 
b/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java @@ -0,0 +1,103 @@ +package org.icepdf.examples.redaction; + +import org.icepdf.core.pobjects.Document; +import org.icepdf.core.pobjects.Page; +import org.icepdf.core.pobjects.annotations.Annotation; +import org.icepdf.core.pobjects.annotations.AnnotationFactory; +import org.icepdf.core.pobjects.annotations.RedactionAnnotation; +import org.icepdf.core.pobjects.graphics.text.WordText; +import org.icepdf.core.search.DocumentSearchController; +import org.icepdf.core.util.updater.WriteMode; +import org.icepdf.ri.common.search.DocumentSearchControllerImpl; +import org.icepdf.ri.util.FontPropertiesManager; + +import java.awt.*; +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.util.ArrayList; +import java.util.Collections; + +/** + * The RedactionHeadless class is an example of how to use text search results + * as inputs for the creation of redaction annotations. Once the annotations are created, the + * document is exported, burning the redaction annotations into the PDF's content streams. + * The resulting document will no longer have text where the Redaction annotations intersected. + * + * @since 7.2.0 + */ +public class RedactionHeadless { + public static void main(String[] args) { + + FontPropertiesManager.getInstance().loadOrReadSystemFonts(); + + // Get a file from the command line to open + String filePath = args[0]; + + // save page captures to file. + float scale = 1.0f; + float rotation = 0f; + + // open the document + Document document = new Document(); + try { + document.setFile(filePath); + + // get the search controller + DocumentSearchController searchController = + new DocumentSearchControllerImpl(document); + // add the search term whose matches will be redacted. 
+ searchController.addSearchTerm("redaction", false, false); + + ArrayList foundWords; + RedactionAnnotation redactionAnnotation; + + // iterated over each page creating redaction from search terms + for (int i = 0, max = document.getNumberOfPages(); i < max; i++) { + Page page = document.getPageTree().getPage(i); + page.init(); + + // search the page + foundWords = searchController.searchPage(i); + if (foundWords == null) { + System.out.println("No Search terms found"); + return; + } + for (WordText wordText : foundWords) { + final Rectangle tBbox = wordText.getBounds().getBounds(); + + redactionAnnotation = (RedactionAnnotation) + AnnotationFactory.buildAnnotation( + document.getPageTree().getLibrary(), + Annotation.SUBTYPE_REDACT, + tBbox); + + if (redactionAnnotation != null) { + redactionAnnotation.setColor(Color.BLACK); + redactionAnnotation.setMarkupBounds(new ArrayList<>(Collections.singletonList(tBbox))); + redactionAnnotation.setMarkupPath(new GeneralPath(tBbox)); + redactionAnnotation.setBBox(tBbox); + redactionAnnotation.resetAppearanceStream(new AffineTransform()); + page.addAnnotation(redactionAnnotation, true); + } + } + } + + // burn the redaction into the PDF by exporting the document. + File file = new File("redacted_output.pdf"); + try (final FileOutputStream fileOutputStream = new FileOutputStream(file); + final BufferedOutputStream buf = new BufferedOutputStream(fileOutputStream, 8192)) { + document.writeToOutputStream(buf, WriteMode.FULL_UPDATE); + } + + // clean up resources + document.dispose(); + + } catch (Exception e) { + e.printStackTrace(); + } + } + +} diff --git a/settings.gradle b/settings.gradle index a51c93327..5f2d39909 100644 --- a/settings.gradle +++ b/settings.gradle @@ -22,4 +22,7 @@ include 'core:core-awt', 'examples:signatures' -rootProject.name = 'icepdf' \ No newline at end of file +rootProject.name = 'icepdf' +include 'examples:redaction' +findProject(':examples:redaction')?.name = 'redaction' +