From 0e565064e615a3f65c0f5f5c7905114a3a9331a1 Mon Sep 17 00:00:00 2001
From: Patrick Corless <pcorless@users.noreply.github.com>
Date: Sun, 11 Feb 2024 20:15:01 -0700
Subject: [PATCH] GH-314 redaction fixes

---
 .../java/org/icepdf/core/pobjects/Form.java   |   9 +-
 .../java/org/icepdf/core/pobjects/Stream.java |   8 +-
 .../acroform/VariableTextFieldDictionary.java |   6 +-
 .../pobjects/annotations/AppearanceState.java |   2 +-
 .../pobjects/annotations/TextAnnotation.java  |   2 +-
 .../parser/content/AbstractContentParser.java |   4 +-
 .../util/redaction/StringObjectWriter.java    |  14 ++-
 .../ContentStreamRedactorCallback.java        |  32 ++++--
 examples/pom.xml                              |   1 +
 examples/redaction/build.gradle               |  13 +++
 examples/redaction/pom.xml                    |  22 ++++
 .../examples/redaction/RedactionHeadless.java | 103 ++++++++++++++++++
 settings.gradle                               |   5 +-
 13 files changed, 193 insertions(+), 28 deletions(-)
 create mode 100644 examples/redaction/build.gradle
 create mode 100644 examples/redaction/pom.xml
 create mode 100644 examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java
diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java
index e25364485..142cfeb09 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java
@@ -172,8 +172,7 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto
         // try and find the form's resources dictionary.
         Resources leafResources = library.getResources(entries, RESOURCES_KEY);
         // apply parent resource, if the current resources is null
-        if (leafResources != null) {
-        } else {
+        if (leafResources == null) {
             leafResources = parentResource;
         }
         // Build a new content parser for the content streams and apply the
@@ -184,11 +183,11 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto
         if (in != null) {
             try {
                 logger.log(Level.FINER, () -> "Parsing form " + getPObjectReference());
-                shapes = cp.parse(Stream.fromByteArray(in, this.getPObjectReference()), null).getShapes();
+                shapes = cp.parse(Stream.fromByteArray(in, this), null).getShapes();
                 inited = true;
             } catch (InterruptedException e) {
-                // the initialization was interrupted so we need to make sure we bubble up the exception
-                // as we need to let any chained forms know so we can invalidate the page correctly
+                // the initialization was interrupted, so we need to make sure we bubble up the exception
+                // as we need to let any chained forms know, so we can invalidate the page correctly
                 shapes = new Shapes();
                 logger.log(Level.FINE, "Parsing form interrupted parsing Form content stream.", e);
                 throw new InterruptedException(e.getMessage());
diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java
index a51fd65d3..e5d017b1b 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/Stream.java
@@ -342,13 +342,13 @@ protected List<String> getNormalisedFilterNames() {
      * that aren't specifically stream, but we want to parse some kind of state from the given bytes.
      *
      * @param contentBytes decompressed bytes to be treated as a stream
-     * @param reference    parent objects reference, can be null
+     * @param dictionary   parent object whose entries and reference are used for the new stream
      * @return mock stream object
      */
-    public static Stream[] fromByteArray(byte[] contentBytes, Reference reference) {
-        Stream stream = new Stream(new DictionaryEntries(), null);
+    public static Stream[] fromByteArray(byte[] contentBytes, Dictionary dictionary) {
+        Stream stream = new Stream(dictionary.getEntries(), null);
         stream.setRawBytes(contentBytes);
-        stream.setPObjectReference(reference);
+        stream.setPObjectReference(dictionary.getPObjectReference());
         return new Stream[]{stream};
     }
 
diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java
index 42af3a8ca..d81cd15ce 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/acroform/VariableTextFieldDictionary.java
@@ -129,8 +129,7 @@ public VariableTextFieldDictionary(Library library, DictionaryEntries entries) {
             if (resources != null) {
                 try {
                     ContentParser cp = new ContentParser(library, resources);
-                    Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(),
-                            this.getPObjectReference());
+                    Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(), this);
                     cp.parseTextBlocks(possibleContentStream);
                     GraphicsState gs = cp.getGraphicsState();
                     if (gs != null) {
@@ -172,8 +171,7 @@ public String generateDefaultAppearance(String content, Resources resources) {
             }
             ContentParser cp = new ContentParser(library, resources);
             // usefull parser so we parse the font color.
-            Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(),
-                    this.getPObjectReference());
+            Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(), this);
             cp.parse(possibleContentStream, null);
             GraphicsState gs = cp.getGraphicsState();
             if (gs != null) {
diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java
index 17737b0e7..66c78764b 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/AppearanceState.java
@@ -140,7 +140,7 @@ public void setContentStream(byte[] contentBytes) {
         try {
             ContentParser cp = new ContentParser(library, resources);
             shapes = cp.parse(
-                    Stream.fromByteArray(contentBytes, this.getPObjectReference()),
+                    Stream.fromByteArray(contentBytes, this),
                     null).getShapes();
         } catch (Exception e) {
             shapes = new Shapes();
diff --git a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java
index 36e21d022..8812d1b48 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/pobjects/annotations/TextAnnotation.java
@@ -277,7 +277,7 @@ public void resetAppearanceStream(double dx, double dy, AffineTransform pageTran
         try {
             Resources resources = form.getResources();
             ContentParser cp = new ContentParser(library, resources);
-            shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this.getPObjectReference()),
+            shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this),
                     null).getShapes();
         } catch (Exception e) {
             shapes = new Shapes();
diff --git a/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java b/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java
index 041f3781c..cb38489d5 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/util/parser/content/AbstractContentParser.java
@@ -526,7 +526,9 @@ protected static GraphicsState consume_Do(GraphicsState graphicState, Stack<Obje
             // need a new instance, so we don't corrupt the stream offset.
             ContentStreamRedactorCallback formContentStreamRedactorCallback = null;
             if (contentStreamRedactorCallback != null) {
-                formContentStreamRedactorCallback = contentStreamRedactorCallback.createChildInstance();
+                AffineTransform xObjectTransform = graphicState.getCTM();
+                xObjectTransform.concatenate(formXObject.getMatrix());
+                formContentStreamRedactorCallback = contentStreamRedactorCallback.createChildInstance(xObjectTransform);
             }
             formXObject.init(formContentStreamRedactorCallback);
             // 2. concatenate matrix entry with the current CTM
diff --git a/core/core-awt/src/main/java/org/icepdf/core/util/redaction/StringObjectWriter.java b/core/core-awt/src/main/java/org/icepdf/core/util/redaction/StringObjectWriter.java
index 063c0d842..cb22bbd72 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/util/redaction/StringObjectWriter.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/util/redaction/StringObjectWriter.java
@@ -44,8 +44,8 @@ public static boolean fullyRedacted(ArrayList<GlyphText> glyphTexts) {
         return true;
     }
 
-    public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators) throws IOException {
-        float lastTdOffset = 0;
+    public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators,
+                                float lastTdOffset) throws IOException {
         int operatorCount = 0;
         for (TextSprite textSprite : textOperators) {
             ArrayList<GlyphText> glyphTexts = textSprite.getGlyphSprites();
@@ -82,9 +82,9 @@ public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList
         return lastTdOffset;
     }
 
-    public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators) throws IOException {
+    public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators,
+                                float lastTdOffset) throws IOException {
         int operatorCount = 0;
-        float lastTdOffset = 0;
 
         for (TextSprite textSprite : textOperators) {
             ArrayList<GlyphText> glyphTexts = textSprite.getGlyphSprites();
@@ -129,6 +129,12 @@ public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList
     private static float writeLastTdOffset(ByteArrayOutputStream contentOutputStream, float lastTdOffset,
                                            GlyphText glyphText) throws IOException {
         float advance = glyphText.getX() + glyphText.getAdvanceX();
+        // still not sure how to handle this in 100% of cases, as the advance can technically be negative,
+        // but if we have a negative glyph advance we likely have a negative font value and should
+        // treat this as a positive value when writing the advance.
+        if (glyphText.getAdvanceX() < 0) {
+            advance = Math.abs(advance);
+        }
         return writeTdOffset(contentOutputStream, advance, lastTdOffset);
     }
 
diff --git a/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java b/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java
index 08efb9bcd..ba0474cbb 100644
--- a/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java
+++ b/core/core-awt/src/main/java/org/icepdf/core/util/updater/callbacks/ContentStreamRedactorCallback.java
@@ -14,6 +14,7 @@
 import org.icepdf.core.util.redaction.InlineImageWriter;
 import org.icepdf.core.util.redaction.StringObjectWriter;
 
+import java.awt.geom.AffineTransform;
 import java.awt.geom.GeneralPath;
 import java.awt.geom.Rectangle2D;
 import java.io.ByteArrayOutputStream;
@@ -45,6 +46,7 @@ public class ContentStreamRedactorCallback {
     private int lastTextPosition;
     private float lastTjOffset;
     private final Library library;
+    private final AffineTransform transform;
     private boolean modifiedStream;
 
     private final List<RedactionAnnotation> redactionAnnotations;
@@ -52,10 +54,20 @@ public class ContentStreamRedactorCallback {
     public ContentStreamRedactorCallback(Library library, List<RedactionAnnotation> redactionAnnotations) {
         this.redactionAnnotations = redactionAnnotations;
         this.library = library;
+        this.transform = new AffineTransform();
     }
 
-    public ContentStreamRedactorCallback createChildInstance() {
-        return new ContentStreamRedactorCallback(this.library, this.redactionAnnotations);
+    private ContentStreamRedactorCallback(Library library, List<RedactionAnnotation> redactionAnnotations,
+                                          AffineTransform transform) {
+        this.redactionAnnotations = redactionAnnotations;
+        this.library = library;
+        // xObject text will have its own transform that must be taken into account when determining
+        // intersections of the redaction and glyph bounds.
+        this.transform = transform;
+    }
+
+    public ContentStreamRedactorCallback createChildInstance(AffineTransform transform) {
+        return new ContentStreamRedactorCallback(this.library, this.redactionAnnotations, transform);
     }
 
     public void startContentStream(Stream stream) throws IOException {
@@ -102,13 +114,18 @@ public void setLastTokenPosition(int position, Integer token) throws IOException
                     (position - lastTokenPosition));
             lastTokenPosition = position;
         } else if (token == T_STAR || token == TD || token == Td) {
+            // relative operators, so adjust for the redacted content.
             writeLastTjOffset();
-            lastTjOffset = 0;
             burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition,
                     (position - lastTokenPosition));
+            lastTjOffset = 0;
             lastTokenPosition = position;
-        } else if (token == BT) {
+        } else if (token == BT || token == Tm) {
+            burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition,
+                    (position - lastTokenPosition));
+            // hard reset, new coordinate system
             lastTjOffset = 0;
+            lastTokenPosition = position;
         }
         lastTextPosition = position;
     }
@@ -125,7 +142,7 @@ private void writeLastTjOffset() throws IOException {
     }
 
     private boolean isTextLayoutToken(int token) {
-        return token == Tj || token == TJ || token == Td || token == TD || token == T_STAR || token == BT;
+        return token == Tj || token == TJ || token == Td || token == TD || token == Tm || token == T_STAR || token == BT;
     }
 
     /**
@@ -136,6 +153,7 @@ private boolean isTextLayoutToken(int token) {
     public void checkAndRedactText(GlyphText glyphText) {
         for (RedactionAnnotation annotation : redactionAnnotations) {
             GeneralPath reactionPaths = annotation.getMarkupPath();
+            glyphText.normalizeToUserSpace(transform, null);
             Rectangle2D glyphBounds = glyphText.getBounds();
             if (reactionPaths != null && reactionPaths.contains(glyphBounds)) {
                 logger.finer(() -> "Redacting Text: " + glyphText.getCid() + " " + glyphText.getUnicode());
@@ -183,9 +201,9 @@ public void writeRedactedStringObject(ArrayList<TextSprite> textOperators, final
         if (StringObjectWriter.containsRedactions(textOperators)) {
             // apply redaction
             if (Operands.TJ == operand) {
-                lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators);
+                lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators, lastTjOffset);
             } else {
-                lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators);
+                lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators, lastTjOffset);
             }
             modifiedStream = true;
         } else {
diff --git a/examples/pom.xml b/examples/pom.xml
index fbb94f42c..92a4010ed 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -23,6 +23,7 @@
         <module>loadingEvents</module>
         <module>printservices</module>
         <module>search</module>
+        <module>redaction</module>
         <module>signatures</module>
     </modules>
 
diff --git a/examples/redaction/build.gradle b/examples/redaction/build.gradle
new file mode 100644
index 000000000..02395c829
--- /dev/null
+++ b/examples/redaction/build.gradle
@@ -0,0 +1,13 @@
+plugins {
+    id 'java'
+    id 'application'
+}
+// the example source imports org.icepdf.core.* and org.icepdf.ri.* classes; presumably provided by the two projects below.
+dependencies {
+    implementation project(':core:core-awt'), project(':viewer:viewer-awt')
+}
+
+description 'java redaction example'
+// entry point (the example's main class) and default JVM heap arguments for the application plugin.
+mainClassName = "org.icepdf.examples.redaction.RedactionHeadless"
+applicationDefaultJvmArgs = ["-Xms64m", "-Xmx1024m"]
\ No newline at end of file
diff --git a/examples/redaction/pom.xml b/examples/redaction/pom.xml
new file mode 100644
index 000000000..bd20238d3
--- /dev/null
+++ b/examples/redaction/pom.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.icepdf.examples</groupId>
+        <artifactId>examples</artifactId>
+        <version>7.2.0-SNAPSHOT</version>
+    </parent>
+    <artifactId>redaction</artifactId>
+    <packaging>pom</packaging>
+    <name>ICEpdf :: Examples :: Redaction</name>
+    <description>
+        The ICEpdf redaction examples
+    </description>
+    <!-- NOTE(review): this change adds no 'component' or 'headless' directories under examples/redaction (the example source sits directly in src/main/java); confirm the packaging and module list. -->
+    <modules>
+        <module>component</module>
+        <module>headless</module>
+    </modules>
+
+</project>
diff --git a/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java b/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java
new file mode 100644
index 000000000..33de9178d
--- /dev/null
+++ b/examples/redaction/src/main/java/org/icepdf/examples/redaction/RedactionHeadless.java
@@ -0,0 +1,103 @@
+package org.icepdf.examples.redaction;
+
+import org.icepdf.core.pobjects.Document;
+import org.icepdf.core.pobjects.Page;
+import org.icepdf.core.pobjects.annotations.Annotation;
+import org.icepdf.core.pobjects.annotations.AnnotationFactory;
+import org.icepdf.core.pobjects.annotations.RedactionAnnotation;
+import org.icepdf.core.pobjects.graphics.text.WordText;
+import org.icepdf.core.search.DocumentSearchController;
+import org.icepdf.core.util.updater.WriteMode;
+import org.icepdf.ri.common.search.DocumentSearchControllerImpl;
+import org.icepdf.ri.util.FontPropertiesManager;
+
+import java.awt.*;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.GeneralPath;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+
+/**
+ * The <code>RedactionHeadless</code> class is an example of how to use text search results
+ * as inputs for the creation of redaction annotations. The PDF named by the first command line
+ * argument is searched for the term "redaction", every hit is covered by a redaction annotation
+ * and the document is exported to <code>redacted_output.pdf</code>, burning in the redactions.
+ *
+ * @since 7.2.0
+ */
+public class RedactionHeadless {
+    public static void main(String[] args) {
+        // a source PDF is required as the first command line argument.
+        if (args.length < 1) {
+            System.err.println("Usage: RedactionHeadless <path-to-pdf>");
+            return;
+        }
+        String filePath = args[0];
+
+        // load the system font properties before any page content is parsed.
+        FontPropertiesManager.getInstance().loadOrReadSystemFonts();
+
+        // open the document
+        Document document = new Document();
+        try {
+            document.setFile(filePath);
+
+            // get the search controller
+            DocumentSearchController searchController =
+                    new DocumentSearchControllerImpl(document);
+            // add the search term whose hits will be redacted.
+            searchController.addSearchTerm("redaction", false, false);
+
+            ArrayList<WordText> foundWords;
+            RedactionAnnotation redactionAnnotation;
+
+            // iterate over each page, creating redactions from the search hits
+            for (int i = 0, max = document.getNumberOfPages(); i < max; i++) {
+                Page page = document.getPageTree().getPage(i);
+                page.init();
+
+                // search the page; a page without hits must not stop the remaining pages
+                foundWords = searchController.searchPage(i);
+                if (foundWords == null) {
+                    System.out.println("No search terms found on page " + (i + 1));
+                    continue;
+                }
+                for (WordText wordText : foundWords) {
+                    final Rectangle tBbox = wordText.getBounds().getBounds();
+
+                    redactionAnnotation = (RedactionAnnotation)
+                            AnnotationFactory.buildAnnotation(
+                                    document.getPageTree().getLibrary(),
+                                    Annotation.SUBTYPE_REDACT,
+                                    tBbox);
+
+                    if (redactionAnnotation != null) {
+                        redactionAnnotation.setColor(Color.BLACK);
+                        redactionAnnotation.setMarkupBounds(new ArrayList<>(Collections.singletonList(tBbox)));
+                        redactionAnnotation.setMarkupPath(new GeneralPath(tBbox));
+                        redactionAnnotation.setBBox(tBbox);
+                        redactionAnnotation.resetAppearanceStream(new AffineTransform());
+                        page.addAnnotation(redactionAnnotation, true);
+                    }
+                }
+            }
+
+            // burn the redaction into the PDF by exporting the document.
+            File file = new File("redacted_output.pdf");
+            try (final FileOutputStream fileOutputStream = new FileOutputStream(file);
+                 final BufferedOutputStream buf = new BufferedOutputStream(fileOutputStream, 8192)) {
+                document.writeToOutputStream(buf, WriteMode.FULL_UPDATE);
+            }
+        } catch (Exception e) {
+            // example code: report the failure and fall through to the clean up below.
+            e.printStackTrace();
+        } finally {
+            // clean up resources, even when loading, searching or writing fails.
+            document.dispose();
+        }
+    }
+
+}
diff --git a/settings.gradle b/settings.gradle
index a51c93327..5f2d39909 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -22,4 +22,7 @@ include 'core:core-awt',
         'examples:signatures'
 
 
-rootProject.name = 'icepdf'
\ No newline at end of file
+rootProject.name = 'icepdf'
+include 'examples:redaction'
+findProject(':examples:redaction')?.name = 'redaction'
+