Skip to content

Commit

Permalink
GH-314 redaction fixes
Browse files Browse the repository at this point in the history
(cherry picked from commit 0e56506)
  • Loading branch information
pcorless authored and Patrick Corless committed Feb 15, 2024
1 parent 3b6dc87 commit eb3c8d5
Show file tree
Hide file tree
Showing 13 changed files with 193 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,7 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto
// try and find the form's resources dictionary.
Resources leafResources = library.getResources(entries, RESOURCES_KEY);
// apply parent resource, if the current resources is null
if (leafResources != null) {
} else {
if (leafResources == null) {
leafResources = parentResource;
}
// Build a new content parser for the content streams and apply the
Expand All @@ -184,11 +183,11 @@ public synchronized void init(ContentStreamRedactorCallback contentStreamRedacto
if (in != null) {
try {
logger.log(Level.FINER, () -> "Parsing form " + getPObjectReference());
shapes = cp.parse(Stream.fromByteArray(in, this.getPObjectReference()), null).getShapes();
shapes = cp.parse(Stream.fromByteArray(in, this), null).getShapes();
inited = true;
} catch (InterruptedException e) {
// the initialization was interrupted so we need to make sure we bubble up the exception
// as we need to let any chained forms know so we can invalidate the page correctly
// the initialization was interrupted so, we need to make sure we bubble up the exception
// as we need to let any chained forms know so, we can invalidate the page correctly
shapes = new Shapes();
logger.log(Level.FINE, "Parsing form interrupted parsing Form content stream.", e);
throw new InterruptedException(e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,13 +342,13 @@ protected List<String> getNormalisedFilterNames() {
* that aren't specifically stream, but we want to parse some kind of state from the given bytes.
*
* @param contentBytes decompressed bytes to be treated as a stream
* @param reference parent objects reference, can be null
* @param dictionary parent object to base the new stream on
* @return mock stream object
*/
public static Stream[] fromByteArray(byte[] contentBytes, Reference reference) {
Stream stream = new Stream(new DictionaryEntries(), null);
public static Stream[] fromByteArray(byte[] contentBytes, Dictionary dictionary) {
Stream stream = new Stream(dictionary.getEntries(), null);
stream.setRawBytes(contentBytes);
stream.setPObjectReference(reference);
stream.setPObjectReference(dictionary.getPObjectReference());
return new Stream[]{stream};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ public VariableTextFieldDictionary(Library library, DictionaryEntries entries) {
if (resources != null) {
try {
ContentParser cp = new ContentParser(library, resources);
Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(),
this.getPObjectReference());
Stream[] possibleContentStream = Stream.fromByteArray(defaultAppearance.getBytes(), this);
cp.parseTextBlocks(possibleContentStream);
GraphicsState gs = cp.getGraphicsState();
if (gs != null) {
Expand Down Expand Up @@ -172,8 +171,7 @@ public String generateDefaultAppearance(String content, Resources resources) {
}
ContentParser cp = new ContentParser(library, resources);
// useful parser, so we can parse the font color.
Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(),
this.getPObjectReference());
Stream[] possibleContentStream = Stream.fromByteArray(possibleContent.getBytes(), this);
cp.parse(possibleContentStream, null);
GraphicsState gs = cp.getGraphicsState();
if (gs != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public void setContentStream(byte[] contentBytes) {
try {
ContentParser cp = new ContentParser(library, resources);
shapes = cp.parse(
Stream.fromByteArray(contentBytes, this.getPObjectReference()),
Stream.fromByteArray(contentBytes, this),
null).getShapes();
} catch (Exception e) {
shapes = new Shapes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public void resetAppearanceStream(double dx, double dy, AffineTransform pageTran
try {
Resources resources = form.getResources();
ContentParser cp = new ContentParser(library, resources);
shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this.getPObjectReference()),
shapes = cp.parse(Stream.fromByteArray(iconContentString.getBytes(), this),
null).getShapes();
} catch (Exception e) {
shapes = new Shapes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,9 @@ protected static GraphicsState consume_Do(GraphicsState graphicState, Stack<Obje
// need a new instance, so we don't corrupt the stream offset.
ContentStreamRedactorCallback formContentStreamRedactorCallback = null;
if (contentStreamRedactorCallback != null) {
formContentStreamRedactorCallback = contentStreamRedactorCallback.createChildInstance();
AffineTransform xObjectTransform = graphicState.getCTM();
xObjectTransform.concatenate(formXObject.getMatrix());
formContentStreamRedactorCallback = contentStreamRedactorCallback.createChildInstance(xObjectTransform);
}
formXObject.init(formContentStreamRedactorCallback);
// 2. concatenate matrix entry with the current CTM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ public static boolean fullyRedacted(ArrayList<GlyphText> glyphTexts) {
return true;
}

public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators) throws IOException {
float lastTdOffset = 0;
public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators,
float lastTdOffset) throws IOException {
int operatorCount = 0;
for (TextSprite textSprite : textOperators) {
ArrayList<GlyphText> glyphTexts = textSprite.getGlyphSprites();
Expand Down Expand Up @@ -82,9 +82,9 @@ public static float writeTj(ByteArrayOutputStream contentOutputStream, ArrayList
return lastTdOffset;
}

public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators) throws IOException {
public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList<TextSprite> textOperators,
float lastTdOffset) throws IOException {
int operatorCount = 0;
float lastTdOffset = 0;

for (TextSprite textSprite : textOperators) {
ArrayList<GlyphText> glyphTexts = textSprite.getGlyphSprites();
Expand Down Expand Up @@ -129,6 +129,12 @@ public static float writeTJ(ByteArrayOutputStream contentOutputStream, ArrayList
/**
 * Writes a Td offset that accounts for the given glyph, using the glyph's x position plus its
 * advance as the target and delegating the actual write to {@code writeTdOffset}.
 *
 * @param contentOutputStream stream the offset is written to
 * @param lastTdOffset        previously written offset, passed through to {@code writeTdOffset}
 * @param glyphText           glyph whose x position and advance define the new offset
 * @return value returned by {@code writeTdOffset}
 * @throws IOException if {@code writeTdOffset} fails to write
 */
private static float writeLastTdOffset(ByteArrayOutputStream contentOutputStream, float lastTdOffset,
                                       GlyphText glyphText) throws IOException {
    final float glyphEnd = glyphText.getX() + glyphText.getAdvanceX();
    // Not yet certain this is right in 100% of cases, since an advance can technically be negative.
    // A negative glyph advance most likely comes from a negative font value, so the magnitude is
    // written instead of the signed value.
    final boolean negativeAdvance = glyphText.getAdvanceX() < 0;
    final float advance = negativeAdvance ? Math.abs(glyphEnd) : glyphEnd;
    return writeTdOffset(contentOutputStream, advance, lastTdOffset);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.icepdf.core.util.redaction.InlineImageWriter;
import org.icepdf.core.util.redaction.StringObjectWriter;

import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Rectangle2D;
import java.io.ByteArrayOutputStream;
Expand Down Expand Up @@ -45,17 +46,28 @@ public class ContentStreamRedactorCallback {
private int lastTextPosition;
private float lastTjOffset;
private final Library library;
private final AffineTransform transform;
private boolean modifiedStream;

private final List<RedactionAnnotation> redactionAnnotations;

/**
 * Creates a redactor callback that uses an identity transform.
 *
 * @param library              document library
 * @param redactionAnnotations redaction annotations to test glyphs against
 */
public ContentStreamRedactorCallback(Library library, List<RedactionAnnotation> redactionAnnotations) {
    // delegate to the transform-aware constructor with a fresh identity transform
    this(library, redactionAnnotations, new AffineTransform());
}

public ContentStreamRedactorCallback createChildInstance() {
return new ContentStreamRedactorCallback(this.library, this.redactionAnnotations);
/**
 * Creates a redactor callback bound to a specific transform.
 *
 * @param library              document library
 * @param redactionAnnotations redaction annotations to test glyphs against
 * @param transform            transform stored for normalizing glyph bounds; kept by reference
 */
private ContentStreamRedactorCallback(Library library, List<RedactionAnnotation> redactionAnnotations,
                                      AffineTransform transform) {
    this.library = library;
    this.redactionAnnotations = redactionAnnotations;
    // xObject text has its own transform, which must be taken into account when determining
    // intersections of the redaction and glyph bounds.
    this.transform = transform;
}

/**
 * Creates a new callback that shares this instance's library and redaction annotations but
 * uses the supplied transform.
 *
 * @param transform transform the child instance stores
 * @return new callback instance
 */
public ContentStreamRedactorCallback createChildInstance(AffineTransform transform) {
    final ContentStreamRedactorCallback child =
            new ContentStreamRedactorCallback(library, redactionAnnotations, transform);
    return child;
}

public void startContentStream(Stream stream) throws IOException {
Expand Down Expand Up @@ -102,13 +114,18 @@ public void setLastTokenPosition(int position, Integer token) throws IOException
(position - lastTokenPosition));
lastTokenPosition = position;
} else if (token == T_STAR || token == TD || token == Td) {
// relative operators, so adjust for the redacted content.
writeLastTjOffset();
lastTjOffset = 0;
burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition,
(position - lastTokenPosition));
lastTjOffset = 0;
lastTokenPosition = position;
} else if (token == BT) {
} else if (token == BT || token == Tm) {
burnedContentOutputStream.write(originalContentStreamBytes, lastTokenPosition,
(position - lastTokenPosition));
// hard reset, new coordinate system
lastTjOffset = 0;
lastTokenPosition = position;
}
lastTextPosition = position;
}
Expand All @@ -125,7 +142,7 @@ private void writeLastTjOffset() throws IOException {
}

private boolean isTextLayoutToken(int token) {
return token == Tj || token == TJ || token == Td || token == TD || token == T_STAR || token == BT;
return token == Tj || token == TJ || token == Td || token == TD || token == Tm || token == T_STAR || token == BT;
}

/**
Expand All @@ -136,6 +153,7 @@ private boolean isTextLayoutToken(int token) {
public void checkAndRedactText(GlyphText glyphText) {
for (RedactionAnnotation annotation : redactionAnnotations) {
GeneralPath reactionPaths = annotation.getMarkupPath();
glyphText.normalizeToUserSpace(transform, null);
Rectangle2D glyphBounds = glyphText.getBounds();
if (reactionPaths != null && reactionPaths.contains(glyphBounds)) {
logger.finer(() -> "Redacting Text: " + glyphText.getCid() + " " + glyphText.getUnicode());
Expand Down Expand Up @@ -183,9 +201,9 @@ public void writeRedactedStringObject(ArrayList<TextSprite> textOperators, final
if (StringObjectWriter.containsRedactions(textOperators)) {
// apply redaction
if (Operands.TJ == operand) {
lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators);
lastTjOffset = StringObjectWriter.writeTJ(burnedContentOutputStream, textOperators, lastTjOffset);
} else {
lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators);
lastTjOffset = StringObjectWriter.writeTj(burnedContentOutputStream, textOperators, lastTjOffset);
}
modifiedStream = true;
} else {
Expand Down
1 change: 1 addition & 0 deletions examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
<module>loadingEvents</module>
<module>printservices</module>
<module>search</module>
<module>redaction</module>
<module>signatures</module>
</modules>

Expand Down
13 changes: 13 additions & 0 deletions examples/redaction/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Build script for the redaction example module.
plugins {
id 'java'
// the application plugin consumes mainClassName and applicationDefaultJvmArgs set below
id 'application'
}

// the example compiles against the core and viewer AWT modules of this build
dependencies {
implementation project(':core:core-awt'), project(':viewer:viewer-awt')
}

description 'java redaction example'

// entry point of the example and the default JVM heap settings (64 MB initial, 1024 MB max)
mainClassName = "org.icepdf.examples.redaction.RedactionHeadless"
applicationDefaultJvmArgs = ["-Xms64m", "-Xmx1024m"]
22 changes: 22 additions & 0 deletions examples/redaction/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.icepdf.examples</groupId>
<artifactId>examples</artifactId>
<version>7.2.0-SNAPSHOT</version>
</parent>
<artifactId>redaction</artifactId>
<packaging>pom</packaging>
<name>ICEpdf :: Examples :: Redaction</name>
<description>
The ICEpdf redaction examples
</description>

<modules>
<module>component</module>
<module>headless</module>
</modules>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package org.icepdf.examples.redaction;

import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.pobjects.annotations.Annotation;
import org.icepdf.core.pobjects.annotations.AnnotationFactory;
import org.icepdf.core.pobjects.annotations.RedactionAnnotation;
import org.icepdf.core.pobjects.graphics.text.WordText;
import org.icepdf.core.search.DocumentSearchController;
import org.icepdf.core.util.updater.WriteMode;
import org.icepdf.ri.common.search.DocumentSearchControllerImpl;
import org.icepdf.ri.util.FontPropertiesManager;

import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Collections;

/**
 * The <code>RedactionHeadless</code> class is an example of how to use text search results
 * as inputs for the creation of redaction annotations. Once the annotations are created the
 * document is exported, burning the redaction annotations into the PDF's content streams.
 * The resulting document will no longer have text where the redaction annotations intersected.
 *
 * @since 7.2.0
 */
public class RedactionHeadless {

    /**
     * Searches every page of the given PDF for the term "redaction", adds a redaction annotation
     * over each hit and writes the burned result to <code>redacted_output.pdf</code>.
     *
     * @param args command line arguments; <code>args[0]</code> is the path of the PDF to redact
     */
    public static void main(String[] args) {
        // fail with a usage hint rather than an ArrayIndexOutOfBoundsException
        if (args.length == 0) {
            System.err.println("Usage: RedactionHeadless <path to PDF file>");
            return;
        }

        FontPropertiesManager.getInstance().loadOrReadSystemFonts();

        // Get a file from the command line to open
        String filePath = args[0];

        // open the document
        Document document = new Document();
        try {
            document.setFile(filePath);

            // get the search controller
            DocumentSearchController searchController =
                    new DocumentSearchControllerImpl(document);
            // add a specified search term.
            searchController.addSearchTerm("redaction", false, false);

            // iterate over each page creating redactions from the search hits
            for (int i = 0, max = document.getNumberOfPages(); i < max; i++) {
                Page page = document.getPageTree().getPage(i);
                page.init();

                // search the page
                ArrayList<WordText> foundWords = searchController.searchPage(i);
                if (foundWords == null) {
                    // a page without hits must not abort the run; later pages may still match
                    System.out.println("No Search terms found on page " + (i + 1));
                    continue;
                }
                for (WordText wordText : foundWords) {
                    final Rectangle tBbox = wordText.getBounds().getBounds();

                    RedactionAnnotation redactionAnnotation = (RedactionAnnotation)
                            AnnotationFactory.buildAnnotation(
                                    document.getPageTree().getLibrary(),
                                    Annotation.SUBTYPE_REDACT,
                                    tBbox);

                    if (redactionAnnotation != null) {
                        redactionAnnotation.setColor(Color.BLACK);
                        redactionAnnotation.setMarkupBounds(new ArrayList<>(Collections.singletonList(tBbox)));
                        redactionAnnotation.setMarkupPath(new GeneralPath(tBbox));
                        redactionAnnotation.setBBox(tBbox);
                        redactionAnnotation.resetAppearanceStream(new AffineTransform());
                        page.addAnnotation(redactionAnnotation, true);
                    }
                }
            }

            // burn the redaction into the PDF by exporting the document.
            File file = new File("redacted_output.pdf");
            try (final FileOutputStream fileOutputStream = new FileOutputStream(file);
                 final BufferedOutputStream buf = new BufferedOutputStream(fileOutputStream, 8192)) {
                document.writeToOutputStream(buf, WriteMode.FULL_UPDATE);
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // clean up resources on every exit path, including failures
            document.dispose();
        }
    }

}
5 changes: 4 additions & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@ include 'core:core-awt',
'examples:signatures'


rootProject.name = 'icepdf'
rootProject.name = 'icepdf'
include 'examples:redaction'
findProject(':examples:redaction')?.name = 'redaction'

0 comments on commit eb3c8d5

Please sign in to comment.