Skip to content

Commit

Permalink
GH-314 image and text redaction support (#335)
Browse files Browse the repository at this point in the history
  • Loading branch information
pcorless committed Feb 6, 2024
1 parent eea10fc commit 3f345f7
Show file tree
Hide file tree
Showing 133 changed files with 4,903 additions and 745 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto
# Denote all files that are truly binary and should not be modified.
# stop messing up test data.
*.pdf binary
31 changes: 26 additions & 5 deletions core/core-awt/src/main/java/org/icepdf/core/pobjects/Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.icepdf.core.pobjects.acroform.FieldDictionary;
import org.icepdf.core.pobjects.acroform.InteractiveForm;
import org.icepdf.core.pobjects.annotations.AbstractWidgetAnnotation;
import org.icepdf.core.pobjects.annotations.RedactionAnnotation;
import org.icepdf.core.pobjects.graphics.WatermarkCallback;
import org.icepdf.core.pobjects.graphics.images.ImageUtility;
import org.icepdf.core.pobjects.graphics.text.PageText;
Expand Down Expand Up @@ -567,7 +568,7 @@ public void dispose() {
* @return The length of the PDF file copied
* @throws IOException if there is some problem reading or writing the PDF data
*/
public long writeToOutputStream(OutputStream out) throws IOException {
public long writeToOutputStream(OutputStream out) throws IOException, InterruptedException {
return writeToOutputStream(out, WriteMode.INCREMENT_UPDATE);
}

Expand All @@ -581,7 +582,7 @@ public long writeToOutputStream(OutputStream out) throws IOException {
* @return The length of the PDF file copied
* @throws IOException if there is some problem reading or writing the PDF data
*/
public long writeToOutputStream(OutputStream out, WriteMode writeMode) throws IOException {
public long writeToOutputStream(OutputStream out, WriteMode writeMode) throws IOException, InterruptedException {
if (documentFileChannel != null) {
synchronized (library.getMappedFileByteBufferLock()) {
ByteBuffer documentByteBuffer = library.getMappedFileByteBuffer();
Expand Down Expand Up @@ -620,7 +621,7 @@ public long writeToOutputStream(OutputStream out, WriteMode writeMode) throws IO
* @return The length of the PDF file saved
* @throws IOException if there is some problem reading or writing the PDF data
*/
public long saveToOutputStream(OutputStream out) throws IOException {
public long saveToOutputStream(OutputStream out) throws IOException, InterruptedException {
return writeToOutputStream(out, WriteMode.INCREMENT_UPDATE);
}

Expand All @@ -633,7 +634,7 @@ public long saveToOutputStream(OutputStream out) throws IOException {
* @return The length of the PDF file saved
* @throws IOException if there is some problem reading or writing the PDF data
*/
public long saveToOutputStream(OutputStream out, WriteMode writeMode) throws IOException {
public long saveToOutputStream(OutputStream out, WriteMode writeMode) throws IOException, InterruptedException {
return writeToOutputStream(out, writeMode);
}

Expand Down Expand Up @@ -720,6 +721,26 @@ public PageText getPageViewText(int pageNumber) throws InterruptedException {
}
}

/**
 * Checks whether this document contains any redaction annotations, either as a change
 * tracked by the state manager or as an annotation attached to one of the document's pages.
 *
 * @return true if the state manager reports a redaction or at least one page has one or
 * more redaction annotations; false otherwise.
 */
public boolean hasRedactions() {
    // check state manager first as this will be a bit cheaper than scanning each page in the document.
    if (stateManager.hasRedactions()) {
        return true;
    }
    PageTree pageTree = catalog.getPageTree();
    for (int i = 0, max = pageTree.getNumberOfPages(); i < max; i++) {
        Page page = pageTree.getPage(i);
        List<RedactionAnnotation> redactions = page.getRedactionAnnotations();
        // a single redaction annotation on any page is enough; the list can be null when the
        // page's annotations could not be loaded.
        if (redactions != null && !redactions.isEmpty()) {
            return true;
        }
    }
    return false;
}


/**
* Gets the security manager for this document. If the document has no
* security manager null is returned.
Expand Down Expand Up @@ -883,7 +904,7 @@ public Catalog getCatalog() {
}

/**
* Sets the caching mode when handling file loaded by an URI. If enabled
* Sets the caching mode when handling file loaded by a URI. If enabled
* URI streams will be cached to disk, otherwise they will be stored in
* memory. This method must be set before a call to setByteArray() or
* setInputStream() is called.
Expand Down
13 changes: 10 additions & 3 deletions core/core-awt/src/main/java/org/icepdf/core/pobjects/Form.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.icepdf.core.pobjects.graphics.Shapes;
import org.icepdf.core.util.Library;
import org.icepdf.core.util.parser.content.ContentParser;
import org.icepdf.core.util.updater.callbacks.ContentStreamRedactorCallback;

import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
Expand Down Expand Up @@ -148,10 +149,16 @@ public void setParentResources(Resources parentResource) {
this.parentResource = parentResource;
}


/**
 * Initializes this form without a content stream redactor callback; equivalent to
 * calling {@code init(null)}.
 *
 * @throws InterruptedException propagated from {@code init(ContentStreamRedactorCallback)}.
 */
public synchronized void init() throws InterruptedException {
init(null);
}

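/**
* Initializes the form by parsing its decoded content stream into shapes. Returns
* immediately if the form has already been initialized.
*
* @param contentStreamRedactorCallback callback handed to the content parser, can be null.
* @throws InterruptedException if the content parsing was interrupted.
*/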
public synchronized void init() throws InterruptedException {
public synchronized void init(ContentStreamRedactorCallback contentStreamRedactorCallback)
throws InterruptedException {
if (inited) {
return;
}
Expand All @@ -171,13 +178,13 @@ public synchronized void init() throws InterruptedException {
}
// Build a new content parser for the content streams and apply the
// content stream of the calling content stream.
ContentParser cp = new ContentParser(library, leafResources);
ContentParser cp = new ContentParser(library, leafResources, contentStreamRedactorCallback);
cp.setGraphicsState(graphicsState);
byte[] in = getDecodedStreamBytes();
if (in != null) {
try {
logger.log(Level.FINER, () -> "Parsing form " + getPObjectReference());
shapes = cp.parse(new byte[][]{in}, new Reference[]{this.getPObjectReference()}, null).getShapes();
shapes = cp.parse(Stream.fromByteArray(in, this.getPObjectReference()), null).getShapes();
inited = true;
} catch (InterruptedException e) {
// the initialization was interrupted so we need to make sure we bubble up the exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,15 @@ public String getLiteralString() {

/**
* <p>Gets a literal String representation of this object's data using the
* specifed font and format. The font is used to verify that the
* specific character codes can be rendered; if they cannot they may be
* specified font and format. The font is used to verify that the
* specific character codes can be rendered; if they cannot, they may be
* removed or combined with the next character code to get a displayable
* character code.
*
* @param fontFormat the type of pdf font which will be used to display
* the text. Valid values are CID_FORMAT and SIMPLE_FORMAT for Adobe
* Composite and Simple font types respectively
* @param font font used to render the the literal string data.
* @param font font used to render the literal string data.
* @return StringBuffer which contains all renderable characters for the
* given font.
*/
Expand Down Expand Up @@ -217,7 +217,7 @@ public StringBuilder getLiteralStringBuffer(final int fontFormat, FontFile font)
}

/**
* The length of the the underlying object's data.
* The length of the underlying object's data.
*
* @return length of the object's data.
*/
Expand Down
88 changes: 56 additions & 32 deletions core/core-awt/src/main/java/org/icepdf/core/pobjects/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@

import org.icepdf.core.events.*;
import org.icepdf.core.io.SeekableInput;
import org.icepdf.core.pobjects.annotations.Annotation;
import org.icepdf.core.pobjects.annotations.MarkupAnnotation;
import org.icepdf.core.pobjects.annotations.MarkupGlueAnnotation;
import org.icepdf.core.pobjects.annotations.PopupAnnotation;
import org.icepdf.core.pobjects.annotations.*;
import org.icepdf.core.pobjects.graphics.Shapes;
import org.icepdf.core.pobjects.graphics.WatermarkCallback;
import org.icepdf.core.pobjects.graphics.text.GlyphText;
Expand All @@ -29,6 +26,7 @@
import org.icepdf.core.pobjects.graphics.text.WordText;
import org.icepdf.core.util.*;
import org.icepdf.core.util.parser.content.ContentParser;
import org.icepdf.core.util.updater.callbacks.ContentStreamRedactorCallback;
import org.icepdf.core.util.updater.modifiers.AnnotationRemovalModifier;
import org.icepdf.core.util.updater.modifiers.ModifierFactory;

Expand Down Expand Up @@ -333,7 +331,7 @@ else if (annotObj instanceof DictionaryEntries) { // HashMap lacks "Type"->"Anno
if (creator.equals(SystemProperties.USER_NAME)) {
annotations.add(a);
} else {
// other wise we skip it all together but make sure the popup is hidden.
// otherwise we skip it all together but make sure the popup is hidden.
if (markupAnnotation.getPopupAnnotation() != null) {
markupAnnotation.getPopupAnnotation().setOpen(false);
}
Expand All @@ -352,7 +350,7 @@ else if (annotObj instanceof DictionaryEntries) { // HashMap lacks "Type"->"Anno
logger.log(Level.WARNING, e, () -> " " + finalA.getPObjectReference() + finalA.getEntries());
}
}
//The popup annotations may not be referenced in the page annotations entry, we have to add them manually.
// The popup annotations may not be referenced in the page annotations entry, we have to add them manually.
final Set<Annotation> annotSet = new HashSet<>(annotations);
for (final Annotation annot : annotSet) {
if (annot instanceof MarkupAnnotation) {
Expand Down Expand Up @@ -380,6 +378,15 @@ public void resetInitializedState() {
* child elements. Once a page has been initialized, it can be painted.
*/
public synchronized void init() throws InterruptedException {
// regular initialization path: delegate with no content stream redactor callback.
init(null);
}

/**
* Initialize the Page object. This method triggers the parsing of a page's
* child elements. Once a page has been initialized, it can be painted.
* @param contentStreamRedactorCallback callback used to rewrite the content stream, can be null.
*/
public synchronized void init(ContentStreamRedactorCallback contentStreamRedactorCallback) throws InterruptedException {
try {
// make sure we are not revisiting this method
if (inited) {
Expand Down Expand Up @@ -407,34 +414,31 @@ public synchronized void init() throws InterruptedException {
}

/*
Finally iterate through the contents vector and concat all of the
the resource streams together so that the content parser can
go to town and build all of the page's shapes.
Finally iterate through the contents vector and concat all the
resource streams together so that the content parser can
go to town and build all the page's shapes.
*/
notifyPageInitializationStarted();
if (contents != null) {
try {
ContentParser cp = new ContentParser(library, resources);
byte[][] streams = new byte[contents.size()][];
byte[] stream;
Reference[] references = new Reference[contents.size()];
ContentParser cp = new ContentParser(library, resources, contentStreamRedactorCallback);
Stream[] streams = new Stream[contents.size()];
byte[] streamByte;
for (int i = 0, max = contents.size(); i < max; i++) {
stream = contents.get(i).getDecodedStreamBytes();
if (stream != null) {
streams[i] = stream;
references[i] = contents.get(i).pObjectReference;
streamByte = contents.get(i).getDecodedStreamBytes();
if (streamByte != null) {
streams[i] = contents.get(i);
}
}
// get any optional groups from the catalog, which control
// visibility
// get any optional groups from the catalog, which control visibility
OptionalContent optionalContent = library.getCatalog().getOptionalContent();
if (optionalContent != null) {
optionalContent.init();
}

// pass in option group references into parse.
if (streams.length > 0) {
shapes = cp.parse(streams, references, this).getShapes();
shapes = cp.parse(streams, this).getShapes();
}
// set the initiated flag, first as there are couple corner
// cases where the content parsing can call page.init() again
Expand All @@ -455,14 +459,18 @@ public synchronized void init() throws InterruptedException {
logger.log(Level.WARNING, "Error initializing Page, no page content.");
}
} catch (InterruptedException e) {
// keeps shapes vector so we can paint what we have but make init state as false
// so we can try to re parse it later.
// keeps shapes vector so that we can paint what we have but make init state as false
// so that we can try to reparse it later.
inited = false;
throw new InterruptedException(e.getMessage());
}
notifyPageInitializationEnded(inited);
}

/**
 * Gets the content streams currently held by this page.
 *
 * @return the page's internal {@code contents} list itself, not a copy. Can be null; other
 * code in this class null-checks the field before using it.
 */
public List<Stream> getContentStreams() {
return contents;
}

/**
* Gets a Thumbnail object associated with this page. If no Thumbnail
* entry exists then null is returned.
Expand Down Expand Up @@ -679,6 +687,7 @@ private void paintPageContent(Graphics2D g2, int renderHintType, float userRotat
}
}
}
g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f));
//g2.setComposite(BlendComposite.getInstance(BlendComposite.BlendingMode.NORMAL, 1.0f));
}
}
Expand Down Expand Up @@ -1296,9 +1305,27 @@ public List<Annotation> getAnnotations() {
return annotations;
}

/**
 * Gets the redaction annotations attached to this page. The page's annotations are
 * initialized on demand if they have not been loaded yet.
 *
 * @return a new list holding every annotation of this page that is a RedactionAnnotation,
 * possibly empty; null if the page's annotations are still unavailable after the
 * initialization attempt.
 */
public List<RedactionAnnotation> getRedactionAnnotations() {
    boolean needsInit = annotations == null;
    if (needsInit) {
        try {
            initPageAnnotations();
        } catch (InterruptedException e) {
            logger.finer("Interrupt exception getting annotations. ");
        }
    }
    // initialization may have been interrupted or found nothing to work with.
    if (annotations == null) {
        return null;
    }
    // todo make this method more generic to any Annotation subtype
    return annotations.stream()
            .filter(annotation -> annotation instanceof RedactionAnnotation)
            .map(annotation -> (RedactionAnnotation) annotation)
            .collect(Collectors.toList());
}

/**
* Returns the decoded content stream for this page instance. A page instance
* can have more then one content stream associated with it.
* can have more than one content stream associated with it.
*
* @return An array of decoded content stream. Each index in the array
* represents one content stream. Null return and null String array
Expand Down Expand Up @@ -1379,7 +1406,7 @@ public Rectangle2D.Float getCropBox() {
if (cropBox != null) {
return cropBox;
}
// add all of the pages crop box dimensions to a vector and process
// add all the pages crop box dimensions to a vector and process
List boxDimensions = (List) (library.getObject(entries, CROPBOX_KEY));
if (boxDimensions != null) {
cropBox = new PRectangle(boxDimensions);
Expand Down Expand Up @@ -1532,9 +1559,9 @@ public synchronized PageText getText() throws InterruptedException {
Shapes textBlockShapes = new Shapes();

/*
Finally iterate through the contents vector and concat all of the
the resource streams together so that the content parser can
go to town and build all of the pages shapes.
Finally iterate through the contents array and concat all the
resource streams together so that the content parser can
go to town and build all the pages shapes.
*/
if (contents == null) {
// Get the value of the page's content entry
Expand All @@ -1547,12 +1574,9 @@ public synchronized PageText getText() throws InterruptedException {
}
if (contents != null) {
try {

ContentParser cp = new ContentParser(library, resources);
byte[][] streams = new byte[contents.size()][];
for (int i = 0, max = contents.size(); i < max; i++) {
streams[i] = contents.get(i).getDecodedStreamBytes();
}
Stream[] streams = new Stream[contents.size()];
contents.toArray(streams);
textBlockShapes = cp.parseTextBlocks(streams);
// print off any fuzz left on the stack
if (logger.isLoggable(Level.FINER)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ public Object getXObject(Name s) {
return library.getObject(xobjects, s);
}

/**
 * Gets the XObject dictionary entries of this resources object; this is the same
 * dictionary that {@code getXObject(Name)} resolves names against.
 *
 * @return the internal {@code xobjects} entries, not a copy.
 * NOTE(review): presumably null when the resources define no XObjects - confirm.
 */
public DictionaryEntries getXObjects() {
return xobjects;
}

/**
* Gets a rough count of the images resources associated with this page. Does
* not include inline images.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package org.icepdf.core.pobjects;

import org.icepdf.core.pobjects.annotations.RedactionAnnotation;
import org.icepdf.core.pobjects.structure.CrossReferenceRoot;

import java.util.*;
Expand Down Expand Up @@ -205,6 +206,18 @@ public CrossReferenceRoot getCrossReferenceRoot() {
return crossReferenceRoot;
}

/**
 * Determines whether any change tracked by this state manager wraps a redaction annotation.
 *
 * @return true if the object of at least one tracked change is a RedactionAnnotation;
 * false otherwise, including when there are no changes at all.
 */
public boolean hasRedactions() {
    // anyMatch short-circuits on the first hit and yields false for an empty collection,
    // which covers the no-changes case.
    return changes.values().stream()
            .map(change -> change.getPObject().getObject())
            .anyMatch(candidate -> candidate instanceof RedactionAnnotation);
}

private static class PObjectComparatorByReferenceObjectNumber
implements Comparator<Change> {
public int compare(Change a, Change b) {
Expand Down
Loading

0 comments on commit 3f345f7

Please sign in to comment.