diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index 99b4cd1f9a2..09cf091c7e0 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -1348,8 +1348,9 @@ public void embed( try { - embedInternal(nodeRef, mapSystemToRaw(properties), reader, writer); - if(logger.isDebugEnabled()) + Map metadata = mapSystemToRaw(properties); + embedInternal(nodeRef, metadata, reader, writer); + if (logger.isDebugEnabled()) { logger.debug("Embedded Metadata into " + writer); } @@ -1462,7 +1463,7 @@ private Map mapRawToSystem(Map rawMet * @param systemMetadata Metadata keyed by system properties * @return Returns the metadata keyed by the content file metadata properties */ - private Map mapSystemToRaw(Map systemMetadata) + protected Map mapSystemToRaw(Map systemMetadata) { Map metadataProperties = new HashMap(systemMetadata.size() * 2 + 1); for (Map.Entry entry : systemMetadata.entrySet()) @@ -2261,45 +2262,4 @@ protected void embedInternal(Map metadata, ContentReader r { // TODO make this an abstract method once more extracters support embedding } - - public static Map convertMetadataToStrings(Map properties) - { - Map propertiesAsStrings = new HashMap<>(); - for (String metadataKey : properties.keySet()) - { - Serializable value = properties.get(metadataKey); - if (value == null) - { - continue; - } - if (value instanceof Collection) - { - for (Object singleValue : (Collection) 
value) - { - try - { - // Convert to a string value - propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); - } - catch (TypeConversionException e) - { - logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); - } - } - } - else - { - try - { - // Convert to a string value - propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); - } - catch (TypeConversionException e) - { - logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); - } - } - } - return propertiesAsStrings; - } } diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java index 4699200d83f..f186060ca1d 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2020 Alfresco Software Limited + * Copyright (C) 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -31,7 +31,6 @@ import org.alfresco.model.ContentModel; import org.alfresco.repo.action.executer.ContentMetadataExtracter; import org.alfresco.repo.content.transform.TransformerDebug; -import org.alfresco.repo.rendition2.RenditionDefinitionRegistry2; import org.alfresco.repo.rendition2.RenditionDefinitionRegistry2Impl; import org.alfresco.repo.rendition2.RenditionService2; import org.alfresco.repo.rendition2.TransformDefinition; @@ -44,6 +43,8 @@ import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.cmr.repository.datatype.TypeConversionException; import org.alfresco.service.cmr.tagging.TaggingService; import org.alfresco.service.namespace.NamespaceException; import org.alfresco.service.namespace.NamespacePrefixResolver; @@ -59,6 +60,7 @@ import java.io.InputStream; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -306,6 +308,64 @@ private String extractMappingToString(Map> map) } } + /** + * As T-Engines do the mapping, all this method can do is convert QNames to fully qualified Strings and the + * values to Strings or a Collection of Strings. + * @param systemMetadata Metadata keyed by system properties + * @return a new map keyed by the QNames as Strings, with the values converted to Strings or Collections of Strings. Null values and values that cannot be converted are omitted. 
+ */ + @Override + protected Map mapSystemToRaw(Map systemMetadata) + { + Map metadataProperties = new HashMap<>(systemMetadata.size()); + for (Map.Entry entry : systemMetadata.entrySet()) + { + Serializable serializableValue = entry.getValue(); + if (serializableValue == null) + { + continue; + } + + QName modelProperty = entry.getKey(); + String key = modelProperty.toString(); + + if (serializableValue instanceof Collection) + { + Collection serializableCollection = (Collection) serializableValue; + ArrayList collection = new ArrayList<>(serializableCollection.size()); + for (Object singleValue : serializableCollection) + { + try + { + String value = DefaultTypeConverter.INSTANCE.convert(String.class, singleValue); + collection.add(value); + } + catch (TypeConversionException e) + { + logger.info("Could not convert " + key + ": " + e.getMessage()); + } + } + if (!collection.isEmpty()) + { + metadataProperties.put(key, collection); + } + } + else + { + try + { + String value = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue); + metadataProperties.put(key, value); + } + catch (TypeConversionException e) + { + logger.info("Could not convert " + key + ": " + e.getMessage()); + } + } + } + return metadataProperties; + } + @Override protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) { @@ -475,10 +535,9 @@ private Map readMetadata(InputStream transformInputStream) private String metadataToString(Map metadata) { - Map metadataAsStrings = AbstractMappingMetadataExtracter.convertMetadataToStrings(metadata); try { - return jsonObjectMapper.writeValueAsString(metadataAsStrings); + return jsonObjectMapper.writeValueAsString(metadata); } catch (JsonProcessingException e) { diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java index a470d16b025..1f9db9647d8 100644 
--- a/repository/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -36,7 +36,6 @@ import org.alfresco.repo.content.transform.TransformerDebug; import org.alfresco.repo.content.transform.UnsupportedTransformationException; import org.alfresco.repo.rendition2.RenditionDefinition2; -import org.alfresco.repo.rendition2.RenditionDefinitionRegistry2; import org.alfresco.repo.rendition2.RenditionDefinitionRegistry2Impl; import org.alfresco.repo.rendition2.RenditionService2Impl; import org.alfresco.repo.rendition2.TransformClient; @@ -97,6 +96,7 @@ import static org.alfresco.model.ContentModel.PROP_CREATOR; import static org.alfresco.model.ContentModel.PROP_MODIFIED; import static org.alfresco.model.ContentModel.PROP_MODIFIER; +import static org.alfresco.model.ContentModel.PROP_TITLE; import static org.alfresco.repo.rendition2.RenditionService2Impl.SOURCE_HAS_NO_CONTENT; /** @@ -145,6 +145,7 @@ public class AsynchronousExtractorTest extends BaseSpringTest private Map origProperties; private Map expectedProperties; private Map properties; + private Map transformOptionsPassedToTEngine; private class TestAsynchronousExtractor extends AsynchronousExtractor { @@ -215,8 +216,20 @@ public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes return true; } + @Override + protected Map mapSystemToRaw(Map systemMetadata) + { + // Add a property value that is a Collection, to ensure we can handle it. 
+ Map metadataWithCollection = new HashMap<>(systemMetadata); + Serializable collection = new ArrayList(Set.of("one", "two", "three")); + metadataWithCollection.put(PROP_TITLE, collection); + + return super.mapSystemToRaw(metadataWithCollection); + } + private void mockTransform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, int sourceContentHashCode) { + transformOptionsPassedToTEngine = renditionDefinition.getTransformOptions(); try { transformerDebug.pushMisc(); @@ -604,8 +617,26 @@ public void testEmbed() throws Exception File file = new File(resource.toURI()); long fileSize = file.length(); - assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", // just replace the pdf with html! + // Replace the source pdf with html so we can see the content change size + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", UNCHANGED_HASHCODE, fileSize, expectedProperties, OverwritePolicy.PRAGMATIC); + + // Check the metadata sent to the T-Engine contains one of the fixed property values. + String metadata = transformOptionsPassedToTEngine.get("metadata"); + System.err.println("METADATA="+metadata); + assertTrue("System properties were not set: simple value", metadata.contains("\"{http://www.alfresco.org/model/content/1.0}creator\":\"System\"")); + + // Check the metadata sent to the T-Engine contains the collection value added by the overridden mapSystemToRaw. + // The order of elements in the collection may change, so we cannot use a simple string compare. 
+ int i = metadata.indexOf("\"{http://www.alfresco.org/model/content/1.0}title\":["); + assertTrue("The title is missing: "+metadata, i > 0); + int j = metadata.indexOf(']', i); + assertTrue("No closing ] : "+metadata.substring(i), j > 0); + String collection = metadata.substring(i, j); + assertTrue("There should have 3 elements: "+collection, collection.split(",").length == 3); + assertTrue("\"one\" is missing", collection.contains("\"one\"")); + assertTrue("\"two\" is missing", collection.contains("\"two\"")); + assertTrue("\"three\" is missing", collection.contains("\"three\"")); } @Test diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java index d5e493616a4..f71a0b29260 100644 --- a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -43,18 +43,12 @@ import org.alfresco.service.namespace.QName; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; -import org.apache.tika.embedder.Embedder; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.transaction.annotation.Transactional; +import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.io.Serializable; import java.util.Arrays; import java.util.Collection; @@ -82,6 +76,8 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest private NodeRef nodeRef; private ContentMetadataEmbedder executer; + private long origSize = -1; + private long newSize = -1; private final static String ID = GUID.generate(); @@ -128,73 +124,81 @@ public void before() throws Exception @Test public void testFailingEmbedder() { - AbstractMappingMetadataExtracter embedder = new FailingMappingMetadataEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); + AbstractMappingMetadataExtracter embedder = new MockFailingEmbedder(); + setupEmbedderActionAndThenExecute(embedder); + + assertEquals("The original content should remain unchanged on embed failures", origSize, newSize); + } + + /** + * Test that a successful embedder does change the original content + */ + @Test + public void testSuccessfulEmbedder() + { + AbstractMappingMetadataExtracter embedder = new MockSuccessfulEmbedder(); + setupEmbedderActionAndThenExecute(embedder); + + assertTrue("The original content should have changed on a successful embed", origSize != newSize); + } + + private void setupEmbedderActionAndThenExecute(AbstractMappingMetadataExtracter embedder) + { embedder.setRegistry(metadataExtracterRegistry); - embedder.setDictionaryService(this.dictionaryService); - 
embedder.setMimetypeService(this.mimetypeService); + embedder.setDictionaryService(dictionaryService); + embedder.setMimetypeService(mimetypeService); embedder.register(); - + String myCreator = "Embedded creator"; // Get the old props - Map props = this.nodeService.getProperties(this.nodeRef); - props.put(ContentModel.PROP_AUTHOR, myCreator); - this.nodeService.setProperties(this.nodeRef, props); + Map origProps = nodeService.getProperties(nodeRef); + origProps.put(ContentModel.PROP_AUTHOR, myCreator); + nodeService.setProperties(nodeRef, origProps); - // Execute the action + // Create the action ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); - - ContentReader origReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT); - long origSize = origReader.getSize(); + + ContentReader origReader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + origSize = origReader.getSize(); assertTrue(origSize > 0); - this.executer.execute(action, this.nodeRef); - - ContentReader embeddedReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT); - - assertEquals("The original content should remain unchanged on embed failures", origSize, embeddedReader.getSize()); + // Execute the action + executer.execute(action, nodeRef); + + ContentReader embeddedReader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + newSize = embeddedReader.getSize(); } - - /** - * Embedder which fails upon calling embed on its {@link FailingEmbedder} - */ - private class FailingMappingMetadataEmbedder extends AbstractMappingMetadataExtracter + + private static class MockFailingEmbedder extends MockEmbedder { - /** - * Constructor for setting supported extract and embed mimetypes - * - * @param mimetypes the supported extract and embed mimetypes - */ - public FailingMappingMetadataEmbedder(Collection mimetypes) + @Override + protected void embedInternal(Map metadata, ContentReader reader, 
ContentWriter writer) throws Throwable { - super( - new HashSet(mimetypes), - new HashSet(mimetypes)); + throw new IOException("Forced test failure"); } + } + private static class MockSuccessfulEmbedder extends MockEmbedder + { @Override protected void embedInternal(Map metadata, ContentReader reader, ContentWriter writer) throws Throwable { - Embedder embedder = getEmbedder(); - if (embedder == null) - { - return; - } - - Map metadataAsStrings = convertMetadataToStrings(metadata); - Metadata metadataToEmbed = new Metadata(); - metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v)); - - InputStream inputStream = reader.getContentInputStream(); - OutputStream outputStream = writer.getContentOutputStream(); - embedder.embed(metadataToEmbed, null, outputStream, null); + // Just set the content as this is testing embedding in the repo rather than via the AsynchronousExtractor and T-Engines. + File htmlFile = AbstractContentTransformerTest.loadQuickTestFile("html"); + writer.putContent(htmlFile); } + } - protected Embedder getEmbedder() + private static class MockEmbedder extends AbstractMappingMetadataExtracter + { + private static final Collection MIMETYPES = Arrays.asList(MimetypeMap.MIMETYPE_PDF); + + public MockEmbedder() { - return new FailingEmbedder(); + super(new HashSet(MIMETYPES), new HashSet(MIMETYPES)); } - + @Override protected Map> readMappingProperties(String propertiesUrl) { @@ -217,26 +221,4 @@ protected Map extractRaw(ContentReader reader) throws Thro return null; } } - - /** - * Metadata embedder which fails on a call to embed. 
- */ - private class FailingEmbedder implements Embedder - { - private static final long serialVersionUID = -4954679684941467571L; - - @Override - public Set getSupportedEmbedTypes(ParseContext context) - { - return null; - } - - @Override - public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context) - throws IOException - { - throw new IOException("Forced failure"); - } - } - }