Skip to content

Commit

Permalink
Keep UTF-8 encoding header if present (#8964)
Browse files Browse the repository at this point in the history
Co-authored-by: Christoph <siedlerkiller@gmail.com>
Co-authored-by: ThiloteE <73715071+ThiloteE@users.noreply.github.com>
  • Loading branch information
3 people committed Jul 9, 2022
1 parent f9093f0 commit 3874aa3
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- When configured SSL certificates changed, JabRef warns the user to restart to apply the configuration.
- We improved the appearances and logic of the "Manage field names & content" dialog, and renamed it to "Automatic field editor". [#6536](https://github.com/JabRef/jabref/issues/6536)
- We improved the message explaining the options when modifying an automatic keyword group [#8911](https://github.com/JabRef/jabref/issues/8911)
- We moved the preferences option "Warn about duplicates on import" option from the tab "File" to the tab "Import and Export". [kopper#570](https://github.com/koppor/jabref/issues/570)
- We moved the preferences option "Warn about duplicates on import" from the tab "File" to the tab "Import and Export". [koppor#570](https://github.com/koppor/jabref/issues/570)
- When JabRef encounters a `% Encoding: UTF-8` header, it is now kept during writing (and not removed). [#8964](https://github.com/JabRef/jabref/pull/8964)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,11 @@ protected void writeEntryTypeDefinition(BibEntryType customType) throws IOExcept

@Override
protected void writeProlog(BibDatabaseContext bibDatabaseContext, Charset encoding) throws IOException {
if ((encoding == null) || (encoding == StandardCharsets.UTF_8)) {
// We write the encoding if
// - it is provided (!= null)
// - explicitly set in the .bib file OR not equal to UTF_8
// Otherwise, we do not write anything and return
if ((encoding == null) || (!bibDatabaseContext.getMetaData().getEncodingExplicitlySupplied() && (encoding == StandardCharsets.UTF_8))) {
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ public ParserResult importDatabase(Path filePath) throws IOException {
}

Charset encoding;
boolean encodingExplicitlySupplied;
try (BufferedReader reader = Files.newBufferedReader(filePath, detectedCharset)) {
Optional<Charset> suppliedEncoding = getSuppliedEncoding(reader);
LOGGER.debug("Supplied encoding: {}", suppliedEncoding);
encodingExplicitlySupplied = suppliedEncoding.isPresent();

// in case no encoding information is present, use the detected one
encoding = suppliedEncoding.orElse(detectedCharset);
Expand All @@ -80,6 +82,7 @@ public ParserResult importDatabase(Path filePath) throws IOException {
try (BufferedReader reader = Files.newBufferedReader(filePath, encoding)) {
ParserResult parserResult = this.importDatabase(reader);
parserResult.getMetaData().setEncoding(encoding);
parserResult.getMetaData().setEncodingExplicitlySupplied(encodingExplicitlySupplied);
parserResult.setPath(filePath);
if (parserResult.getMetaData().getMode().isEmpty()) {
parserResult.getMetaData().setMode(BibDatabaseModeDetection.inferMode(parserResult.getDatabase()));
Expand Down
15 changes: 14 additions & 1 deletion src/main/java/org/jabref/model/metadata/MetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public class MetaData {
private final ContentSelectors contentSelectors = new ContentSelectors();
private final Map<String, List<String>> unknownMetaData = new HashMap<>();
private boolean isEventPropagationEnabled = true;
private boolean encodingExplicitlySupplied;

/**
* Constructs an empty metadata.
Expand Down Expand Up @@ -291,6 +292,17 @@ public void setEncoding(Charset encoding, ChangePropagation postChanges) {
}
}

/**
 * Tells whether the encoding was stated explicitly (via a "% Encoding: ..." header)
 * instead of being detected.
 *
 * @return the flag last stored through {@link #setEncodingExplicitlySupplied(boolean)}
 */
public boolean getEncodingExplicitlySupplied() {
    return this.encodingExplicitlySupplied;
}

/**
 * Records how the encoding of the library was determined.
 *
 * @param encodingExplicitlySupplied {@code true} if the encoding came from a "% Encoding: ..." line,
 *                                   {@code false} if it was detected "magically"
 */
public void setEncodingExplicitlySupplied(boolean encodingExplicitlySupplied) {
    // Plain field assignment; nothing else is updated here.
    this.encodingExplicitlySupplied = encodingExplicitlySupplied;
}

/**
* If disabled {@link MetaDataChangedEvent} will not be posted.
*/
Expand Down Expand Up @@ -349,6 +361,7 @@ public boolean equals(Object o) {
return (isProtected == metaData.isProtected)
&& Objects.equals(groupsRoot.getValue(), metaData.groupsRoot.getValue())
&& Objects.equals(encoding, metaData.encoding)
&& Objects.equals(encodingExplicitlySupplied, metaData.encodingExplicitlySupplied)
&& Objects.equals(saveOrderConfig, metaData.saveOrderConfig)
&& Objects.equals(citeKeyPatterns, metaData.citeKeyPatterns)
&& Objects.equals(userFileDirectory, metaData.userFileDirectory)
Expand All @@ -362,7 +375,7 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(groupsRoot.getValue(), encoding, saveOrderConfig, citeKeyPatterns, userFileDirectory,
return Objects.hash(groupsRoot.getValue(), encoding, encodingExplicitlySupplied, saveOrderConfig, citeKeyPatterns, userFileDirectory,
defaultCiteKeyPattern, saveActions, mode, isProtected, defaultFileDirectory);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,27 @@ void writeEpilogueAndEncoding() throws Exception {
"Test epilog" + OS.NEWLINE, stringWriter.toString());
}

@Test
void utf8EncodingWrittenIfExplicitlyDefined() throws Exception {
    // UTF-8 that is flagged as explicitly supplied must show up as a header line in the output.
    metaData.setEncoding(StandardCharsets.UTF_8);
    metaData.setEncodingExplicitlySupplied(true);

    // No entries are written, so the header is the only expected output.
    databaseWriter.savePartOfDatabase(bibtexContext, Collections.emptyList());

    String expectedOutput = "% Encoding: UTF-8" + OS.NEWLINE;
    String actualOutput = stringWriter.toString();
    assertEquals(expectedOutput, actualOutput);
}

@Test
void utf8EncodingNotWrittenIfNotExplicitlyDefined() throws Exception {
    // UTF-8 that was not explicitly supplied must not produce a header line.
    metaData.setEncoding(StandardCharsets.UTF_8);
    metaData.setEncodingExplicitlySupplied(false);

    // No entries are written, so the output is expected to stay empty.
    databaseWriter.savePartOfDatabase(bibtexContext, Collections.emptyList());

    String actualOutput = stringWriter.toString();
    assertEquals("", actualOutput);
}

@Test
void writeMetadata() throws Exception {
DatabaseCitationKeyPattern bibtexKeyPattern = new DatabaseCitationKeyPattern(mock(GlobalCitationKeyPattern.class));
Expand Down Expand Up @@ -404,6 +425,44 @@ void roundtripWin1252HeaderKept(@TempDir Path bibFolder) throws Exception {
assertEquals(Files.readString(testFile, charset), Files.readString(file, charset));
}

@Test
void roundtripUtf8HeaderKept(@TempDir Path bibFolder) throws Exception {
    // Import the fixture (by its name, a UTF-8 file carrying an encoding header) ...
    Path original = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-with-header-with-databasetypecomment.bib").toURI());
    BibtexImporter bibtexImporter = new BibtexImporter(importFormatPreferences, fileMonitor);
    ParserResult parsed = bibtexImporter.importDatabase(original);
    BibDatabaseContext databaseContext = new BibDatabaseContext(parsed.getDatabase(), parsed.getMetaData());

    // ... write it back into a fresh file inside the temporary folder ...
    Path written = Files.createFile(bibFolder.resolve("JabRef.bib"));
    try (BufferedWriter out = Files.newBufferedWriter(written, StandardCharsets.UTF_8)) {
        BibWriter bibWriter = new BibWriter(out, databaseContext.getDatabase().getNewLineSeparator());
        new BibtexDatabaseWriter(bibWriter, generalPreferences, savePreferences, entryTypesManager)
                .saveDatabase(databaseContext);
    }

    // ... and require the written file to match the fixture exactly.
    String expectedContent = Files.readString(original, StandardCharsets.UTF_8);
    String actualContent = Files.readString(written, StandardCharsets.UTF_8);
    assertEquals(expectedContent, actualContent);
}

@Test
void roundtripNotExplicitUtf8HeaderNotInsertedDuringWrite(@TempDir Path bibFolder) throws Exception {
    // Import the fixture (by its name, a UTF-8 file without an encoding header) ...
    Path original = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-without-header-with-databasetypecomment.bib").toURI());
    BibtexImporter bibtexImporter = new BibtexImporter(importFormatPreferences, fileMonitor);
    ParserResult parsed = bibtexImporter.importDatabase(original);
    BibDatabaseContext databaseContext = new BibDatabaseContext(parsed.getDatabase(), parsed.getMetaData());

    // ... write it back into a fresh file inside the temporary folder ...
    Path written = Files.createFile(bibFolder.resolve("JabRef.bib"));
    try (BufferedWriter out = Files.newBufferedWriter(written, StandardCharsets.UTF_8)) {
        BibWriter bibWriter = new BibWriter(out, databaseContext.getDatabase().getNewLineSeparator());
        new BibtexDatabaseWriter(bibWriter, generalPreferences, savePreferences, entryTypesManager)
                .saveDatabase(databaseContext);
    }

    // ... and require the written file to match the fixture exactly (so no header was added).
    String expectedContent = Files.readString(original, StandardCharsets.UTF_8);
    String actualContent = Files.readString(written, StandardCharsets.UTF_8);
    assertEquals(expectedContent, actualContent);
}

@Test
void roundtripWithComplexBib() throws Exception {
Path testBibtexFile = Path.of("src/test/resources/testbib/complex.bib");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

Expand Down Expand Up @@ -161,11 +162,25 @@ public void testParsingOfWindows1252EncodedFileReadsDegreeCharacterCorrectly(Str

@ParameterizedTest
@CsvSource({"encoding-utf-8-with-header.bib", "encoding-utf-8-without-header.bib"})
public void testParsingOfUtf8EncodedFileReadsUmlatCharacterCorrectly(String filename) throws Exception {
public void testParsingOfUtf8EncodedFileReadsUmlautCharacterCorrectly(String filename) throws Exception {
ParserResult parserResult = importer.importDatabase(
Path.of(BibtexImporterTest.class.getResource(filename).toURI()));
assertEquals(
List.of(new BibEntry(StandardEntryType.Article).withField(StandardField.TITLE, "Ü ist ein Umlaut")),
parserResult.getDatabase().getEntries());
}

@Test
public void encodingSupplied() throws Exception {
    // Importing this fixture must mark the encoding as explicitly supplied.
    Path bibFile = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-with-header.bib").toURI());
    ParserResult parserResult = importer.importDatabase(bibFile);

    boolean explicitlySupplied = parserResult.getMetaData().getEncodingExplicitlySupplied();
    assertTrue(explicitlySupplied);
}

@Test
public void encodingNotSupplied() throws Exception {
    // Importing this fixture must leave the "explicitly supplied" flag unset.
    Path bibFile = Path.of(BibtexImporterTest.class.getResource("encoding-utf-8-without-header.bib").toURI());
    ParserResult parserResult = importer.importDatabase(bibFile);

    boolean explicitlySupplied = parserResult.getMetaData().getEncodingExplicitlySupplied();
    assertFalse(explicitlySupplied);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
% Encoding: UTF-8
@article{,
title = {Ü ist ein Umlaut},
}

@Comment{jabref-meta: databaseType:bibtex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@article{,
title = {Ü ist ein Umlaut},
}

@Comment{jabref-meta: databaseType:bibtex;}

0 comments on commit 3874aa3

Please sign in to comment.