Skip to content

Commit

Permalink
Improve author parsing (#4931)
Browse files Browse the repository at this point in the history
Fixes #4864. The author class added an additional space when converting "A O" to the dotted version "A. O.".
  • Loading branch information
tobiasdiez authored and Siedlerchr committed Apr 30, 2019
1 parent 63634b0 commit e73dd2e
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We fixed an issue where a non-existing aux file in a group made it impossible to open the library. [#4735](https://github.com/JabRef/jabref/issues/4735)
- We fixed an issue where some journal names were wrongly marked as abbreviated. [#4115](https://github.com/JabRef/jabref/issues/4115)
- We fixed an issue where the custom file column was sorted incorrectly. https://github.com/JabRef/jabref/issues/3119
- We improved the parsing of author names whose infix is abbreviated without a dot. [#4864](https://github.com/JabRef/jabref/issues/4864)
- We fixed an issue where the entry loses focus when a field is edited and at the same time used for sorting. https://github.com/JabRef/jabref/issues/3373
- We fixed an issue where the menu on Mac OS was not displayed in the usual Mac-specific way. https://github.com/JabRef/jabref/issues/3146
- We improved the integrity check for page numbers. [#4113](https://github.com/JabRef/jabref/issues/4113) and [feature request in the forum](http://discourse.jabref.org/t/pages-field-allow-use-of-en-dash/1199)
Expand Down
16 changes: 9 additions & 7 deletions src/main/java/org/jabref/model/entry/Author.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ public Author(String first, String firstabbr, String von, String last, String jr
}

public static String addDotIfAbbreviation(String name) {
// Avoid arrayindexoutof.... :
if ((name == null) || name.isEmpty()) {
return name;
}
Expand Down Expand Up @@ -114,23 +113,26 @@ public static String addDotIfAbbreviation(String name) {
// AA -> A. A.
// Only append ". " if the rest of the 'word' is uppercase
boolean nextWordIsUppercase = true;
char furtherChar = Character.MIN_VALUE;
for (int j = i + 1; j < name.length(); j++) {
char furtherChar = name.charAt(j);
if (Character.isWhitespace(furtherChar) || (furtherChar == '-') || (furtherChar == '~')
|| (furtherChar == '.')) {
furtherChar = name.charAt(j);
if (Character.isWhitespace(furtherChar) || (furtherChar == '-') || (furtherChar == '~') || (furtherChar == '.')) {
// end of word
break;
}

boolean furtherIsUppercaseLetter = Character.isLetter(furtherChar)
&& Character.isUpperCase(furtherChar);
boolean furtherIsUppercaseLetter = Character.isLetter(furtherChar) && Character.isUpperCase(furtherChar);
if (!furtherIsUppercaseLetter) {
nextWordIsUppercase = false;
break;
}
}
if (nextWordIsUppercase) {
sb.append(". ");
if (Character.isWhitespace(furtherChar)) {
sb.append(".");
} else {
sb.append(". ");
}
}
}

Expand Down
13 changes: 4 additions & 9 deletions src/main/java/org/jabref/model/entry/AuthorListParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ public class AuthorListParser {
* @return a parsed list of persons
*/
public AuthorList parse(String listOfNames) {

Objects.requireNonNull(listOfNames);

// initialization of parser
Expand All @@ -103,7 +102,6 @@ public AuthorList parse(String listOfNames) {
* empty.
*/
private Optional<Author> getAuthor() {

List<Object> tokens = new ArrayList<>(); // initialization
int vonStart = -1;
int lastStart = -1;
Expand Down Expand Up @@ -154,7 +152,7 @@ private Optional<Author> getAuthor() {
vonStart = tokens.size() - TOKEN_GROUP_LENGTH;
break;
}
} else if ((lastStart < 0) && tokenCase) {
} else if (tokenCase) {
lastStart = tokens.size() - TOKEN_GROUP_LENGTH;
break;
}
Expand Down Expand Up @@ -255,13 +253,10 @@ private Optional<Author> getAuthor() {
}

// Third step: do actual splitting, construct Author object
String firstPart = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN,
false);
String firstAbbr = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd,
OFFSET_TOKEN_ABBR, true);
String firstPart = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN, false);
String firstAbbr = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN_ABBR, true);
String vonPart = vonPartStart < 0 ? null : concatTokens(tokens, vonPartStart, vonPartEnd, OFFSET_TOKEN, false);
String lastPart = lastPartStart < 0 ? null : concatTokens(tokens, lastPartStart, lastPartEnd, OFFSET_TOKEN,
false);
String lastPart = lastPartStart < 0 ? null : concatTokens(tokens, lastPartStart, lastPartEnd, OFFSET_TOKEN, false);
String jrPart = jrPartStart < 0 ? null : concatTokens(tokens, jrPartStart, jrPartEnd, OFFSET_TOKEN, false);

if ((firstPart != null) && (lastPart != null) && lastPart.equals(lastPart.toUpperCase(Locale.ROOT)) && (lastPart.length() < 5)
Expand Down
33 changes: 13 additions & 20 deletions src/test/java/org/jabref/model/entry/AuthorListParameterTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.model.entry;

import java.util.Arrays;
import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
Expand All @@ -9,32 +8,26 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

public class AuthorListParameterTest {
class AuthorListParameterTest {

private static Stream<Arguments> data() {

return Stream.of(
Arguments.of("王, 军", authorList(new Author("军", "军.", null, "王", null))),
Arguments.of("Doe, John", authorList(new Author("John", "J.", null, "Doe", null))),
Arguments.of("von Berlichingen zu Hornberg, Johann Gottfried",
authorList(new Author("Johann Gottfried", "J. G.", "von", "Berlichingen zu Hornberg", null))),
//Arguments.of("Robert and Sons, Inc.", authorList(new Author(null, null, null, "Robert and Sons, Inc.", null))),
//Arguments.of("al-Ṣāliḥ, Abdallāh", authorList(new Author("Abdallāh", "A.", null, "al-Ṣāliḥ", null))),
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel",
authorList(new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null))),
Arguments.of("de la Vallée Poussin, J. C. G.",
authorList(new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null))),
Arguments.of("{K}ent-{B}oswell, E. S.", authorList(new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null))));
}

private static AuthorList authorList(Author author) {
return new AuthorList(Arrays.asList(author));
Arguments.of("王, 军", new Author("军", "军.", null, "王", null)),
Arguments.of("Doe, John", new Author("John", "J.", null, "Doe", null)),
Arguments.of("von Berlichingen zu Hornberg, Johann Gottfried", new Author("Johann Gottfried", "J. G.", "von", "Berlichingen zu Hornberg", null)),
//Arguments.of("Robert and Sons, Inc.", new Author(null, null, null, "Robert and Sons, Inc.", null))),
//Arguments.of("al-Ṣāliḥ, Abdallāh", new Author("Abdallāh", "A.", null, "al-Ṣāliḥ", null))),
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel", new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("de la Vallée Poussin, J. C. G.", new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("{K}ent-{B}oswell, E. S.", new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null)),
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null))
);
}

@ParameterizedTest
@MethodSource("data")
void parseCorrectly(String authorsString, AuthorList authorsParsed) {
void parseCorrectly(String authorsString, Author authorsParsed) {
AuthorListParser parser = new AuthorListParser();
assertEquals(authorsParsed, parser.parse(authorsString));
assertEquals(new AuthorList(authorsParsed), parser.parse(authorsString));
}
}
14 changes: 9 additions & 5 deletions src/test/java/org/jabref/model/entry/AuthorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

public class AuthorTest {
class AuthorTest {

@Test
public void addDotIfAbbreviationAddDot() {
void addDotIfAbbreviationAddDot() {
assertEquals("O.", Author.addDotIfAbbreviation("O"));
assertEquals("A. O.", Author.addDotIfAbbreviation("AO"));
assertEquals("A. O.", Author.addDotIfAbbreviation("AO."));
Expand All @@ -16,7 +16,12 @@ public void addDotIfAbbreviationAddDot() {
}

@Test
public void addDotIfAbbreviationDoNotAddDot() {
void addDotIfAbbreviationDoesNotAddMultipleSpaces() {
assertEquals("A. O.", Author.addDotIfAbbreviation("A O"));
}

@Test
void addDotIfAbbreviationDoNotAddDot() {
assertEquals("O.", Author.addDotIfAbbreviation("O."));
assertEquals("A. O.", Author.addDotIfAbbreviation("A. O."));
assertEquals("A.-O.", Author.addDotIfAbbreviation("A.-O."));
Expand All @@ -32,7 +37,6 @@ public void addDotIfAbbreviationDoNotAddDot() {
assertEquals("{\\'{E}}douard", Author.addDotIfAbbreviation("{\\'{E}}douard"));
assertEquals("J{\\\"o}rg", Author.addDotIfAbbreviation("J{\\\"o}rg"));
assertEquals("Moore, O. and O. Moore", Author.addDotIfAbbreviation("Moore, O. and O. Moore"));
assertEquals("Moore, O. and O. Moore and Moore, O. O.",
Author.addDotIfAbbreviation("Moore, O. and O. Moore and Moore, O. O."));
assertEquals("Moore, O. and O. Moore and Moore, O. O.", Author.addDotIfAbbreviation("Moore, O. and O. Moore and Moore, O. O."));
}
}

0 comments on commit e73dd2e

Please sign in to comment.