Skip to content

Commit

Permalink
Added integrity checker to detect HTML encoded characters in fields
Browse files Browse the repository at this point in the history
  • Loading branch information
oscargus committed Apr 19, 2016
1 parent 990b0e5 commit 406a4d8
Show file tree
Hide file tree
Showing 20 changed files with 68 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Added integrity checker for an odd number of unescaped '#'
- Implemented [feature request 384](https://sourceforge.net/p/jabref/features/384): The merge entries dialog now shows all text and colored differences between the fields
- Implemented [#1233](https://github.com/JabRef/jabref/issues/1233): Group side pane now takes up all the remaining space
- Added integrity check detecting HTML-encoded characters

### Fixed

Expand Down
24 changes: 24 additions & 0 deletions src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ private List<IntegrityMessage> checkBibtexEntry(BibEntry entry) {
result.addAll(new AbbreviationChecker("journal").check(entry));
result.addAll(new AbbreviationChecker("booktitle").check(entry));
result.addAll(new BibStringChecker().check(entry));
result.addAll(new HTMLCharacterChecker().check(entry));

return result;
}
Expand Down Expand Up @@ -355,4 +356,27 @@ public List<IntegrityMessage> check(BibEntry entry) {
}
}

private static class HTMLCharacterChecker implements Checker {

    // Matches an ampersand, followed by one or more '#' or alphanumeric characters,
    // terminated by a semicolon (e.g. "&ouml;", "&#227;", "&#x2031;").
    private static final Pattern HTML_CHARACTER_PATTERN = Pattern.compile("&[#\\p{Alnum}]+;");

    /**
     * Scans every field of the entry for HTML encoded characters.
     *
     * @param entry the entry whose field map is inspected
     * @return one message per field whose value contains at least one match of
     *         {@link #HTML_CHARACTER_PATTERN}; empty if no field matches
     */
    @Override
    public List<IntegrityMessage> check(BibEntry entry) {
        List<IntegrityMessage> messages = new ArrayList<>();
        for (Map.Entry<String, String> fieldEntry : entry.getFieldMap().entrySet()) {
            String fieldValue = fieldEntry.getValue();
            boolean containsHtmlCharacter = HTML_CHARACTER_PATTERN.matcher(fieldValue).find();
            if (!containsHtmlCharacter) {
                continue;
            }
            // A single message per field is enough, no matter how many matches it holds.
            String fieldName = fieldEntry.getKey();
            messages.add(
                    new IntegrityMessage(Localization.lang("HTML encoded character found"), entry, fieldName));
        }
        return messages;
    }
}

}
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_da.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1672,3 +1672,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2390,3 +2390,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2254,3 +2254,4 @@ character=character
word=word
Show_symmetric_diff=Show_symmetric_diff

HTML_encoded_character_found=HTML_encoded_character_found
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_es.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1573,3 +1573,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fa.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2360,3 +2360,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1616,3 +1616,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_in.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1592,3 +1592,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_it.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1692,3 +1692,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ja.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2336,3 +2336,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_nl.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2370,3 +2370,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_no.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2766,3 +2766,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_pt_BR.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1586,3 +1586,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ru.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2337,3 +2337,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_sv.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1530,3 +1530,5 @@ character=bokstav
word=ord

Show_symmetric_diff=Visa_skillnad_symmetriskt

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_tr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1604,3 +1604,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_vi.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2361,3 +2361,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_zh.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1598,3 +1598,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,16 @@ public void testBibStringChecks() {
assertWrong(createContext("author", "#einstein# #amp; #newton#"));
}

/**
 * Feeds field/value pairs through the integrity check: values without an
 * "&...;" sequence are passed to assertCorrect, values containing named,
 * decimal or hexadecimal "&...;" sequences are passed to assertWrong.
 */
@Test
public void testHTMLCharacterChecks() {
    // {field name, field value} pairs that contain no "&...;" sequence.
    String[][] acceptedFields = {
            {"title", "Not a single {HTML} character"},
            {"month", "#jan#"},
            {"author", "A. Einstein and I. Newton"},
    };
    for (String[] fieldAndValue : acceptedFields) {
        assertCorrect(createContext(fieldAndValue[0], fieldAndValue[1]));
    }

    // {field name, field value} pairs that contain at least one "&...;" sequence.
    String[][] rejectedFields = {
            {"author", "Lenhard, J&ouml;rg"},
            {"author", "Lenhard, J&#227;rg"},
            {"journal", "&Auml;rling Str&ouml;m for &#8211; &#x2031;"},
    };
    for (String[] fieldAndValue : rejectedFields) {
        assertWrong(createContext(fieldAndValue[0], fieldAndValue[1]));
    }
}

private BibDatabaseContext createContext(String field, String value, String type) {
BibEntry entry = new BibEntry();
entry.setField(field, value);
Expand Down

0 comments on commit 406a4d8

Please sign in to comment.