Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added integrity checker to detect HTML encoded characters in fields #1238

Merged
merged 1 commit into from
Apr 19, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Added integrity checker for an odd number of unescaped '#'
- Implemented [feature request 384](https://sourceforge.net/p/jabref/features/384): The merge entries dialog now shows all text and colored differences between the fields
- Implemented [#1233](https://github.com/JabRef/jabref/issues/1233): Group side pane now takes up all the remaining space
- Added integrity check detecting HTML-encoded characters

### Fixed

Expand Down
24 changes: 24 additions & 0 deletions src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ private List<IntegrityMessage> checkBibtexEntry(BibEntry entry) {
result.addAll(new AbbreviationChecker("journal").check(entry));
result.addAll(new AbbreviationChecker("booktitle").check(entry));
result.addAll(new BibStringChecker().check(entry));
result.addAll(new HTMLCharacterChecker().check(entry));

return result;
}
Expand Down Expand Up @@ -355,4 +356,27 @@ public List<IntegrityMessage> check(BibEntry entry) {
}
}

private static class HTMLCharacterChecker implements Checker {

    // Matches an ampersand, followed by one or more '#' or alphanumeric characters, followed by a semicolon
    private static final Pattern HTML_CHARACTER_PATTERN = Pattern.compile("&[#\\p{Alnum}]+;");

    /**
     * Scans every field of the given entry and reports each field whose value
     * contains at least one match of {@code HTML_CHARACTER_PATTERN}.
     *
     * @param entry the entry whose fields are inspected
     * @return one message per matching field; an empty list if no field matches
     */
    @Override
    public List<IntegrityMessage> check(BibEntry entry) {
        List<IntegrityMessage> messages = new ArrayList<>();
        for (Map.Entry<String, String> candidate : entry.getFieldMap().entrySet()) {
            String fieldName = candidate.getKey();
            String content = candidate.getValue();
            if (!HTML_CHARACTER_PATTERN.matcher(content).find()) {
                // Nothing that looks like an HTML encoded character in this field
                continue;
            }
            // A single message per field is enough, regardless of the number of matches
            String description = Localization.lang("HTML encoded character found");
            messages.add(new IntegrityMessage(description, entry, fieldName));
        }
        return messages;
    }
}

}
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_da.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1672,3 +1672,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2390,3 +2390,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2254,3 +2254,4 @@ character=character
word=word
Show_symmetric_diff=Show_symmetric_diff

HTML_encoded_character_found=HTML_encoded_character_found
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_es.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1573,3 +1573,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fa.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2360,3 +2360,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1616,3 +1616,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_in.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1592,3 +1592,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_it.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1692,3 +1692,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ja.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2336,3 +2336,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_nl.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2370,3 +2370,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_no.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2766,3 +2766,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_pt_BR.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1586,3 +1586,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ru.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2337,3 +2337,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_sv.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1530,3 +1530,5 @@ character=bokstav
word=ord

Show_symmetric_diff=Visa_skillnad_symmetriskt

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_tr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1604,3 +1604,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_vi.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2361,3 +2361,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_zh.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1598,3 +1598,5 @@ character=
word=

Show_symmetric_diff=

HTML_encoded_character_found=
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,16 @@ public void testBibStringChecks() {
assertWrong(createContext("author", "#einstein# #amp; #newton#"));
}

/**
 * Exercises the HTML encoded character check with field values that contain
 * no character reference (passed to assertCorrect) and values that contain
 * named, decimal or hexadecimal references (passed to assertWrong).
 */
@Test
public void testHTMLCharacterChecks() {
    // {field, value} pairs without anything of the form "&...;"
    String[][] cleanFields = {
            {"title", "Not a single {HTML} character"},
            {"month", "#jan#"},
            {"author", "A. Einstein and I. Newton"}
    };
    for (String[] pair : cleanFields) {
        assertCorrect(createContext(pair[0], pair[1]));
    }

    // {field, value} pairs containing named (&ouml;), decimal (&#227;) and hexadecimal (&#x2031;) references
    String[][] encodedFields = {
            {"author", "Lenhard, J&ouml;rg"},
            {"author", "Lenhard, J&#227;rg"},
            {"journal", "&Auml;rling Str&ouml;m for &#8211; &#x2031;"}
    };
    for (String[] pair : encodedFields) {
        assertWrong(createContext(pair[0], pair[1]));
    }
}

private BibDatabaseContext createContext(String field, String value, String type) {
BibEntry entry = new BibEntry();
entry.setField(field, value);
Expand Down