diff --git a/CHANGELOG.md b/CHANGELOG.md
index 95dd0125f57..8e714471e17 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
 - Added integrity checker for an odd number of unescaped '#'
 - Implemented [feature request 384](https://sourceforge.net/p/jabref/features/384): The merge entries dialog now show all text and colored differences between the fields
 - Implemented [#1233](https://github.com/JabRef/jabref/issues/1233): Group side pane now takes up all the remaining space
+- Added integrity check detecting HTML-encoded characters
 
 ### Fixed
 
diff --git a/src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java b/src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
index dfa9041dadc..5606ce2d636 100644
--- a/src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
+++ b/src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
@@ -62,6 +62,7 @@ private List<IntegrityMessage> checkBibtexEntry(BibEntry entry) {
         result.addAll(new AbbreviationChecker("journal").check(entry));
         result.addAll(new AbbreviationChecker("booktitle").check(entry));
         result.addAll(new BibStringChecker().check(entry));
+        result.addAll(new HTMLCharacterChecker().check(entry));
 
         return result;
     }
@@ -355,4 +356,33 @@ public List<IntegrityMessage> check(BibEntry entry) {
         }
     }
 
+    /**
+     * Flags every field whose value contains an HTML encoded character.
+     */
+    private static class HTMLCharacterChecker implements Checker {
+
+        // Matches '&', followed by one or more '#' or ASCII alphanumeric characters, followed by ';'
+        private static final Pattern HTML_CHARACTER_PATTERN = Pattern.compile("&[#\\p{Alnum}]+;");
+
+        /**
+         * Checks whether any field of the entry contains an HTML encoded character.
+         *
+         * @param entry the entry whose fields are scanned
+         * @return one message for each field in which the pattern is found; empty if no field matches
+         */
+        @Override
+        public List<IntegrityMessage> check(BibEntry entry) {
+            List<IntegrityMessage> results = new ArrayList<>();
+            for (Map.Entry<String, String> field : entry.getFieldMap().entrySet()) {
+                Matcher characterMatcher = HTML_CHARACTER_PATTERN.matcher(field.getValue());
+                // find() rather than matches(): one occurrence anywhere in the value flags the field
+                if (characterMatcher.find()) {
+                    results.add(new IntegrityMessage(Localization.lang("HTML encoded character found"), entry,
+                            field.getKey()));
+                }
+            }
+            return results;
+        }
+    }
+
 }
diff --git a/src/main/resources/l10n/JabRef_da.properties b/src/main/resources/l10n/JabRef_da.properties
index d4c4a55d0e5..680960ed891 100644
--- a/src/main/resources/l10n/JabRef_da.properties
+++ b/src/main/resources/l10n/JabRef_da.properties
@@ -1672,3 +1672,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found=
diff --git a/src/main/resources/l10n/JabRef_de.properties b/src/main/resources/l10n/JabRef_de.properties
index 22793198be9..710edadc5f6 100644
--- a/src/main/resources/l10n/JabRef_de.properties
+++ b/src/main/resources/l10n/JabRef_de.properties
@@ -2390,3 +2390,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found=
diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
index cc26ed39028..a30551fc2a0 100644
--- a/src/main/resources/l10n/JabRef_en.properties
+++ b/src/main/resources/l10n/JabRef_en.properties
@@ -2254,3 +2254,4 @@ character=character word=word Show_symmetric_diff=Show_symmetric_diff +HTML_encoded_character_found=HTML_encoded_character_found
diff --git a/src/main/resources/l10n/JabRef_es.properties b/src/main/resources/l10n/JabRef_es.properties
index 106ffe7469e..cc4162d45cd 100644
--- a/src/main/resources/l10n/JabRef_es.properties
+++ b/src/main/resources/l10n/JabRef_es.properties
@@ -1573,3 +1573,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found=
diff --git a/src/main/resources/l10n/JabRef_fa.properties b/src/main/resources/l10n/JabRef_fa.properties
index 9c98f86450c..613b25f8d50 100644
--- a/src/main/resources/l10n/JabRef_fa.properties
+++ b/src/main/resources/l10n/JabRef_fa.properties
@@ -2360,3 +2360,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found=
diff --git a/src/main/resources/l10n/JabRef_fr.properties b/src/main/resources/l10n/JabRef_fr.properties
index 743e736d841..960f0d92ec5 100644
--- a/src/main/resources/l10n/JabRef_fr.properties
+++
b/src/main/resources/l10n/JabRef_fr.properties @@ -1616,3 +1616,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_in.properties b/src/main/resources/l10n/JabRef_in.properties index 6c73bcf4f71..aefd0bf9ea6 100644 --- a/src/main/resources/l10n/JabRef_in.properties +++ b/src/main/resources/l10n/JabRef_in.properties @@ -1592,3 +1592,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_it.properties b/src/main/resources/l10n/JabRef_it.properties index 4f9a99a4452..070186aa68b 100644 --- a/src/main/resources/l10n/JabRef_it.properties +++ b/src/main/resources/l10n/JabRef_it.properties @@ -1692,3 +1692,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_ja.properties b/src/main/resources/l10n/JabRef_ja.properties index 1cc8d459f8b..82aec3cfa1a 100644 --- a/src/main/resources/l10n/JabRef_ja.properties +++ b/src/main/resources/l10n/JabRef_ja.properties @@ -2336,3 +2336,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_nl.properties b/src/main/resources/l10n/JabRef_nl.properties index 2735ac406ba..108ba96dadc 100644 --- a/src/main/resources/l10n/JabRef_nl.properties +++ b/src/main/resources/l10n/JabRef_nl.properties @@ -2370,3 +2370,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_no.properties b/src/main/resources/l10n/JabRef_no.properties index bc256bbfb32..3a7eee7844e 100644 --- a/src/main/resources/l10n/JabRef_no.properties +++ b/src/main/resources/l10n/JabRef_no.properties @@ -2766,3 +2766,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_pt_BR.properties b/src/main/resources/l10n/JabRef_pt_BR.properties index 2f91b312fef..dadb6cb947c 100644 --- 
a/src/main/resources/l10n/JabRef_pt_BR.properties +++ b/src/main/resources/l10n/JabRef_pt_BR.properties @@ -1586,3 +1586,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_ru.properties b/src/main/resources/l10n/JabRef_ru.properties index cf150dc7449..564d5e193e6 100644 --- a/src/main/resources/l10n/JabRef_ru.properties +++ b/src/main/resources/l10n/JabRef_ru.properties @@ -2337,3 +2337,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_sv.properties b/src/main/resources/l10n/JabRef_sv.properties index 2ca2b457df3..5fdc5f71590 100644 --- a/src/main/resources/l10n/JabRef_sv.properties +++ b/src/main/resources/l10n/JabRef_sv.properties @@ -1530,3 +1530,5 @@ character=bokstav word=ord Show_symmetric_diff=Visa_skillnad_symmetriskt + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_tr.properties b/src/main/resources/l10n/JabRef_tr.properties index 76e68213bfa..a4c33d13006 100644 --- a/src/main/resources/l10n/JabRef_tr.properties +++ b/src/main/resources/l10n/JabRef_tr.properties @@ -1604,3 +1604,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_vi.properties b/src/main/resources/l10n/JabRef_vi.properties index 9a003efa4c3..fb5e19a5bcf 100644 --- a/src/main/resources/l10n/JabRef_vi.properties +++ b/src/main/resources/l10n/JabRef_vi.properties @@ -2361,3 +2361,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git a/src/main/resources/l10n/JabRef_zh.properties b/src/main/resources/l10n/JabRef_zh.properties index b42b897c986..7135e5cccee 100644 --- a/src/main/resources/l10n/JabRef_zh.properties +++ b/src/main/resources/l10n/JabRef_zh.properties @@ -1598,3 +1598,5 @@ character= word= Show_symmetric_diff= + +HTML_encoded_character_found= diff --git 
a/src/test/java/net/sf/jabref/logic/integrity/IntegrityCheckTest.java b/src/test/java/net/sf/jabref/logic/integrity/IntegrityCheckTest.java
index 714f4740712..3dba673eaf8 100644
--- a/src/test/java/net/sf/jabref/logic/integrity/IntegrityCheckTest.java
+++ b/src/test/java/net/sf/jabref/logic/integrity/IntegrityCheckTest.java
@@ -160,6 +160,17 @@ public void testBibStringChecks() {
         assertWrong(createContext("author", "#einstein# #amp; #newton#"));
     }
 
+    @Test
+    public void testHTMLCharacterChecks() {
+        assertCorrect(createContext("title", "Not a single {HTML} character"));
+        assertCorrect(createContext("month", "#jan#"));
+        assertCorrect(createContext("author", "A. Einstein and I. Newton"));
+        // Named, decimal and hexadecimal character references must all be reported
+        assertWrong(createContext("author", "Lenhard, J&ouml;rg"));
+        assertWrong(createContext("author", "Lenhard, J&#227;rg"));
+        assertWrong(createContext("journal", "&Auml;rling Str&ouml;m for &#8211; &#x2031;"));
+    }
+
     private BibDatabaseContext createContext(String field, String value, String type) {
         BibEntry entry = new BibEntry();
         entry.setField(field, value);