diff --git a/parseEntry.txt b/parseEntry.txt new file mode 100644 index 00000000000..0bb2be481e1 --- /dev/null +++ b/parseEntry.txt @@ -0,0 +1,136 @@ +branch 0 was visited. +branch 1 was visited. +branch 2 was not visited. +branch 3 was visited. +branch 4 was visited. +branch 5 was visited. +branch 6 was visited. +branch 7 was visited. +branch 8 was visited. +branch 9 was not visited. +branch 10 was visited. +branch 11 was visited. +branch 12 was not visited. +branch 13 was visited. +branch 14 was visited. +branch 15 was visited. +branch 16 was visited. +branch 17 was visited. +branch 18 was visited. +branch 19 was visited. +branch 20 was visited. +branch 21 was visited. +branch 22 was visited. +branch 23 was visited. +branch 24 was visited. +branch 25 was not visited. +branch 26 was visited. +branch 27 was visited. +branch 28 was visited. +branch 29 was visited. +branch 30 was not visited. +branch 31 was not visited. +branch 32 was visited. +branch 33 was visited. +branch 34 was not visited. +branch 35 was visited. +branch 36 was visited. +branch 37 was visited. +branch 38 was visited. +branch 39 was visited. +branch 40 was visited. +branch 41 was visited. +branch 42 was visited. +branch 43 was visited. +branch 44 was visited. +branch 45 was visited. +branch 46 was visited. +branch 47 was not visited. +branch 48 was not visited. +branch 49 was not visited. +branch 50 was not visited. +branch 51 was not visited. +branch 52 was not visited. +branch 53 was not visited. +branch 54 was visited. +branch 55 was not visited. +branch 56 was visited. +branch 57 was visited. +branch 58 was visited. +branch 59 was not visited. +branch 60 was visited. +branch 61 was not visited. +branch 62 was not visited. +branch 63 was not visited. +branch 64 was visited. +branch 65 was not visited. +branch 66 was visited. +branch 67 was not visited. +branch 68 was visited. +branch 69 was not visited. +branch 70 was visited. +branch 71 was visited. +branch 72 was visited. +branch 73 was not visited. +branch 74 was not visited. +branch 75 was visited. +branch 76 was visited. +branch 77 was visited. +branch 78 was visited. +branch 79 was visited. +branch 80 was visited. +branch 81 was visited. +branch 82 was visited. +branch 83 was visited. +branch 84 was not visited. +branch 85 was not visited. +branch 86 was not visited. +branch 87 was not visited. +branch 88 was not visited. +branch 89 was visited. +branch 90 was visited. +branch 91 was visited. +branch 92 was visited. +branch 93 was visited. +branch 94 was visited. +branch 95 was visited. +branch 96 was visited. +branch 97 was visited. +branch 98 was visited. +branch 99 was visited. +branch 100 was visited. +branch 101 was visited. +branch 102 was visited. +branch 103 was visited. +branch 104 was visited. +branch 105 was visited. +branch 106 was visited. +branch 107 was visited. +branch 108 was visited. +branch 109 was visited. +branch 110 was visited. +branch 111 was visited. +branch 112 was visited. +branch 113 was visited. +branch 114 was not visited. +branch 115 was visited. +branch 116 was visited. +branch 117 was visited. +branch 118 was visited. +branch 119 was visited. +branch 120 was not visited. +branch 121 was visited. +branch 122 was visited. +branch 123 was visited. +branch 124 was not visited. +branch 125 was visited. +branch 126 was visited. +branch 127 was visited. +branch 128 was visited. +branch 129 was visited. +branch 130 was visited. +branch 131 was visited. +branch 132 was not visited. +branch 133 was not visited. +branch 134 was visited. +0.7481481481481481 \ No newline at end of file diff --git a/parseEntry_NEW.txt b/parseEntry_NEW.txt new file mode 100644 index 00000000000..e75c56e46ee --- /dev/null +++ b/parseEntry_NEW.txt @@ -0,0 +1,136 @@ +branch 0 was visited. +branch 1 was visited. +branch 2 was not visited. +branch 3 was visited. +branch 4 was visited. +branch 5 was visited. +branch 6 was visited. +branch 7 was visited. +branch 8 was visited. +branch 9 was visited. +branch 10 was visited. +branch 11 was visited. +branch 12 was not visited. +branch 13 was visited. +branch 14 was visited. +branch 15 was visited. +branch 16 was visited. +branch 17 was visited. +branch 18 was visited. +branch 19 was visited. +branch 20 was visited. +branch 21 was visited. +branch 22 was visited. +branch 23 was visited. +branch 24 was visited. +branch 25 was visited. +branch 26 was visited. +branch 27 was visited. +branch 28 was visited. +branch 29 was visited. +branch 30 was not visited. +branch 31 was not visited. +branch 32 was visited. +branch 33 was visited. +branch 34 was not visited. +branch 35 was visited. +branch 36 was visited. +branch 37 was visited. +branch 38 was visited. +branch 39 was visited. +branch 40 was visited. +branch 41 was visited. +branch 42 was visited. +branch 43 was visited. +branch 44 was visited. +branch 45 was visited. +branch 46 was visited. +branch 47 was visited. +branch 48 was visited. +branch 49 was visited. +branch 50 was not visited. +branch 51 was not visited. +branch 52 was visited. +branch 53 was not visited. +branch 54 was visited. +branch 55 was not visited. +branch 56 was visited. +branch 57 was visited. +branch 58 was visited. +branch 59 was not visited. +branch 60 was visited. +branch 61 was visited. +branch 62 was visited. +branch 63 was not visited. +branch 64 was visited. +branch 65 was visited. +branch 66 was visited. +branch 67 was not visited. +branch 68 was visited. +branch 69 was not visited. +branch 70 was visited. +branch 71 was visited. +branch 72 was visited. +branch 73 was not visited. +branch 74 was not visited. +branch 75 was visited. +branch 76 was visited. +branch 77 was visited. +branch 78 was visited. +branch 79 was visited. +branch 80 was visited. +branch 81 was visited. +branch 82 was visited. +branch 83 was visited. +branch 84 was visited. +branch 85 was not visited. +branch 86 was visited. +branch 87 was not visited. +branch 88 was visited. +branch 89 was visited. +branch 90 was visited. +branch 91 was visited. +branch 92 was visited. +branch 93 was visited. +branch 94 was visited. +branch 95 was visited. +branch 96 was visited. +branch 97 was visited. +branch 98 was visited. +branch 99 was visited. +branch 100 was visited. +branch 101 was visited. +branch 102 was visited. +branch 103 was visited. +branch 104 was visited. +branch 105 was visited. +branch 106 was visited. +branch 107 was visited. +branch 108 was visited. +branch 109 was visited. +branch 110 was visited. +branch 111 was visited. +branch 112 was visited. +branch 113 was visited. +branch 114 was not visited. +branch 115 was visited. +branch 116 was visited. +branch 117 was visited. +branch 118 was visited. +branch 119 was visited. +branch 120 was not visited. +branch 121 was visited. +branch 122 was visited. +branch 123 was visited. +branch 124 was not visited. +branch 125 was visited. +branch 126 was visited. +branch 127 was visited. +branch 128 was visited. +branch 129 was visited. +branch 130 was visited. +branch 131 was visited. +branch 132 was not visited. +branch 133 was not visited. +branch 134 was visited. +0.837037037037037 \ No newline at end of file diff --git a/src/main/java/org/jabref/logic/importer/fileformat/GvkParser.java b/src/main/java/org/jabref/logic/importer/fileformat/GvkParser.java index 29c24e20f4d..7263d5a5b7d 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/GvkParser.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/GvkParser.java @@ -1,5 +1,8 @@ package org.jabref.logic.importer.fileformat; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.LinkedList; @@ -27,8 +30,35 @@ import org.xml.sax.SAXException; public class GvkParser implements Parser { + private static boolean[] visited = new boolean[64]; private static final Logger LOGGER = LoggerFactory.getLogger(GvkParser.class); + private String author = null; + private String editor = null; + private String title = null; + private String publisher = null; + private String year = null; + private String address = null; + private String series = null; + private String edition = null; + private String isbn = null; + private String issn = null; + private String number = null; + private String pagetotal = null; + private String volume = null; + private String pages = null; + private String journal = null; + private String ppn = null; + private String booktitle = null; + private String url = null; + private String note = null; + + private String quelle = ""; + private String mak = ""; + private String subtitle = ""; + + private EntryType entryType = StandardEntryType.Book; // Default + @Override public List parseEntries(InputStream inputStream) throws ParseException { try { @@ -70,186 +100,136 @@ private List parseEntries(Document content) { } private BibEntry parseEntry(Element e) { - String author = null; - String editor = null; - String title = null; - String publisher = null; - String year = null; - String address = null; - String series = null; - String edition = null; - String isbn = null; - String issn = null; - String number = null; - String pagetotal = null; - String volume = null; - String pages = null; - String journal = null; - String ppn = null; - String booktitle = null; - String url = null; - String note = null; - - String quelle = ""; - String mak = ""; - String subtitle = ""; - - EntryType entryType = StandardEntryType.Book; // Default - // Alle relevanten Informationen einsammeln List datafields = getChildren("datafield", e); + int SIZEOFDATAFIELDS = datafields.size(); for (Element datafield : datafields) { + visited[0] = true; String tag = datafield.getAttribute("tag"); LOGGER.debug("tag: " + tag); // mak if ("002@".equals(tag)) { - mak = getSubfield("0", datafield); - if (mak == null) { - mak = ""; - } + visited[1] = true; + parseBibliographicTypeAndStatusData(datafield); + } else { + visited[2] = true; } //ppn if ("003@".equals(tag)) { - ppn = getSubfield("0", datafield); + visited[3] = true; + parseRecordControlNumberData(datafield); + } else { + visited[4] = true; } //author if ("028A".equals(tag)) { - String vorname = getSubfield("d", datafield); - String nachname = getSubfield("a", datafield); - - if (author == null) { - author = ""; - } else { - author = author.concat(" and "); - } - author = author.concat(vorname + " " + nachname); + visited[5] = true; + parsePrimaryAuthorData(datafield); + } else { + visited[6] = true; } + //author (weiterer) if ("028B".equals(tag)) { - String vorname = getSubfield("d", datafield); - String nachname = getSubfield("a", datafield); - - if (author == null) { - author = ""; - } else { - author = author.concat(" and "); - } - author = author.concat(vorname + " " + nachname); + visited[7] = true; + parseCoauthorData(datafield); + } else { + visited[8] = true; } //editor if ("028C".equals(tag)) { - String vorname = getSubfield("d", datafield); - String nachname = getSubfield("a", datafield); - - if (editor == null) { - editor = ""; - } else { - editor = editor.concat(" and "); - } - editor = editor.concat(vorname + " " + nachname); + visited[9] = true; + parseSecondaryAuthorData(datafield); + } else { + visited[10] = true; } //title and subtitle if ("021A".equals(tag)) { - title = getSubfield("a", datafield); - subtitle = getSubfield("d", datafield); + visited[11] = true; + parseTitleAndStatementOfResponsibilityAreaData(datafield); + } else { + visited[12] = true; } //publisher and address if ("033A".equals(tag)) { - publisher = getSubfield("n", datafield); - address = getSubfield("p", datafield); + visited[13] = true; + parseFirstPublisherData(datafield); + } else { + visited[14] = true; } //year if ("011@".equals(tag)) { - year = getSubfield("a", datafield); + visited[15] = true; + parseDateOfPublicationData(datafield); + } else { + visited[16] = true; } //year, volume, number, pages (year bei Zeitschriften (evtl. redundant mit 011@)) if ("031A".equals(tag)) { - year = getSubfield("j", datafield); - - volume = getSubfield("e", datafield); - number = getSubfield("a", datafield); - pages = getSubfield("h", datafield); - + visited[17] = true; + parseNumberingAreaData(datafield); + } else { + visited[18] = true; } // 036D seems to contain more information than the other fields // overwrite information using that field // 036D also contains information normally found in 036E if ("036D".equals(tag)) { - // 021 might have been present - if (title != null) { - // convert old title (contained in "a" of 021A) to volume - if (title.startsWith("@")) { - // "@" indicates a number - title = title.substring(1); - } - number = title; - } - //title and subtitle - title = getSubfield("a", datafield); - subtitle = getSubfield("d", datafield); - volume = getSubfield("l", datafield); + visited[19] = true; + parseLinkToMultiVolumePublicationData(datafield); + } else { + visited[20] = true; } //series and number if ("036E".equals(tag)) { - series = getSubfield("a", datafield); - number = getSubfield("l", datafield); - String kor = getSubfield("b", datafield); - - if (kor != null) { - series = series + " / " + kor; - } + visited[21] = true; + parseExtraLinkSerialPublicationData(datafield); + } else { + visited[22] = true; } //note if ("037A".equals(tag)) { - note = getSubfield("a", datafield); + visited[23] = true; + parseGeneralNoteData(datafield); + } else { + visited[24] = true; } //edition if ("032@".equals(tag)) { - edition = getSubfield("a", datafield); + visited[25] = true; + parseEditionAreaData(datafield); + } else { + visited[26] = true; } //isbn if ("004A".equals(tag)) { - final String isbn10 = getSubfield("0", datafield); - final String isbn13 = getSubfield("A", datafield); - - if (isbn10 != null) { - isbn = isbn10; - } - - if (isbn13 != null) { - isbn = isbn13; - } - + visited[27] = true; + parseISBNData(datafield); + } else { + visited[28] = true; } // Hochschulschriftenvermerk // Bei einer Verlagsdissertation ist der Ort schon eingetragen if ("037C".equals(tag)) { - if (address == null) { - address = getSubfield("b", datafield); - if (address != null) { - address = removeSortCharacters(address); - } - } - - String st = getSubfield("a", datafield); - if ((st != null) && st.contains("Diss")) { - entryType = StandardEntryType.PhdThesis; - } + visited[29] = true; + parseDissertationNoteData(datafield); + } else { + visited[30] = true; } //journal oder booktitle @@ -265,36 +245,34 @@ private BibEntry parseEntry(Element e) { * (sonst in Kategorie 033A). */ if ("027D".equals(tag)) { - journal = getSubfield("a", datafield); - booktitle = getSubfield("a", datafield); - address = getSubfield("p", datafield); - publisher = getSubfield("n", datafield); + visited[31] = true; + parseVolumeSetAndEssayData(datafield); + } else { + visited[32] = true; } //pagetotal if ("034D".equals(tag)) { - pagetotal = getSubfield("a", datafield); - - if (pagetotal != null) { - // S, S. etc. entfernen - pagetotal = pagetotal.replaceAll(" S\\.?$", ""); - } + visited[33] = true; + parsePhysicalInformationData(datafield); + } else { + visited[34] = true; } // Behandlung von Konferenzen if ("030F".equals(tag)) { - address = getSubfield("k", datafield); - - if (!"proceedings".equals(entryType)) { - subtitle = getSubfield("a", datafield); - } - - entryType = StandardEntryType.Proceedings; + visited[35] = true; + parseConferenceData(datafield); + } else { + visited[36] = true; } // Wenn eine Verlagsdiss vorliegt if (entryType.equals(StandardEntryType.PhdThesis) && (isbn != null)) { + visited[37] = true; entryType = StandardEntryType.Book; + } else { + visited[38] = true; } //Hilfskategorien zur Entscheidung @article @@ -305,55 +283,99 @@ private BibEntry parseEntry(Element e) { //Quelle unvollständig sind (z.B. nicht Serie //und Nummer angegeben werden) if ("039B".equals(tag)) { - quelle = getSubfield("8", datafield); + visited[38] = true; + parseRelationToParentLiteratureData(datafield); + } else { + visited[40] = true; } + if ("046R".equals(tag) && ((quelle == null) || quelle.isEmpty())) { - quelle = getSubfield("a", datafield); + visited[41] = true; + parseLiteratureSourceData(datafield); + } else { + visited[42] = true; } // URLs behandeln if ("009P".equals(tag) && ("03".equals(datafield.getAttribute("occurrence")) || "05".equals(datafield.getAttribute("occurrence"))) && (url == null)) { - url = getSubfield("a", datafield); + visited[43] = true; + parseOnlineResourceData(datafield); + } else { + visited[44] = true; } } - + // if we skipped the for loop completely + if (SIZEOFDATAFIELDS == 0) { + visited[45] = true; + } // Abfangen von Nulleintraegen if (quelle == null) { + visited[46] = true; quelle = ""; + } else { + visited[47] = true; } // Nichtsortierzeichen entfernen if (author != null) { + visited[48] = true; author = removeSortCharacters(author); + } else { + visited[49] = true; } + if (editor != null) { + visited[50] = true; editor = removeSortCharacters(editor); + } else { + visited[51] = true; } + if (title != null) { + visited[52] = true; title = removeSortCharacters(title); + } else { + visited[53] = true; } + if (subtitle != null) { + visited[54] = true; subtitle = removeSortCharacters(subtitle); + } else { + visited[55] = true; } // Dokumenttyp bestimmen und Eintrag anlegen if (mak.startsWith("As")) { + visited[56] = true; entryType = BibEntry.DEFAULT_TYPE; if (quelle.contains("ISBN")) { + visited[57] = true; entryType = StandardEntryType.InCollection; + } else { + visited[58] = true; } + if (quelle.contains("ZDB-ID")) { + visited[59] = true; entryType = StandardEntryType.Article; + } else { + visited[60] = true; } + } else if (mak.isEmpty()) { + visited[61] = true; entryType = BibEntry.DEFAULT_TYPE; } else if (mak.startsWith("O")) { + visited[62] = true; entryType = BibEntry.DEFAULT_TYPE; // FIXME: online only available in Biblatex //entryType = "online"; + } else { + visited[63] = true; } /* @@ -365,16 +387,80 @@ private BibEntry parseEntry(Element e) { */ BibEntry result = new BibEntry(entryType); + configureBibEntry(result); + + try { + File f = new File("/tmp/parseEntry.txt"); + BufferedWriter bw = new BufferedWriter(new FileWriter(f)); + double frac = 0; + for(int i = 0; i < visited.length; ++i) { + frac += (visited[i] ? 1 : 0); + bw.write("branch " + i + " was " + (visited[i] ? " visited." : " not visited.") + "\n"); + } + bw.write("" + frac/visited.length); + bw.close(); + } catch (Exception exc) { + System.err.println("Could not open/write to file!"); + exc.printStackTrace(); + } + + return result; + } + + /* HELPER FUNCTIONS FOR parseEntry */ + + private void parseVolumeSetAndEssayData(Element datafield) { + journal = getSubfield("a", datafield); + booktitle = getSubfield("a", datafield); + address = getSubfield("p", datafield); + publisher = getSubfield("n", datafield); + } + + private void parsePhysicalInformationData(Element datafield) { + pagetotal = getSubfield("a", datafield); + + if (pagetotal != null) { + // S, S. etc. entfernen + pagetotal = pagetotal.replaceAll(" S\\.?$", ""); + } + } + + private void parseConferenceData(Element datafield) { + address = getSubfield("k", datafield); + + if (!"proceedings".equals(entryType)) { + subtitle = getSubfield("a", datafield); + } + + entryType = StandardEntryType.Proceedings; + } + + private void parseRelationToParentLiteratureData(Element datafield) { + quelle = getSubfield("8", datafield); + } + + private void parseLiteratureSourceData(Element datafield) { + quelle = getSubfield("a", datafield); + } + + private void parseOnlineResourceData(Element datafield) { + url = getSubfield("a", datafield); + } + + private void configureBibEntry(BibEntry result) { // Zuordnung der Felder in Abhängigkeit vom Dokumenttyp if (author != null) { result.setField(StandardField.AUTHOR, author); } + if (editor != null) { result.setField(StandardField.EDITOR, editor); } + if (title != null) { result.setField(StandardField.TITLE, title); } + if (!Strings.isNullOrEmpty(subtitle)) { // ensure that first letter is an upper case letter // there could be the edge case that the string is only one character long, therefore, this special treatment @@ -384,50 +470,66 @@ private BibEntry parseEntry(Element e) { if (subtitle.length() > 1) { newSubtitle.append(subtitle.substring(1)); } + result.setField(StandardField.SUBTITLE, newSubtitle.toString()); } + if (publisher != null) { result.setField(StandardField.PUBLISHER, publisher); - } + } + if (year != null) { result.setField(StandardField.YEAR, year); } + if (address != null) { result.setField(StandardField.ADDRESS, address); } + if (series != null) { result.setField(StandardField.SERIES, series); } + if (edition != null) { result.setField(StandardField.EDITION, edition); - } + } + if (isbn != null) { result.setField(StandardField.ISBN, isbn); } + if (issn != null) { result.setField(StandardField.ISSN, issn); } + if (number != null) { result.setField(StandardField.NUMBER, number); } + if (pagetotal != null) { result.setField(StandardField.PAGETOTAL, pagetotal); } + if (pages != null) { result.setField(StandardField.PAGES, pages); } + if (volume != null) { result.setField(StandardField.VOLUME, volume); } + if (journal != null) { result.setField(StandardField.JOURNAL, journal); } + if (ppn != null) { result.setField(new UnknownField("ppn_GVK"), ppn); - } + } + if (url != null) { result.setField(StandardField.URL, url); } + if (note != null) { result.setField(StandardField.NOTE, note); } @@ -437,8 +539,137 @@ private BibEntry parseEntry(Element e) { } else if ("incollection".equals(entryType) && (booktitle != null)) { result.setField(StandardField.BOOKTITLE, booktitle); } + } - return result; + private void parseBibliographicTypeAndStatusData(Element datafield) { + mak = getSubfield("0", datafield); + if (mak == null) { + mak = ""; + } else { + } + } + + private void parseRecordControlNumberData(Element datafield) { + ppn = getSubfield("0", datafield); + } + + private void parsePrimaryAuthorData(Element datafield) { + String vorname = getSubfield("d", datafield); + String nachname = getSubfield("a", datafield); + + if (author == null) { + author = ""; + } else { + author = author.concat(" and "); + } + author = author.concat(vorname + " " + nachname); + } + + private void parseCoauthorData(Element datafield) { + String vorname = getSubfield("d", datafield); + String nachname = getSubfield("a", datafield); + + if (author == null) { + author = ""; + } else { + author = author.concat(" and "); + } + author = author.concat(vorname + " " + nachname); + } + + private void parseSecondaryAuthorData(Element datafield) { + String vorname = getSubfield("d", datafield); + String nachname = getSubfield("a", datafield); + + if (editor == null) { + editor = ""; + } else { + editor = editor.concat(" and "); + } + editor = editor.concat(vorname + " " + nachname); + } + + private void parseLinkToMultiVolumePublicationData(Element datafield) { + // 021 might have been present + if (title != null) { + // convert old title (contained in "a" of 021A) to volume + if (title.startsWith("@")) { + // "@" indicates a number + title = title.substring(1); + } + + number = title; + } + + //title and subtitle + title = getSubfield("a", datafield); + subtitle = getSubfield("d", datafield); + volume = getSubfield("l", datafield); + } + + private void parseNumberingAreaData(Element datafield) { + year = getSubfield("j", datafield); + volume = getSubfield("e", datafield); + number = getSubfield("a", datafield); + pages = getSubfield("h", datafield); + } + + private void parseDateOfPublicationData(Element datafield) { + year = getSubfield("a", datafield); + } + + private void parseFirstPublisherData(Element datafield) { + publisher = getSubfield("n", datafield); + address = getSubfield("p", datafield); + } + + private void parseTitleAndStatementOfResponsibilityAreaData(Element datafield) { + title = getSubfield("a", datafield); + subtitle = getSubfield("d", datafield); + } + + private void parseExtraLinkSerialPublicationData(Element datafield) { + series = getSubfield("a", datafield); + number = getSubfield("l", datafield); + String kor = getSubfield("b", datafield); + if (kor != null) { + series = series + " / " + kor; + } + } + + private void parseGeneralNoteData(Element datafield) { + note = getSubfield("a", datafield); + } + + private void parseEditionAreaData(Element datafield) { + edition = getSubfield("a", datafield); + } + + private void parseISBNData(Element datafield) { + final String isbn10 = getSubfield("0", datafield); + final String isbn13 = getSubfield("A", datafield); + + if (isbn10 != null) { + isbn = isbn10; + } + + if (isbn13 != null) { + isbn = isbn13; + } + } + + private void parseDissertationNoteData(Element datafield) { + if (address == null) { + address = getSubfield("b", datafield); + if (address != null) { + address = removeSortCharacters(address); + } + } + + String st = getSubfield("a", datafield); + if ((st != null) && st.contains("Diss")) { + entryType = StandardEntryType.PhdThesis; + } } private String getSubfield(String a, Element datafield) { diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java index 6cc91297e18..92cebdad212 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GvkParserTest.java @@ -44,6 +44,68 @@ public void resultFor797485368() throws Exception { doTest("gvk_result_for_797485368.xml", 1, Collections.singletonList("gvk_result_for_797485368.bib")); } + @Test + /** + * Checks that the tag 037C works correctly. + * + * Specifically, checks that the PICA+ (PICA 3.0) + * specification is correctly followed for the tag + * 037C (i.e academic records), by checking that + * dissertation information and address can be + * correctly extracted from the academic record. + */ + public void test037C() throws Exception { + doTest("gvk_037C_diss.xml", 1, Collections.singletonList("gvk_037C_diss.bib")); + doTest("gvk_037C_book.xml", 1, Collections.singletonList("gvk_037C_book.bib")); + } + + @Test + /** + * Checks that the tag 030F works correctly. + * + * Specifically, checks that the PICA+ (PICA 3.0) + * specification is correctly followed for the tag + * 030F (i.e conference information), by checking that + * conference information is correctly extracted and + * put into the Bib entry. + * + * This is done by sending conference information such + * as a subtitle and conference address. + */ + public void test030F() throws Exception { + doTest("gvk_030F.xml", 1, Collections.singletonList("gvk_030F.bib")); + } + + @Test + /** + * Checks that the tag 031A works correctly. + * + * Specifically, checks that the PICA+ (PICA 3.0) + * specification is correctly followed for the tag + * 031A (i.e differentiating information), by checking + * that the year is properly overwritten with the new + * year (2020). + */ + public void test031A() throws Exception { + doTest("gvk_031A.xml", 1, Collections.singletonList("gvk_031A.bib")); + } + + @Test + /** + * Checks that the tag 002@ works correctly. + * + * Specifically, checks that the PICA+ (PICA 3.0) + * specification is correctly followed for the tag + * 002@ (i.e document type/status/information), by + * checking that the document type is correctly + * extracted. + * + * This test checks the type "Asy". + */ + public void test002AT() throws Exception { + doTest("gvk_002@.xml", 1, Collections.singletonList("gvk_002@.bib")); + } + @Test public void testGMP() throws Exception { doTest("gvk_gmp.xml", 2, Arrays.asList("gvk_gmp.1.bib", "gvk_gmp.2.bib")); diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.bib b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.bib new file mode 100644 index 00000000000..b228be116f9 --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.bib @@ -0,0 +1,7 @@ +@Misc{, + Title = {On the Simplicity-Efficiency Trade-off in Learning}, + Author = {Anne Doe and John Doe}, + Year = {2020}, + Address = {Stockholm}, + Subtitle = {As Applied to the "Hello World" Example} +} diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.xml b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.xml new file mode 100644 index 00000000000..3b1f02b7281 --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_002@.xml @@ -0,0 +1,46 @@ + + +1.1 +1 + + +picaxml +xml + + + + 2020 + + + Asy + + + On the Simplicity-Efficiency Trade-off in Learning + As Applied to the "Hello World" Example + + + Anne + Doe + + + John + Doe + + + Stockholm + KTH Royal Institute of Technology, Diss., 2020 + + + +1 + + + +1.1 +pica.all=Stockholm +50 +xml +picaxml +Year,,1 + + \ No newline at end of file diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.bib b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.bib new file mode 100644 index 00000000000..0ac307b6db7 --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.bib @@ -0,0 +1,7 @@ +@Misc{, + Title = {Hello}, + Author = {John Doe}, + Year = {2020}, + Address = {Hacky Street}, + Subtitle = {World} +} diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.xml b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.xml new file mode 100644 index 00000000000..8f30dec984e --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_030F.xml @@ -0,0 +1,39 @@ + + +1.1 +1 + + +picaxml +xml + + + + 2020 + + + Hello + World + + + John + Doe + + + Hacky Street + World + + + +1 + + + +1.1 +pica.all=797485368 +50 +xml +picaxml +Year,,1 + + \ No newline at end of file diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.bib b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.bib new file mode 100644 index 00000000000..15a9f427acc --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.bib @@ -0,0 +1,7 @@ +@PhdThesis{, + Title = {On the Simplicity-Efficiency Trade-off in Learning}, + Author = {Anne Doe and John Doe}, + Year = {2020}, + Address = {Stockholm}, + Subtitle = {As Applied to the "Hello World" Example} +} diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.xml b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.xml new file mode 100644 index 00000000000..322344b301f --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_031A.xml @@ -0,0 +1,49 @@ + + +1.1 +1 + + +picaxml +xml + + + + 1999 + + + Aau + + + 2020 + + + On the Simplicity-Efficiency Trade-off in Learning + As Applied to the "Hello World" Example + + + Anne + Doe + + + John + Doe + + + Stockholm + KTH Royal Institute of Technology, Diss., 2020 + + + +1 + + + +1.1 +pica.all=Stockholm +50 +xml +picaxml +Year,,1 + + \ No newline at end of file diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.bib b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.bib new file mode 100644 index 00000000000..9f3f74ed86e --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.bib @@ -0,0 +1,8 @@ +@Book{, + Title = {On the Simplicity-Efficiency Trade-off in Learning}, + Author = {Anne Doe and John Doe}, + ISBN = {123456789}, + Year = {2020}, + Address = {Stockholm}, + Subtitle = {As Applied to the "Hello World" Example} +} diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.xml b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.xml new file mode 100644 index 00000000000..4da0f89ec4a --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_book.xml @@ -0,0 +1,49 @@ + + +1.1 +1 + + +picaxml +xml + + + + 2020 + + + Aau + + + 123456789 + + + On the Simplicity-Efficiency Trade-off in Learning + As Applied to the "Hello World" Example + + + Anne + Doe + + + John + Doe + + + Stockholm + KTH Royal Institute of Technology, Diss., 2020 + + + +1 + + + +1.1 +pica.all=123456789 +50 +xml +picaxml +Year,,1 + + \ No newline at end of file diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.bib b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.bib new file mode 100644 index 00000000000..15a9f427acc --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.bib @@ -0,0 +1,7 @@ +@PhdThesis{, + Title = {On the Simplicity-Efficiency Trade-off in Learning}, + Author = {Anne Doe and John Doe}, + Year = {2020}, + Address = {Stockholm}, + Subtitle = {As Applied to the "Hello World" Example} +} diff --git a/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.xml b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.xml new file mode 100644 index 00000000000..a6e70a621de --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fetcher/gvk_037C_diss.xml @@ -0,0 +1,46 @@ + + +1.1 +1 + + +picaxml +xml + + + + 2020 + + + Aau + + + On the Simplicity-Efficiency Trade-off in Learning + As Applied to the "Hello World" Example + + + Anne + Doe + + + John + Doe + + + Stockholm + KTH Royal Institute of Technology, Diss., 2020 + + + +1 + + + +1.1 +pica.all=Stockholm +50 +xml +picaxml +Year,,1 + + \ No newline at end of file