From 682bbc51d8f2d0eadcdd6548dcf06c1f385da589 Mon Sep 17 00:00:00 2001
From: Simon Cozens
Date: Sun, 5 Apr 2015 20:51:16 +0900
Subject: [PATCH] =?UTF-8?q?Here=E2=80=99s=20a=20bibtex=20replacement=20in?=
 =?UTF-8?q?=20400=20lines=20of=20Lua.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packages/bibliography.lua      | 490 +++++++++++++++++++++++++++++++++
 packages/bibstyles/chicago.lua |  14 ++
 packages/bibtex.lua            |  12 +-
 3 files changed, 514 insertions(+), 2 deletions(-)
 create mode 100644 packages/bibliography.lua
 create mode 100644 packages/bibstyles/chicago.lua

diff --git a/packages/bibliography.lua b/packages/bibliography.lua
new file mode 100644
index 000000000..6d025cc5a
--- /dev/null
+++ b/packages/bibliography.lua
@@ -0,0 +1,490 @@
+std = require("std")
+
+--- Report a non-fatal problem with a bibliography entry on stderr.
+-- @param fmt string.format template
+-- @param ... values substituted into fmt
+local function biberrorf(fmt, ...)
+  io.stderr:write("bibliography: ", string.format(fmt, ...), "\n")
+end
+
+--- Like string.find, but skips over balanced {...} groups.
+-- Whenever a balanced group opens before the candidate match, the search
+-- restarts just after that group's closing brace.
+-- @param s subject string
+-- @param pat Lua pattern
+-- @param i start position, as for string.find
+-- @return start, end and any captures of the match, or nil
+local function find_outside_braces(s, pat, i)
+  local j, k = string.find(s, pat, i)
+  if not j then return j, k end
+  local jb, kb = string.find(s, '%b{}', i)
+  while jb and jb < j do --- scan past braces
+    --- braces come first, so we search again after close brace
+    local i2 = kb + 1
+    j, k = string.find(s, pat, i2)
+    if not j then return j, k end
+    jb, kb = string.find(s, '%b{}', i2)
+  end
+  -- either pat precedes braces or there are no braces
+  return string.find(s, pat, j) --- 2nd call needed to get captures
+end
+
+--- Return the list of substrings of s separated by pat.
+-- @param s subject string
+-- @param pat separator pattern
+-- @param find search function; string.find unless given
+local function split(s, pat, find)
+  find = find or string.find -- could be find_outside_braces
+  local len = string.len(s)
+  local t = { }
+  local insert = table.insert
+  local i, j, k = 1, true
+  while j and i <= len + 1 do
+    j, k = find(s, pat, i)
+    if j then
+      insert(t, string.sub(s, i, j-1))
+      i = k + 1
+    else
+      insert(t, string.sub(s, i))
+    end
+  end
+  return t
+end
+
+--- Return the list of separators (the matches of pat) found in s.
+-- @param s subject string
+-- @param pat separator pattern
+-- @param find search function; string.find unless given
+local function splitters(s, pat, find)
+  find = find or string.find -- could be find_outside_braces
+  local t = { }
+  local insert = table.insert
+  local j, k = find(s, pat, 1)
+  while j do
+    insert(t, string.sub(s, j, k))
+    j, k = find(s, pat, k+1)
+  end
+  return t
+end
+
+--- Split a BibTeX name list on the word "and" (any case, outside braces).
+-- A repeated "and" yields an empty-string entry.
+local function namesplit(s)
+  local t = split(s, '%s+[aA][nN][dD]%s+', find_outside_braces)
+  local i = 2
+  while i <= #t do
+    while string.find(t[i], '^[aA][nN][dD]%s+') do
+      t[i] = string.gsub(t[i], '^[aA][nN][dD]%s+', '')
+      table.insert(t, i, '')
+      i = i + 1
+    end
+    i = i + 1
+  end
+  return t
+end
+
+local sep_and_not_tie = '%-'
+local sep_chars = sep_and_not_tie .. '%~'
+
+-- Control sequences that BibTeX treats as upper- and lower-case letters.
+local upper_specials = { OE = true, AE = true, AA = true, O = true, L = true }
+local lower_specials = {
+  i = true, j = true, oe = true, ae = true, aa = true, o = true, l = true,
+  ss = true,
+}
+
+--- Count the text characters of s the way the name formatter needs:
+-- a top-level {\command ...} group counts as one character, other braces
+-- count as none, and every remaining byte counts as one.
+local function text_char_count(s)
+  local count, depth, i, len = 0, 0, 1, string.len(s)
+  while i <= len do
+    local c = string.sub(s, i, i)
+    if c == '{' and depth == 0 and string.sub(s, i + 1, i + 1) == '\\' then
+      local _, close = string.find(s, '^%b{}', i)
+      count = count + 1
+      i = close or len -- unbalanced special: swallow the rest
+    elseif c == '{' then
+      depth = depth + 1
+    elseif c == '}' then
+      if depth > 0 then depth = depth - 1 end
+    else
+      count = count + 1
+    end
+    i = i + 1
+  end
+  return count
+end
+
+local parse_name
+do
+  local white_sep = '[' .. sep_chars .. '%s]+'
+  local white_comma_sep = '[' .. sep_chars .. '%s%,]+'
+  local trailing_commas = '(,[' .. sep_chars .. '%s%,]*)$'
+  local sep_char = '[' .. sep_chars .. ']'
+  local leading_white_sep = '^' .. white_sep
+
+  --- Decide whether a name token belongs to the "von" part, i.e. whether
+  -- its first letter outside braces (or its leading special) is lower case.
+  local function isVon(s)
+    local lower = find_outside_braces(s, '%l') -- first nonbrace lowercase
+    local letter = find_outside_braces(s, '%a') -- first nonbrace letter
+    local bs, ebs, command = find_outside_braces(s, '%{%\\(%a+)') -- \xxx
+    if lower and lower <= letter and lower <= (bs or lower) then
+      return true
+    elseif letter and letter <= (bs or letter) then
+      return false
+    elseif bs then
+      if upper_specials[command] then
+        return false
+      elseif lower_specials[command] then
+        return true
+      else
+        -- unknown command: fall back to the first letter anywhere in s
+        lower = string.find(s, '%l') -- first lowercase
+        letter = string.find(s, '%a') -- first letter
+        return lower and lower <= letter
+      end
+    else
+      return false
+    end
+  end
+
+  --- Parse one BibTeX name into its parts.
+  -- @param s the name, e.g. "First von Last" or "von Last, Jr, First"
+  -- @param inter_token optional string placed between the tokens of a part
+  -- @return table with the long forms ff, vv, ll, jj and the abbreviated
+  -- forms f, v, l, j; a part that is absent has no entry
+  function parse_name(s, inter_token)
+    if string.find(s, trailing_commas) then
+      biberrorf("Name '%s' has one or more commas at the end", s)
+    end
+    s = string.gsub(s, trailing_commas, '')
+    s = string.gsub(s, leading_white_sep, '')
+    local tokens = split(s, white_comma_sep, find_outside_braces)
+    local trailers = splitters(s, white_comma_sep, find_outside_braces)
+    -- The string separating tokens is reduced to a single
+    -- ``separator character.'' A comma always trumps other
+    -- separator characters. Otherwise, if there's no comma,
+    -- we take the first character, be it a separator or a
+    -- space. (Patashnik considers that multiple such
+    -- characters constitute ``silliness'' on the user's
+    -- part.)
+    for i = 1, #trailers do
+      local sep = trailers[i]
+      assert(string.len(sep) > 0)
+      if string.find(sep, ',') then
+        trailers[i] = ','
+      else
+        trailers[i] = string.sub(sep, 1, 1)
+      end
+    end
+    local commas = { } --- maps each comma to index of token that follows it
+    for i, t in ipairs(trailers) do
+      string.gsub(t, ',', function() table.insert(commas, i+1) end)
+    end
+    local name = { }
+    -- A name has up to four parts: the most general form is
+    -- either ``First von Last, Junior'' or ``von Last,
+    -- First, Junior'', but various vons and Juniors can be
+    -- omitted. The name-parsing algorithm is baroque and is
+    -- transliterated from the original BibTeX source, but
+    -- the principle is clear: assign the full version of
+    -- each part to the four fields [[ff]], [[vv]], [[ll]],
+    -- and [[jj]]; and assign an abbreviated version of each
+    -- part to the fields [[f]], [[v]], [[l]], and [[j]].
+    local first_start, first_lim, last_lim, von_start, von_lim, jr_lim
+    -- variables mark subsequences; if start == lim, sequence is empty
+    local n = #tokens
+    -- The von name, if any, goes from the first von token to
+    -- the last von token, except the last name is entitled
+    -- to at least one token. So to find the limit of the von
+    -- name, we start just before the last token and wind
+    -- down until we find a von token or we hit the von start
+    -- (in which latter case there is no von name).
+    local function divide_von_from_last()
+      von_lim = last_lim - 1;
+      while von_lim > von_start and not isVon(tokens[von_lim-1]) do
+        von_lim = von_lim - 1
+      end
+    end
+
+    local commacount = #commas
+    if commacount == 0 then -- first von last jr
+      von_start, first_start, last_lim, jr_lim = 1, 1, n+1, n+1
+      local got_von = false
+      while von_start < last_lim-1 do
+        if isVon(tokens[von_start]) then
+          divide_von_from_last()
+          got_von = true
+          break
+        else
+          von_start = von_start + 1
+        end
+      end
+      if not got_von then -- there is no von name
+        while von_start > 1 and string.find(trailers[von_start - 1], sep_and_not_tie) do
+          von_start = von_start - 1
+        end
+        von_lim = von_start
+      end
+      first_lim = von_start
+    elseif commacount == 1 then -- von last jr, first
+      von_start, last_lim, jr_lim, first_start, first_lim =
+        1, commas[1], commas[1], commas[1], n+1
+      divide_von_from_last()
+    elseif commacount == 2 then -- von last, jr, first
+      von_start, last_lim, jr_lim, first_start, first_lim =
+        1, commas[1], commas[2], commas[2], n+1
+      divide_von_from_last()
+    else
+      -- the name cannot be divided into parts, so fail loudly
+      error(string.format("Too many commas in name '%s'", s))
+    end
+    -- We set long and short forms together; [[ss]] is the
+    -- long form and [[s]] is the short form.
+    local function set_name(start, lim, long, short)
+      if start < lim then
+        -- string concatenation is quadratic, but names are short
+        -- An abbreviated token is the first letter of a token,
+        -- except again we have to deal with the damned specials.
+        local function abbrev(token)
+          local first_alpha, _, alpha = string.find(token, '(%a)')
+          local first_brace = string.find(token, '%{%\\')
+          if first_alpha and first_alpha <= (first_brace or first_alpha) then
+            return alpha
+          elseif first_brace then
+            local i, j, special = string.find(token, '(%b{})', first_brace)
+            if i then
+              return special
+            else -- unbalanced braces
+              return string.sub(token, first_brace)
+            end
+          else
+            return ''
+          end
+        end
+        local ss = tokens[start]
+        local s = abbrev(tokens[start])
+        for i = start + 1, lim - 1 do
+          if inter_token then
+            ss = ss .. inter_token .. tokens[i]
+            s = s .. inter_token .. abbrev(tokens[i])
+          else
+            local ssep, nnext = trailers[i-1], tokens[i]
+            local sep, short_next = ssep, abbrev(nnext)
+            -- Here is the default for a character between tokens:
+            -- a tie is the default space character between the last
+            -- two tokens of the name part, and between the first two
+            -- tokens if the first token is short enough; otherwise,
+            -- a space is the default.
+            if string.find(sep, sep_char) then
+              -- do nothing; sep is OK
+            elseif i == lim-1 then
+              sep, ssep = '~', '~'
+            elseif i == start + 1 then
+              sep = text_char_count(s) < 3 and '~' or ' '
+              ssep = text_char_count(ss) < 3 and '~' or ' '
+            else
+              sep, ssep = ' ', ' '
+            end
+            ss = ss .. ssep .. nnext
+            s = s .. '.' .. sep .. short_next
+          end
+        end
+        name[long] = ss
+        name[short] = s
+      end
+    end
+    set_name(first_start, first_lim, 'ff', 'f')
+    set_name(von_start, von_lim, 'vv', 'v')
+    set_name(von_lim, last_lim, 'll', 'l')
+    set_name(last_lim, jr_lim, 'jj', 'j')
+    return name
+  end
+end
+
+-- Largest positive integer key of t, so that lists with nil holes
+-- (fields missing from an entry) can still be walked in order.
+local function list_limit(t)
+  local limit = 0
+  for k in pairs(t) do
+    if type(k) == "number" and k > limit then limit = k end
+  end
+  return limit
+end
+
+-- Look up a field of a bibliography entry under its capitalised name or,
+-- failing that, its lower-case name.
+local function field(item, name)
+  return item[name] or item[name:lower()]
+end
+
+Bibliography = { -- This is big enough to have its own global var
+  CitationStyles = {
+    AuthorYear = function(_ENV)
+      return andSurnames(3), " ", year, optional(", ", cite.page)
+    end
+  },
+
+  --- Format an in-text citation.
+  -- @param cite table with at least a key field (and optionally page)
+  -- @param bib table of entries indexed by citation key
+  -- @param style a Bibliography.Style object
+  -- @return the citation text, or Errors.UNKNOWN_REFERENCE
+  produceCitation = function (cite, bib, style)
+    local item = bib[cite.key]
+    if not item then
+      return Bibliography.Errors.UNKNOWN_REFERENCE
+    end
+    local t = Bibliography.buildEnv(cite, item.attributes, style)
+    return Bibliography._process(item.attributes, {style.CitationStyle(t)})
+  end,
+
+  --- Format a full bibliography entry.
+  -- The entry's type is capitalised in place ("book" becomes "Book") and
+  -- selects the style function to run.
+  -- @return the reference text, or Errors.UNKNOWN_REFERENCE /
+  -- Errors.UNKNOWN_TYPE
+  produceReference = function (cite, bib, style)
+    local item = bib[cite.key]
+    if not item then
+      return Bibliography.Errors.UNKNOWN_REFERENCE
+    end
+    item.type = (item.type:gsub("^%l", string.upper))
+    if not style[item.type] then
+      return Bibliography.Errors.UNKNOWN_TYPE
+    end
+
+    local t = Bibliography.buildEnv(cite, item.attributes, style)
+    return Bibliography._process(item.attributes, {style[item.type](t)})
+  end,
+
+  --- Build the environment a style function runs in: a clone of the
+  -- current environment plus cite, item and the entry's fields under
+  -- lower-cased names, merged with style via std.table.merge.
+  buildEnv = function (cite,item, style)
+    local t = std.table.clone(_ENV)
+    t.cite = cite
+    t.item = item
+    for k,v in pairs(item) do t[k:lower()] = v end
+    return std.table.merge(t, style)
+  end,
+
+  --- Concatenate the pieces of a formatted entry.
+  -- Functions in t are called with item and contribute their result;
+  -- nil pieces (fields the entry lacks) are skipped.
+  -- @param item the entry's attribute table
+  -- @param t list of strings and functions, possibly with holes
+  -- @param dStart optional text put before a non-empty result
+  -- @param dEnd optional text put after a non-empty result
+  -- @return the text; "" when every piece is empty
+  _process = function (item, t, dStart, dEnd)
+    local parts = {}
+    for i = 1, t.n or list_limit(t) do
+      local piece = t[i]
+      if type(piece) == "function" then piece = piece(item) end
+      if piece ~= nil then parts[#parts+1] = piece end
+    end
+    local res = table.concat(parts, "")
+    if res == "" then return "" end
+    return (dStart or "") .. res .. (dEnd or "")
+  end,
+
+  Errors = {
+    UNKNOWN_REFERENCE = 1,
+    UNKNOWN_TYPE = 2,
+  },
+
+  Style = std.object {
+    andAuthors = function(item)
+      local list = field(item, "Author")
+      if not list then return "" end
+      local authors = namesplit(list)
+      if #authors == 1 then
+        return parse_name(authors[1]).ll
+      else
+        for i = 1,#authors do
+          local author = parse_name(authors[i])
+          authors[i] = author.f and (author.ll .. ", " .. author.f .. ".") or author.ll
+        end
+        return table.concat(authors, " and ")
+      end
+    end,
+
+    andSurnames = function (max)
+      return function(item)
+        local list = field(item, "Author")
+        if not list then return "" end
+        local authors = namesplit(list)
+        if #authors > max then
+          return parse_name(authors[1]).ll .. " et al."
+        else
+          for i = 1,#authors do authors[i] = parse_name(authors[i]).ll end
+          return Bibliography.Style.commafy(authors)
+        end
+      end
+    end,
+
+    transEditor = function(item)
+      local r = {}
+      local editor, translator = field(item, "Editor"), field(item, "Translator")
+      if editor then r[#r+1] = "Edited by "..editor end
+      if translator then r[#r+1] = "Translated by "..translator end
+      -- "" (not nil) when neither is present, so optional() drops it
+      return table.concat(r, ", ")
+    end,
+    quotes = function (...)
+      local t = {...}
+      return function(item)
+        return Bibliography._process(item, t, "“", "”")
+      end
+    end,
+    italic = function (...)
+      local t = {...}
+      return function(item)
+        return Bibliography._process(item, t, "\\em{", "}")
+      end
+    end,
+    parens = function (...)
+      local t = {...}
+      return function(item)
+        return Bibliography._process(item, t, "(", ")")
+      end
+    end,
+    --- Yield the concatenated arguments only if every one of them is
+    -- present and non-empty; otherwise yield "".
+    optional = function(...)
+      local t = {n=select('#', ...), ...}
+      return function(item)
+        local parts = {}
+        for i = 1,t.n do
+          local piece = t[i]
+          if type(piece) == "function" then piece = piece(item) end
+          if not piece or piece == "" then return "" end
+          parts[i] = piece
+        end
+        return table.concat(parts, "")
+      end
+    end,
+
+    -- The following functions borrowed from Norman Ramsey's nbibtex,
+    -- with permission.
+    commafy = function (t, andword)
+      andword = andword or 'and'
+      if #t == 0 then
+        return ''
+      elseif #t == 1 then
+        return t[1]
+      elseif #t == 2 then
+        return t[1] .. ' ' .. andword .. ' ' .. t[2]
+      else
+        local last = t[#t]
+        t[#t] = andword .. ' ' .. t[#t]
+        local answer = table.concat(t, ', ')
+        t[#t] = last
+        return answer
+      end
+    end
+  }
+}
diff --git a/packages/bibstyles/chicago.lua b/packages/bibstyles/chicago.lua
new file mode 100644
index 000000000..925108918
--- /dev/null
+++ b/packages/bibstyles/chicago.lua
@@ -0,0 +1,14 @@
+return Bibliography.Style {
+  CitationStyle = Bibliography.CitationStyles.AuthorYear,
+
+  Book = function(_ENV)
+    return andAuthors, " ", year, ". ", italic(title), ". ",
+      optional(transEditor, ". "),
+      address, ": ", publisher, "."
+  end,
+
+  Article = function(_ENV)
+    return andAuthors, ". ", year, ". ", quotes(title, "."), " ", italic(journal), " ",
+      parens(volume), number, optional(":", pages)
+  end
+}
\ No newline at end of file
diff --git a/packages/bibtex.lua b/packages/bibtex.lua
index 8e738d6dd..f4f6cf5dd 100644
--- a/packages/bibtex.lua
+++ b/packages/bibtex.lua
@@ -1,7 +1,7 @@
 epnf = require( "epnf" )
 
 local ID = lpeg.C( (SILE.parserBits.letter+SILE.parserBits.digit)^1 )
-local identifier = (ID + lpeg.P("-"))^1
+local identifier = (ID + lpeg.S(":-"))^1
 local balanced = lpeg.C{ "{" * lpeg.P(" ")^0 * lpeg.C(((1 - lpeg.S"{}") + lpeg.V(1))^0) * "}" } / function(...) t={...}; return t[2] end
 local doubleq = lpeg.C( lpeg.P '"' * lpeg.C(((1 - lpeg.S '"\r\n\f\\') + (lpeg.P '\\' * 1)) ^ 0) * '"' )
 
@@ -37,4 +37,12 @@ local parseBibtex = function(fn)
     end
   end
   return entries
-end
\ No newline at end of file
+end
+
+SILE.scratch.bibtex = {}
+
+SILE.registerCommand("loadbibliography", function(o,c)
+  local file = SU.required(o, "file", "loadbibliography")
+  SILE.scratch.bibtex = parseBibtex(file) -- Later we'll do multiple bibliogs, but not now
+end)
+