-
-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Here’s a bibtex replacement in 400 lines of Lua.
- Loading branch information
1 parent
541a22d
commit 682bbc5
Showing
3 changed files
with
416 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,392 @@ | ||
std = require("std") | ||
|
||
local function find_outside_braces(s, pat, i) | ||
local len = string.len(s) | ||
local j, k = string.find(s, pat, i) | ||
if not j then return j, k end | ||
local jb, kb = string.find(s, '%b{}', i) | ||
while jb and jb < j do --- scan past braces | ||
--- braces come first, so we search again after close brace | ||
local i2 = kb + 1 | ||
j, k = string.find(s, pat, i2) | ||
if not j then return j, k end | ||
jb, kb = string.find(s, '%b{}', i2) | ||
end | ||
-- either pat precedes braces or there are no braces | ||
return string.find(s, pat, j) --- 2nd call needed to get captures | ||
end | ||
|
||
local function split(s, pat, find) --- return list of substrings separated by pat | ||
find = find or string.find -- could be find_outside_braces | ||
local len = string.len(s) | ||
local t = { } | ||
local insert = table.insert | ||
local i, j, k = 1, true | ||
while j and i <= len + 1 do | ||
j, k = find(s, pat, i) | ||
if j then | ||
insert(t, string.sub(s, i, j-1)) | ||
i = k + 1 | ||
else | ||
insert(t, string.sub(s, i)) | ||
end | ||
end | ||
return t | ||
end | ||
|
||
local function splitters(s, pat, find) --- return list of separators | ||
find = find or string.find -- could be find_outside_braces | ||
local t = { } | ||
local insert = table.insert | ||
local j, k = find(s, pat, 1) | ||
while j do | ||
insert(t, string.sub(s, j, k)) | ||
j, k = find(s, pat, k+1) | ||
end | ||
return t | ||
end | ||
|
||
local function namesplit(s) | ||
local t = split(s, '%s+[aA][nN][dD]%s+', find_outside_braces) | ||
local i = 2 | ||
while i <= #t do | ||
while string.find(t[i], '^[aA][nN][dD]%s+') do | ||
t[i] = string.gsub(t[i], '^[aA][nN][dD]%s+', '') | ||
table.insert(t, i, '') | ||
i = i + 1 | ||
end | ||
i = i + 1 | ||
end | ||
return t | ||
end | ||
|
||
local sep_and_not_tie = '%-' | ||
local sep_chars = sep_and_not_tie .. '%~' | ||
|
||
local parse_name | ||
do | ||
local white_sep = '[' .. sep_chars .. '%s]+' | ||
local white_comma_sep = '[' .. sep_chars .. '%s%,]+' | ||
local trailing_commas = '(,[' .. sep_chars .. '%s%,]*)$' | ||
local sep_char = '[' .. sep_chars .. ']' | ||
local leading_white_sep = '^' .. white_sep | ||
|
||
-- <name-parsing utilities>= | ||
function isVon(s) | ||
local lower = find_outside_braces(s, '%l') -- first nonbrace lowercase | ||
local letter = find_outside_braces(s, '%a') -- first nonbrace letter | ||
local bs, ebs, command = find_outside_braces(s, '%{%\\(%a+)') -- \xxx | ||
if lower and lower <= letter and lower <= (bs or lower) then | ||
return true | ||
elseif letter and letter <= (bs or letter) then | ||
return false | ||
elseif bs then | ||
if upper_specials[command] then | ||
return false | ||
elseif lower_specials[command] then | ||
return true | ||
else | ||
local close_brace = find_outside_braces(s, '%}', ebs+1) | ||
lower = find(s, '%l') -- first nonbrace lowercase | ||
letter = find(s, '%a') -- first nonbrace letter | ||
return lower and lower <= letter | ||
end | ||
else | ||
return false | ||
end | ||
end | ||
|
||
function parse_name(s, inter_token) | ||
if string.find(s, trailing_commas) then | ||
biberrorf("Name '%s' has one or more commas at the end", s) | ||
end | ||
s = string.gsub(s, trailing_commas, '') | ||
s = string.gsub(s, leading_white_sep, '') | ||
local tokens = split(s, white_comma_sep, find_outside_braces) | ||
local trailers = splitters(s, white_comma_sep, find_outside_braces) | ||
-- The string separating tokens is reduced to a single | ||
-- ``separator character.'' A comma always trumps other | ||
-- separator characters. Otherwise, if there's no comma, | ||
-- we take the first character, be it a separator or a | ||
-- space. (Patashnik considers that multiple such | ||
-- characters constitute ``silliness'' on the user's | ||
-- part.) | ||
-- <rewrite [[trailers]] to hold a single separator character each>= | ||
for i = 1, #trailers do | ||
local s = trailers[i] | ||
assert(string.len(s) > 0) | ||
if string.find(s, ',') then | ||
trailers[i] = ',' | ||
else | ||
trailers[i] = string.sub(s, 1, 1) | ||
end | ||
end | ||
local commas = { } --- maps each comma to index of token the follows it | ||
for i, t in ipairs(trailers) do | ||
string.gsub(t, ',', function() table.insert(commas, i+1) end) | ||
end | ||
local name = { } | ||
-- A name has up to four parts: the most general form is | ||
-- either ``First von Last, Junior'' or ``von Last, | ||
-- First, Junior'', but various vons and Juniors can be | ||
-- omitted. The name-parsing algorithm is baroque and is | ||
-- transliterated from the original BibTeX source, but | ||
-- the principle is clear: assign the full version of | ||
-- each part to the four fields [[ff]], [[vv]], [[ll]], | ||
-- and [[jj]]; and assign an abbreviated version of each | ||
-- part to the fields [[f]], [[v]], [[l]], and [[j]]. | ||
-- <parse the name tokens and set fields of [[name]]>= | ||
local first_start, first_lim, last_lim, von_start, von_lim, jr_lim | ||
-- variables mark subsequences; if start == lim, sequence is empty | ||
local n = #tokens | ||
-- The von name, if any, goes from the first von token to | ||
-- the last von token, except the last name is entitled | ||
-- to at least one token. So to find the limit of the von | ||
-- name, we start just before the last token and wind | ||
-- down until we find a von token or we hit the von start | ||
-- (in which latter case there is no von name). | ||
-- <local parsing functions>= | ||
function divide_von_from_last() | ||
von_lim = last_lim - 1; | ||
while von_lim > von_start and not isVon(tokens[von_lim-1]) do | ||
von_lim = von_lim - 1 | ||
end | ||
end | ||
|
||
local commacount = #commas | ||
if commacount == 0 then -- first von last jr | ||
von_start, first_start, last_lim, jr_lim = 1, 1, n+1, n+1 | ||
-- OK, here's one form. | ||
-- | ||
-- <parse first von last jr>= | ||
local got_von = false | ||
while von_start < last_lim-1 do | ||
if isVon(tokens[von_start]) then | ||
divide_von_from_last() | ||
got_von = true | ||
break | ||
else | ||
von_start = von_start + 1 | ||
end | ||
end | ||
if not got_von then -- there is no von name | ||
while von_start > 1 and string.find(trailers[von_start - 1], sep_and_not_tie) do | ||
von_start = von_start - 1 | ||
end | ||
von_lim = von_start | ||
end | ||
first_lim = von_start | ||
elseif commacount == 1 then -- von last jr, first | ||
von_start, last_lim, jr_lim, first_start, first_lim = | ||
1, commas[1], commas[1], commas[1], n+1 | ||
divide_von_from_last() | ||
elseif commacount == 2 then -- von last, jr, first | ||
von_start, last_lim, jr_lim, first_start, first_lim = | ||
1, commas[1], commas[2], commas[2], n+1 | ||
divide_von_from_last() | ||
else | ||
biberrorf("Too many commas in name '%s'") | ||
end | ||
-- <set fields of name based on [[first_start]] and friends>= | ||
-- We set long and short forms together; [[ss]] is the | ||
-- long form and [[s]] is the short form. | ||
-- <definition of function [[set_name]]>= | ||
local function set_name(start, lim, long, short) | ||
if start < lim then | ||
-- string concatenation is quadratic, but names are short | ||
-- An abbreviated token is the first letter of a token, | ||
-- except again we have to deal with the damned specials. | ||
-- <definition of [[abbrev]], for shortening a token>= | ||
local function abbrev(token) | ||
local first_alpha, _, alpha = string.find(token, '(%a)') | ||
local first_brace = string.find(token, '%{%\\') | ||
if first_alpha and first_alpha <= (first_brace or first_alpha) then | ||
return alpha | ||
elseif first_brace then | ||
local i, j, special = string.find(token, '(%b{})', first_brace) | ||
if i then | ||
return special | ||
else -- unbalanced braces | ||
return string.sub(token, first_brace) | ||
end | ||
else | ||
return '' | ||
end | ||
end | ||
local ss = tokens[start] | ||
local s = abbrev(tokens[start]) | ||
for i = start + 1, lim - 1 do | ||
if inter_token then | ||
ss = ss .. inter_token .. tokens[i] | ||
s = s .. inter_token .. abbrev(tokens[i]) | ||
else | ||
local ssep, nnext = trailers[i-1], tokens[i] | ||
local sep, next = ssep, abbrev(nnext) | ||
-- Here is the default for a character between tokens: | ||
-- a tie is the default space character between the last | ||
-- two tokens of the name part, and between the first two | ||
-- tokens if the first token is short enough; otherwise, | ||
-- a space is the default. | ||
-- <possibly adjust [[sep]] and [[ssep]] according to token position and size>= | ||
if find(sep, sep_char) then | ||
-- do nothing; sep is OK | ||
elseif i == lim-1 then | ||
sep, ssep = '~', '~' | ||
elseif i == start + 1 then | ||
sep = text_char_count(s) < 3 and '~' or ' ' | ||
ssep = text_char_count(ss) < 3 and '~' or ' ' | ||
else | ||
sep, ssep = ' ', ' ' | ||
end | ||
ss = ss .. ssep .. nnext | ||
s = s .. '.' .. sep .. next | ||
end | ||
end | ||
name[long] = ss | ||
name[short] = s | ||
end | ||
end | ||
set_name(first_start, first_lim, 'ff', 'f') | ||
set_name(von_start, von_lim, 'vv', 'v') | ||
set_name(von_lim, last_lim, 'll', 'l') | ||
set_name(last_lim, jr_lim, 'jj', 'j') | ||
return name | ||
end | ||
end | ||
|
||
Bibliography = { -- This is big enough to have its own global var | ||
CitationStyles = { | ||
AuthorYear = function(_ENV) | ||
return andSurnames(3), " ", year, optional(", ", cite.page) | ||
end | ||
}, | ||
|
||
produceCitation = function (cite, bib, style) | ||
local item = bib[cite.key] | ||
if not item then | ||
return Bibliography.Errors.UNKNOWN_REFERENCE | ||
end | ||
local t = Bibliography.buildEnv(cite, item.attributes, style) | ||
return Bibliography._process(item.attributes, {style.CitationStyle(t)}) | ||
end, | ||
|
||
produceReference = function (cite, bib, style) | ||
local item = bib[cite.key] | ||
if not item then | ||
return Bibliography.Errors.UNKNOWN_REFERENCE | ||
end | ||
item.type = (item.type:gsub("^%l", string.upper)) | ||
if not style[item.type] then | ||
return Bibliography.Errors.UNKNOWN_TYPE | ||
end | ||
|
||
local t = Bibliography.buildEnv(cite, item.attributes, style) | ||
return Bibliography._process(item.attributes, {style[item.type](t)}) | ||
end, | ||
|
||
buildEnv = function (cite,item, style) | ||
local t = std.table.clone(_ENV) | ||
t.cite = cite | ||
t.item = item | ||
for k,v in pairs(item) do t[k:lower()] = v end | ||
return std.table.merge(t, style) | ||
end, | ||
|
||
_process = function (item, t, dStart, dEnd) | ||
for i = 1,#t do | ||
if type(t[i]) == "function" then | ||
t[i] = t[i](item) | ||
end | ||
end | ||
local res = table.concat(t,"") | ||
if dStart or dEnd then | ||
if res ~= "" then return (dStart .. res .. dEnd) end | ||
else | ||
return res | ||
end | ||
end, | ||
|
||
Errors = { | ||
UNKNOWN_REFERENCE = 1, | ||
}, | ||
|
||
Style = std.object { | ||
andAuthors = function(item) | ||
local authors = namesplit(item.Author) | ||
if #authors == 1 then | ||
return parse_name(authors[1]).ll | ||
else | ||
for i = 1,#authors do | ||
local author = parse_name(authors[i]) | ||
authors[i] = author.ll.. ", "..author.f.."." | ||
end | ||
return table.concat(authors, " and ") | ||
end | ||
end, | ||
|
||
andSurnames = function (max) | ||
return function(item) | ||
local authors = namesplit(item.Author) | ||
if #authors > max then | ||
return parse_name(authors[1]).ll .. " et al." | ||
else | ||
for i = 1,#authors do authors[i] = parse_name(authors[i]).ll end | ||
return Bibliography.Style.commafy(authors) | ||
end | ||
end | ||
end, | ||
|
||
transEditor = function(item) | ||
local r = {} | ||
if item.Editor then r[#r+1] = "Edited by "..item.Editor end | ||
if item.Translator then r[#r+1] = "Translated by "..item.Translator end | ||
if #r then return table.concat(r, ", ") end | ||
return nil | ||
end, | ||
quotes = function (...) | ||
local t = {...} | ||
return function(item) | ||
return Bibliography._process(item, t, "“", "”") | ||
end | ||
end, | ||
italic = function (...) | ||
local t = {...} | ||
return function(item) | ||
return Bibliography._process(item, t, "\\em{", "}") | ||
end | ||
end, | ||
parens = function (...) | ||
local t = {...} | ||
return function(item) | ||
return Bibliography._process(item, t, "(", ")") | ||
end | ||
end, | ||
optional = function(...) | ||
local t = {n=select('#', ...), ...} | ||
return function(item) | ||
for i = 1,t.n do | ||
if type(t[i]) == "function" then t[i] = t[i](item) end | ||
if not t[i] or t[i] == "" then return "" end | ||
end | ||
return table.concat(t, "") | ||
end | ||
end, | ||
|
||
-- The following functions borrowed from Norman Ramsey's nbibtex, | ||
-- with permission. | ||
commafy = function (t, andword) | ||
andword = andword or 'and' | ||
if #t == 1 then | ||
return t[1] | ||
elseif #t == 2 then | ||
return t[1] .. ' ' .. andword .. ' ' .. t[2] | ||
else | ||
local last = t[#t] | ||
t[#t] = andword .. ' ' .. t[#t] | ||
local answer = table.concat(t, ', ') | ||
t[#t] = last | ||
return answer | ||
end | ||
end | ||
} | ||
} |
Oops, something went wrong.