Content deleted Content added
glf() tweak |
m "A", "O" and the like are excluded from rendering in small caps |
||
Line 8: | Line 8: | ||
GlossAbbrBoundary = "-.;:<>\\~=%s1-9%[%]", --!! TO DO: allow for html escape codes, but how? |
GlossAbbrBoundary = "-.;:<>\\~=%s1-9%[%]", --!! TO DO: allow for html escape codes, but how? |
||
GlossAbbrStyle = "font-variant: small-caps oldstyle-nums; text-transform: lowercase", |
GlossAbbrStyle = "font-variant: small-caps oldstyle-nums; text-transform: lowercase", |
||
GlossSmallCapsExclude = "^[AOPS]$", |
|||
GlossClass = "gloss-abbr", |
GlossClass = "gloss-abbr", |
||
GlossingType = "label", -- if set to "label" gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip |
GlossingType = "label", -- if set to "label" gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip |
||
Line 69: | Line 70: | ||
end |
end |
||
if gloss_text then gloss = gloss_text end |
if gloss_text then gloss = gloss_text end |
||
if label or wikilink then |
if label or wikilink then |
||
local abbr_label |
local abbr_label |
||
if label then abbr_label = label |
if label then abbr_label = label |
||
Line 78: | Line 79: | ||
:attr("title", abbr_label) |
:attr("title", abbr_label) |
||
:attr("style", conf.GlossAbbrStyle) |
:attr("style", conf.GlossAbbrStyle) |
||
--- excluding the likes of A or S from rendering in small caps: |
|||
if mw.ustring.match(gloss,conf.GlossSmallCapsExclude) then |
|||
gloss_node:attr("style", "font-variant:normal; text-transform: none") end |
|||
if conf.GlossingType == "label" then |
if conf.GlossingType == "label" then |
||
gloss_node:wikitext(gloss) |
gloss_node:wikitext(gloss) |
Revision as of 13:30, 29 August 2016
Module documentation[view] [edit] [history] [purge]
This is the module for Template:Interlinear and Template:gcl. The function invoked by the former is p.interlinearise, and the one invoked by the latter is p.gcl. See those templates' documentation for usage instructions.
Most of the glossing abbreviations are loaded from the data subpage.
local p = {}
local data = mw.loadData( 'Module:Sandbox/Uanfala/glt/data' )
local getArgs = require('Module:Arguments').getArgs
-- Module-wide settings. The table is assigned without `local`, so it is a global that the
-- functions below read directly. Several fields are overwritten at run time:
-- GlossingType and GlossAbbrStyle by set_glossing_settings(), GlossingType by p.glf(),
-- and GlossingLine by p.interlinearise().
conf = { --settings
-- Character class matching one word separator; normalise() collapses runs of it into a single
-- space and parse() uses it to detect the end of a word.
WordSeparator = "[ \n\r\t]", -- Don't replace with %s as this would include non-breaking spaces
-- A morpheme matching this pattern is passed to format_gloss() by find_gloss().
GlossAbbrPattern = "^([Ø1-9A-Z]+)$", -- this isn't a full regex, but a Lua pattern
-- Contents of a character class (inserted between "[" and "]" by find_gloss) listing the
-- characters that separate morphemes within a word.
GlossAbbrBoundary = "-.;:<>\\~=%s1-9%[%]", --!! TO DO: allow for html escape codes, but how?
-- Inline CSS put on every formatted gloss; set_glossing_settings() appends custom styles to it.
GlossAbbrStyle = "font-variant: small-caps oldstyle-nums; text-transform: lowercase",
-- Glosses matching this pattern (a single A, O, P or S) get their style attribute replaced
-- in format_gloss() so that they are not rendered in small caps.
GlossSmallCapsExclude = "^[AOPS]$",
-- CSS class put on every formatted gloss; parse() also looks for it in opening tags.
GlossClass = "gloss-abbr",
GlossingType = "label", -- if set to "label" gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip
-- if set to "wikilink" the abbreviation is formatted as a wikilink to the relevant wikipedia article
-- if set to "none" abbreviations aren't formatted at all
-- Index of the line that holds the grammatical glosses; p.interlinearise() resets it on each call.
GlossingLine = 2,
-- NOTE(review): ExampleNumberStyle is not referenced by any code visible in this file.
ExampleNumberStyle = "float: left; min-width: 40px;",
-- Style of the <div> that stacks the lines of one word in the interlinear display.
WordDivStyle = "float: left; margin-bottom: 1em; margin-right: 1em;",
WordPstyle = "margin: 0px;", -- the style for the word <p> elements
-- Style of the <p> elements that repeat each whole line after the word columns.
HiddenTextStyle = "display: none;",
-- Style of the <p> element holding the free translation ("ft").
FtStyle = "clear: left;",
EndDivStyle = "clear: left; display: block;", -- style of the <div> element at the end of the interlinear display
-- CSS class of the outermost <div> of the interlinear display.
InterlinearClass = "interlinear"
}
-- Formats the gloss abbreviations contained in a single word.
-- The word is HTML-entity-decoded, split into morphemes at the characters listed in
-- conf.GlossAbbrBoundary, and every morpheme that matches conf.GlossAbbrPattern is replaced
-- by the output of format_gloss(). Boundary characters and all other morphemes are kept as is.
-- Returns the resulting string.
local function find_gloss(word)
    local boundary_set = conf.GlossAbbrBoundary
    -- an optional boundary character followed by a run of non-boundary characters
    local morpheme_pattern = "([" .. boundary_set .. "]?)([^" .. boundary_set .. "]+)"

    -- gsub callback: receives the boundary (possibly "") and the morpheme that follows it
    local function render_morpheme(separator, morpheme)
        if not mw.ustring.match(morpheme, conf.GlossAbbrPattern) then
            return separator .. morpheme
        end
        -- TO DO: find some way of intercepting lookup errors while still formatting the
        -- morpheme in small caps if it looks like an abbreviation
        return separator .. format_gloss(morpheme)
    end

    local decoded = mw.text.decode(word)
    -- TO DO: insert a f() to build a table of all the punctuation used (if help ~= off)
    local rendered = mw.ustring.gsub(decoded, morpheme_pattern, render_morpheme)
    return rendered
end
-- Looks a gloss abbreviation up in data.abbreviations.
-- @param gloss     the abbreviation as written; it is upper-cased and trimmed for the lookup
-- @param label     caller-supplied label, or nil
-- @param wikilink  caller-supplied wikilink target, or nil
-- @return label, wikilink: the caller-supplied values where given, otherwise the values
--         found in the data (nil when nothing usable was found)
-- An abbreviation that is not listed but starts with "N" is retried without the "N" and, unless
-- the entry sets ExcludeNegation, labelled "non-" .. expansion (e.g. NPST = non-past).
-- When nothing is found a message is recorded through WikiError:add().
local function gloss_lookup(gloss, label, wikilink)
    -- kept local: these used to be assigned as globals and leaked between calls
    local found_label, found_wikilink
    local key = mw.text.trim(mw.ustring.upper(gloss))
    local entry = data.abbreviations[key]
    if entry then
        found_label = entry.expansion
        found_wikilink = entry.wikipage
    elseif mw.ustring.sub(key, 1, 1) == "N" then -- dealing with cases like NPST = non-past
        local base = data.abbreviations[mw.ustring.sub(key, 2)]
        if base ~= nil and not base.ExcludeNegation then
            -- an entry may have a wikipage but no expansion; concatenating nil would raise
            if base.expansion ~= nil and base.expansion ~= "" then
                found_label = "non-" .. base.expansion
            end
            found_wikilink = base.wikipage
        end
    end
    if not (found_label or found_wikilink) then
        WikiError:add("Abbreviation " .. gloss .. " not found in list. Please consider using the gl template or adding the abbreviation to /data")
    end
    -- empty strings in the data mean "no value"
    if found_label == "" then found_label = nil end
    if found_wikilink == "" then found_wikilink = nil end
    -- values supplied by the caller take precedence over the data
    if not label then label = found_label end
    if not wikilink then wikilink = found_wikilink end
    return label, wikilink
end
-- Renders one gloss abbreviation as HTML.
-- @param gloss       the bare abbreviation, used for the lookup and the small-caps exclusion test
-- @param label       optional tooltip text; looked up with gloss_lookup() when needed
-- @param wikilink    optional link target; looked up with gloss_lookup() when needed
-- @param gloss_text  optional display text that replaces `gloss` in the output
-- @return the HTML string: an <abbr> when a label or wikilink is available, otherwise a <span>
function format_gloss(gloss, label, wikilink, gloss_text)
    local glossing_type = conf.GlossingType
    -- look the abbreviation up only when the piece required by the glossing type is missing
    if not (label or wikilink)
        or (not label and glossing_type == "label")
        or (not wikilink and glossing_type == "wikilink")
    then
        label, wikilink = gloss_lookup(gloss, label, wikilink)
    end
    if gloss_text then gloss = gloss_text end

    local gloss_node -- kept local: it used to be assigned as a global
    if label or wikilink then
        gloss_node = mw.html.create("abbr")
            :addClass(conf.GlossClass)
            :attr("title", label or wikilink)
            :attr("style", conf.GlossAbbrStyle)
        --- excluding the likes of A or S from rendering in small caps
        --- (this replaces the style attribute set just above):
        if mw.ustring.match(gloss, conf.GlossSmallCapsExclude) then
            gloss_node:attr("style", "font-variant:normal; text-transform: none")
        end
        if glossing_type == "wikilink" and wikilink then
            gloss_node:wikitext("[[", wikilink, "|", gloss, "]]")
        else
            -- "label", "wikilink" without a target, and any other glossing type (e.g. "none"):
            -- always emit the text, otherwise the <abbr> would be empty and the gloss lost
            gloss_node:wikitext(gloss)
        end
    else
        -- nothing known about the abbreviation: style it, but without a tooltip
        -- TO DO: output some error message
        gloss_node = mw.html.create("span")
            :addClass(conf.GlossClass)
            :attr("style", conf.GlossAbbrStyle)
            :wikitext(gloss)
    end
    return tostring(gloss_node)
end
-- Collapses every run of word separators (conf.WordSeparator) in `str` into a single space.
-- Returns the results of mw.ustring.gsub unchanged: the new string, then the replacement count.
local function normalise(str)
    local separator_run = conf.WordSeparator .. "+"
    local collapsed, replacements = mw.ustring.gsub(str, separator_run, " ")
    return collapsed, replacements
end
-- Prepares a user-supplied CSS declaration list for concatenation with other styles:
-- strips leading and trailing quotation marks and makes sure the result ends in ";".
local function tidyCss(str)
    local css = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1") -- trims quotation marks
    if mw.ustring.sub(css, -1) == ";" then
        return css
    end
    return css .. ";" -- appends ";" if missing
end
-- Applies the user's glossing options to the global `conf` table.
-- @param glossingtype   optional string; anything containing "link" selects "wikilink",
--                       "label" selects "label" and "no" selects "none". A blank value is
--                       ignored. Any other value raises an error.
-- @param glossingstyle  optional CSS declarations appended to conf.GlossAbbrStyle
function set_glossing_settings(glossingtype, glossingstyle)
    if glossingtype then
        glossingtype = mw.ustring.lower(mw.text.trim(glossingtype))
        -- a blank value (possible when blank arguments are not removed) means "not set"
        if glossingtype ~= "" then
            if mw.ustring.find(glossingtype, 'link') then
                conf.GlossingType = "wikilink"
            elseif mw.ustring.find(glossingtype, 'label') then
                conf.GlossingType = 'label'
            elseif mw.ustring.find(glossingtype, "no") then
                conf.GlossingType = 'none'
            else
                error('Unrecognised glossing type: ' .. glossingtype)
            end
        end
    end
    if glossingstyle then
        local extra = tidyCss(glossingstyle)
        if extra ~= ";" then -- tidyCss turns an empty style into a lone ";"
            local base = conf.GlossAbbrStyle
            -- the default style does not end in ";", so a separator is needed to keep the
            -- last default declaration and the first custom one apart
            if base ~= "" and not mw.ustring.find(base, ";%s*$") then
                base = base .. ";"
            end
            conf.GlossAbbrStyle = base .. " " .. extra
        end
    end
    -- Not done yet: placeholder for a help feature. `help` and `custom_help_string` are globals
    -- that nothing else in this file sets, so `help` currently always ends up nil.
    if (help ~= "off" and help ~= "no")
        and custom_help_string == ""
    then help = true
    else help = nil end
end
-- Entry point for formatting a single gloss abbreviation.
-- Positional arguments (taken from the parent frame only):
--   1 = the gloss (required), 2 = label (optional), 3 = wikilink target (optional)
-- Named arguments: glossing, glossing-style (see set_glossing_settings).
-- Returns the HTML produced by format_gloss(); raises an error when no gloss is supplied.
function p.glf(frame) -- the function called by Template:gl
    local args = getArgs(frame, {
        trim = true,
        removeBlanks = true,
        parentOnly = true,
    })
    set_glossing_settings(args.glossing, args['glossing-style'])
    local orig_gloss, label, wikilink = args[1], args[2], args[3]
    if not orig_gloss then return error('No gloss supplied') end
    -- a wikilink was supplied and no glossing type was requested explicitly,
    -- so format the abbreviation as a wikilink
    if wikilink and not args.glossing then
        conf.GlossingType = 'wikilink'
    end
    -- the lookup key is the gloss stripped of markup; the original text is what gets displayed
    local gloss = mw.ustring.gsub(orig_gloss, "<.->", "") -- remove any html fluff
    gloss = mw.ustring.gsub(gloss, "%'+", "") -- remove wiki bold/italic formatting
    return format_gloss(gloss, label, wikilink, orig_gloss)
end
-- Entry point for the interlinear display.
-- Every unnamed argument is one line of text. With more than one line the last one is taken to
-- be the free translation and the others are split into words and shown in aligned columns;
-- a single line is formatted as inline text and assumed to contain glosses.
-- Named arguments read here: styleN, langN, lang, glossing-line, italics, glossing,
-- glossing-style.
-- Returns the HTML string; raises an error when substituted or when no line is supplied.
-- Communicates with parse() through the globals `line`, `currentline` and `tmpres`.
function p.interlinearise(frame)
    ----Prepare arguments----
    if mw.isSubsting() then error('This template is not meant to be substituted.') end
    local args = getArgs(frame, { -- configuration for Module:Arguments
        trim = true,
        removeBlanks = false,
        parentFirst = true,
    })
    line = {} -- global on purpose: parse() reads it
    for i, v in ipairs(args) do -- iterates over the numbered arguments from #invoke
        line[i] = {}
        v = normalise(v)
        line[i].whole = v
        line[i].length = mw.ustring.len(v)
        ---prepare style arguments----
        local _style = args["style" .. i]
        if not _style then _style = ""
        else _style = tidyCss(_style) end
        --line[i].attr holds the attributes for the <p> elements that enclose the words in line i
        line[i].attr = {style = conf.WordPstyle .. _style}
        local _lang = args["lang" .. i]
        if _lang and #_lang > 1 then
            line[i].attr.lang = _lang
        else
            _lang = args.lang
            if _lang and #_lang > 1 and i == 1 then -- if a lang= parameter is supplied, it's assumed to apply to line 1
                line[i].attr.lang = _lang
            end
        end
    end

    local line_count = #line
    local ft -- free translation; stays nil unless more than one line was supplied
    if line_count > 1 then
        local gloss_line = tonumber(args['glossing-line'])
        if gloss_line then -- set up which line is the one containing grammatical glosses
            conf.GlossingLine = gloss_line
        else
            conf.GlossingLine = 2
        end
        local italic = args.italics -- set up which line should be displayed in italics
        if not italic or italic == "1" or italic == "" then -- by default the first line is in italics
            line[1].attr.style = line[1].attr.style .. "font-style: italic;"
        else
            local itanum = tonumber(italic)
            -- ignore line numbers that don't refer to a supplied line instead of indexing nil
            if itanum and itanum > 1 and line[itanum] then
                line[itanum].attr.style = line[itanum].attr.style .. "font-style: italic;"
            end
        end
        ft = line[line_count].whole -- the last unnamed parameter is assumed to be the free translation...
        line[line_count] = nil --... and is thus excluded from interlinearising
    -- if a single line is supplied, the assumption is that it contains grammatical glosses:
    elseif line_count == 1 then
        conf.GlossingLine = 1
    elseif line_count == 0 then
        return error('No arguments supplied')
    end
    set_glossing_settings(args.glossing, args['glossing-style'])

    ----Segment lines into words----
    for i, v in ipairs(line) do
        currentline = i -- global on purpose: parse() reads it
        local ifglossing = conf.GlossingType ~= "none"
            and i == conf.GlossingLine -- if true the parser will attempt to format gloss abbreviations in the current line
        local wc, n = 1, 1
        line[i].words = {}
        while n <= line[i].length do
            tmpres = "" -- global on purpose: parse() appends the current word to it
            -- parse() returns the index of the word's last character; +2 skips the separator
            n = parse(n, 0, ifglossing) + 2
            line[i].words[wc] = tmpres
            wc = wc + 1
        end
    end

    ----Check for mismatches in number of words across lines----
    local number_of_words, mismatch_found = 0, false
    for i, v in ipairs(line) do -- find the maximum number of words in any line
        local wc = #line[i].words
        if wc ~= number_of_words then
            if i ~= 1 then
                mismatch_found = true
            end
            if wc > number_of_words then
                number_of_words = wc
            end
        end
    end

    ----Deal with mismatches---
    if mismatch_found then
        -- NOTE: error_text is assembled but not displayed yet (see "insert error messages here")
        local error_text = "Warning: lines don't have the same number of words: "
        for i, v in ipairs(line) do
            local wc = #line[i].words
            error_text = error_text .. wc .. " words in line " .. i .. "; "
            if wc ~= number_of_words then
                -- pad shorter lines so that every column has an entry in every line
                for current_word = wc + 1, number_of_words do
                    line[i].words[current_word] = " "
                end
            end
        end
        error_text = error_text .. "if formatting or a template is applied to a piece of text then this text is treated as a single word; non-breaking spaces are not treated as word separators."
    end

    ----Build the HTML----
    --insert error messages here
    ---- If only a single line was supplied, format it as inline text:
    if line_count == 1 then
        local span = mw.html.create('span')
        span:attr(line[1].attr)
        for wi = 1, number_of_words do
            local space
            if wi < number_of_words then space = " " else space = "" end
            span:wikitext(line[1].words[wi] .. space)
        end
        return tostring(span)
    end

    ---- more than one line supplied, so produce interlinear display
    local div = mw.html.create("div")
    div:addClass(conf.InterlinearClass)
    for wi = 1, number_of_words do
        -- one floated <div> per word position, holding that word from every line
        local div2 = div:tag("div")
            :attr("style", conf.WordDivStyle)
        for i, _ in ipairs(line) do
            if line[i].whole ~= "" then -- skipping empty lines
                local word_p = div2:tag("p")
                word_p:attr(line[i].attr)
                word_p:wikitext(line[i].words[wi])
            end
        end
    end
    -- every whole line is repeated in a <p> styled with conf.HiddenTextStyle
    for i, v in ipairs(line) do
        local hidden_line = div:tag("p")
        hidden_line:attr("style", conf.HiddenTextStyle)
            :wikitext(v.whole)
    end
    if ft and ft ~= "" then -- "ft" for "free translation" or "footer"
        local ft_line = div:tag("p")
        ft_line:attr("style", conf.FtStyle)
            :wikitext(ft)
    end
    local end_div = div:tag("div")
    end_div:attr("style", conf.EndDivStyle)
    div:newline()
    return tostring(div)
end
--[[ Reads one word of the current line, starting at character index i.
The function is so monstrous because of the lack of proper regular expressions in Lua.
It goes through the string and ends when it finds a space outside of html tags. It makes sure
any block within tags is treated as one word (e.g. "<b>once only</b>" counts as one word); the
reason for this behaviour is to avoid producing bad html in the interlinear display. It also
calls find_gloss() on any eligible pieces of text.

Parameters:
  i           index of the character to examine in line[currentline].whole
  tags_found  number of html elements currently open (0 on the initial call)
  ifglossing  boolean, whether gloss abbreviations in the text should be formatted
Globals: reads `line` and `currentline`, appends the word's output to `tmpres`.
Returns the index of the last character of the word. ]]--
function parse(i, tags_found, ifglossing)
    local whole = line[currentline].whole
    local length = line[currentline].length
    if i > length then return i end --this will only be triggered if the current line has less words than line 1
    local probe = mw.ustring.sub(whole, i, i)
    if mw.ustring.match(probe, conf.WordSeparator) and tags_found == 0 then
        return i - 1
    end
    if probe == "<" then -- We've encountered an HTML tag. What do we do now?
        local _, j, chunk = mw.ustring.find(whole, "(<.->)", i)
        if not j then
            -- a "<" that is never closed by ">": there is no tag to process, so pass the
            -- rest of the line through verbatim instead of failing on a nil chunk
            tmpres = tmpres .. mw.ustring.sub(whole, i)
            return length
        end
        if mw.ustring.sub(whole, i, i + 1) == "</" then -- It's a CLOSING tag
            -- leaving a pre-formatted <abbr>: resume formatting glosses
            -- NOTE(review): this also switches glossing on when the glossing type is "none"
            if conf.GlossingLine == currentline
                and ifglossing == false
                and mw.ustring.match(chunk, "</abbr>")
            then ifglossing = true end
            tags_found = tags_found - 1
        elseif not mw.ustring.match(chunk, "/>$") then -- It's an OPENING tag, unless it opens a self-closing element (in which case the element is ignored)
            -- checking for output of {{ggl}}: its content is already formatted.
            -- Plain search: as a pattern the "-" in "gloss-abbr" is a quantifier and the
            -- class name would never match itself.
            if ifglossing == true
                and mw.ustring.find(chunk, conf.GlossClass, 1, true)
            then ifglossing = false end
            tags_found = tags_found + 1
        end
        tmpres = tmpres .. chunk
        return parse(j + 1, tags_found, ifglossing)
    else -- No HTML tags, so we only need to find where the word ends
        -- shortest run of at least one character that is followed by a space or a "<"
        local _, k, chunk = mw.ustring.find(whole, "(..-)([ <])", i)
        if k then --ordinary text
            if ifglossing == true then
                tmpres = tmpres .. find_gloss(chunk)
            else
                tmpres = tmpres .. chunk
            end
            -- k is the index of the space or "<", which the next call examines
            return parse(k, tags_found, ifglossing)
        else -- reached end of string
            local rest = mw.ustring.sub(whole, i)
            if ifglossing == true then
                tmpres = tmpres .. find_gloss(rest)
            else
                tmpres = tmpres .. rest
            end
            return length
        end
    end
end
--=====================================================================
------- the following need to be rewritten or discarded:
-- Collector for error messages raised while formatting.
--   counter  number of messages added so far
--   errors   all messages, separated by single spaces
WikiError = {["counter"]=0, errors = ""}

-- Records one message.
function WikiError:add(message)
    -- keep consecutive messages apart instead of running them together
    if self.errors ~= "" then
        self.errors = self.errors .. " "
    end
    self.counter = self.counter + 1
    self.errors = self.errors .. message
    --self[self.counter]=message
end

-- Returns the collected messages wrapped in <strong class="error">, or nil when there are none.
function WikiError:print()
    if self.counter == 0 then
        return nil
    end
    return '<strong class="error">' .. self.errors .. '</strong>'
end
-- Wraps a message in the <strong class="error"> markup and returns the resulting string.
function wiki_error(message)
    return '<strong class="error">' .. message .. '</strong>'
end
--End
return p
--[[
Will break if:
- > appears within the contents of the attribute of a tag in the supplied string
- < appears in wikitext
- there are nested <abbr> elements
]]--