-- Texas

local p = {}
local data = mw.loadData( 'Module:Sandbox/Uanfala/glt/data' )
local getArgs = require('Module:Arguments').getArgs

-- Module-wide settings read by every function in this file.
-- The table is declared without `local`, so it is a global; GlossingType, GlossAbbrStyle and
-- GlossingLine are overwritten at run time (see set_glossing_settings, p.glf and p.interlinearise).
conf = { --settings
	WordSeparator = "[ \n\r\t]", -- character set that separates words. Don't replace with %s as this would include non-breaking spaces
	GlossAbbrPattern = "^([Ø1-9A-Z]+)$", -- a morpheme matching this is handed to format_gloss; this isn't a full regex, but a Lua pattern
	GlossAbbrBoundary = "-.;:<>\\~=%s1-9%[%]", -- morpheme delimiters; find_gloss splices this string into a [...] set --!! TO DO: allow for html escape codes, but how?
	GlossAbbrStyle = "font-variant: small-caps oldstyle-nums; text-transform: lowercase", -- inline CSS for formatted glosses; user-supplied CSS is appended to it
	GlossSmallCapsExclude = "^[AOPS]$", -- glosses consisting of one of these single letters are not rendered in small caps
	GlossClass = "gloss-abbr", -- class set on the generated <abbr>/<span>; parse() also searches opening tags for it to detect already-formatted glosses
	GlossingType = "label", -- if set to "label" gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip
						-- if set to "wikilink" the abbreviation is formatted as a wikilink to the relevant wikipedia article
						-- if set to "none" abbreviations aren't formatted at all
	GlossingLine = 2, -- index of the line that contains the grammatical glosses; reset by p.interlinearise on each call
	ExampleNumberStyle = "float: left; min-width: 40px;", -- not referenced by any code visible in this file
	WordDivStyle = "float: left; margin-bottom: 1em; margin-right: 1em;", -- style of the <div> that stacks one word from each line
	WordPstyle = "margin: 0px;", -- the style for the word <p> elements
	HiddenTextStyle = "display: none;", -- style of the <p> elements that carry each whole line as hidden text
	FtStyle = "clear: left;", -- style of the <p> holding the free translation (last unnamed parameter)
	EndDivStyle = "clear: left; display: block;", -- style of the <div> element at the end of the interlinear display
	InterlinearClass = "interlinear" -- class of the outermost <div> of the interlinear display
	}

--- Formats every gloss abbreviation occurring inside one word.
-- The word is entity-decoded and split into morphemes at the characters listed in
-- conf.GlossAbbrBoundary; each morpheme that matches conf.GlossAbbrPattern is replaced
-- by the markup produced by format_gloss().
-- @param word string: a single word taken from the glossing line
-- @return string: the word with recognised abbreviations replaced by their markup
local function find_gloss(word)
	local boundary_set = conf.GlossAbbrBoundary
	-- one optional boundary character followed by a run of non-boundary characters (a morpheme)
	local morpheme_pattern = "([" .. boundary_set .. "]?)([^" .. boundary_set .. "]+)"

	-- gsub callback: gets the boundary character (possibly empty) and the morpheme after it
	-- TO DO: find some way of intercepting lookup errors and still formatting the gloss
	-- in small caps if it looks like an abbreviation
	local function render_morpheme(separator, morpheme)
		if not mw.ustring.match(morpheme, conf.GlossAbbrPattern) then
			return separator .. morpheme -- ordinary text, left untouched
		end
		return separator .. format_gloss(morpheme)
	end

	-- TO DO: insert a f() to build a table of all the punctuation used (if help ~= off)
	local decoded = mw.text.decode(word)
	local rendered = mw.ustring.gsub(decoded, morpheme_pattern, render_morpheme)
	return rendered
end

--- Looks up a gloss abbreviation in the data table and fills in whatever the caller did not supply.
-- The abbreviation is upper-cased and trimmed before the lookup. If it is not found and starts
-- with "N", the remainder is looked up and, unless that entry sets ExcludeNegation, the
-- expansion is prefixed with "non-" (e.g. NPST = non-past).
-- When neither a label nor a wikilink is found, a message is recorded through WikiError:add().
-- @param gloss string: the abbreviation as written by the user
-- @param label string|nil: caller-supplied label; takes precedence over the looked-up one
-- @param wikilink string|nil: caller-supplied link target; takes precedence over the looked-up one
-- @return label, wikilink: either may be nil when nothing was supplied or found
local function gloss_lookup(gloss, label, wikilink)
	-- kept local so that results of one lookup cannot leak into the next through globals
	local found_label, found_wikilink
	local key = mw.text.trim(mw.ustring.upper(gloss))
	local entry = data.abbreviations[key]
	if entry then
		found_label = entry.expansion
		found_wikilink = entry.wikipage
	elseif mw.ustring.sub(key, 1, 1) == "N" then -- dealing with cases like NPST = non-past
		local base = data.abbreviations[mw.ustring.sub(key, 2)]
		if base ~= nil and not base.ExcludeNegation then
			found_label = "non-" .. base.expansion
			found_wikilink = base.wikipage
		end
	end
	if not (found_label or found_wikilink) then
		WikiError:add("Abbreviation " .. gloss .. " not found in list. Please consider using the gl template or adding the abbreviation to /data")
	end
	-- empty strings in the data table count as "no value"
	if found_label == "" then found_label = nil end
	if found_wikilink == "" then found_wikilink = nil end
	if not label then label = found_label end
	if not wikilink then wikilink = found_wikilink end
	return label, wikilink
end

--- Builds the markup for a single gloss abbreviation.
-- Missing label/wikilink values are looked up with gloss_lookup() when needed for the current
-- conf.GlossingType. If a label or wikilink is available the result is an <abbr> whose title is
-- the label (or, failing that, the wikilink); otherwise it is a plain <span>.
-- @param gloss string: the abbreviation used for the lookup
-- @param label string|nil: tooltip text; looked up when absent
-- @param wikilink string|nil: link target; looked up when absent
-- @param gloss_text string|nil: text to display instead of `gloss` (e.g. the unstripped original)
-- @return string: the serialised HTML element
function format_gloss(gloss, label, wikilink, gloss_text)
	local needs_lookup = not (label or wikilink)
		or (not label and conf.GlossingType == "label")
		or (not wikilink and conf.GlossingType == "wikilink")
	if needs_lookup then
		label, wikilink = gloss_lookup(gloss, label, wikilink)
	end
	if gloss_text then gloss = gloss_text end

	local gloss_node -- local: the node must not leak into the global environment
	if label or wikilink then
		gloss_node = mw.html.create("abbr")
		gloss_node
			:addClass(conf.GlossClass)
			:attr("title", label or wikilink)
			:attr("style", conf.GlossAbbrStyle)
		--- excluding the likes of A or S from rendering in small caps:
		if mw.ustring.match(gloss, conf.GlossSmallCapsExclude) then
			gloss_node:attr("style", "font-variant:normal; text-transform: none")
		end
		if conf.GlossingType == "wikilink" and wikilink then
			gloss_node:wikitext("[[", wikilink, "|", gloss, "]]")
		else
			-- "label", "wikilink" without a target, and any other glossing type (e.g. "none"):
			-- always emit the gloss text so that it is never silently dropped
			gloss_node:wikitext(gloss)
		end
	else
		-- nothing known about this abbreviation: style it, but without a tooltip
		gloss_node = mw.html.create("span")
		gloss_node
			:addClass(conf.GlossClass)
			:attr("style", conf.GlossAbbrStyle)
			:wikitext(gloss)
	end -- TO DO: output some error message
	return tostring(gloss_node)
end

--- Collapses every run of word separators (conf.WordSeparator) into a single space.
-- @param str string: the raw line
-- @return string, number: the normalised line and the number of replacements made by gsub
local function normalise(str)
	local separator_run = conf.WordSeparator .. "+"
	return mw.ustring.gsub(str, separator_run, " ")
end

--- Cleans up a user-supplied CSS declaration string.
-- Leading and trailing quotation marks are stripped and a terminating ";" is guaranteed.
-- @param str string: raw CSS as passed in a template parameter
-- @return string: the CSS without surrounding quotes, ending in ";"
local function tidyCss(str)
	local css = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1") -- trims quotation marks
	local last_char = mw.ustring.sub(css, -1)
	if last_char == ";" then
		return css
	end
	return css .. ";" -- appends ";" if missing
end

--- Applies the user's glossing options to the module settings.
-- @param glossingtype string|nil: any value containing "link" selects "wikilink", otherwise one
--        containing "label" selects "label", otherwise one containing "no" selects "none";
--        nil leaves conf.GlossingType untouched
-- @param glossingstyle string|nil: extra CSS appended to conf.GlossAbbrStyle
-- Raises an error when glossingtype is given but not recognised.
function set_glossing_settings(glossingtype, glossingstyle)
	if glossingtype then
		glossingtype = mw.ustring.lower(mw.text.trim(glossingtype))
		if mw.ustring.find(glossingtype, 'link') then
			conf.GlossingType = "wikilink"
		elseif mw.ustring.find(glossingtype, 'label') then
			conf.GlossingType = 'label'
		elseif mw.ustring.find(glossingtype, "no") then
			conf.GlossingType = 'none'
		else
			error('Unrecognised glossing type: ' .. glossingtype)
		end
	end
	if glossingstyle then
		conf.GlossAbbrStyle = conf.GlossAbbrStyle .. tidyCss(glossingstyle)
	end
	-- Unfinished help feature: `help` and `custom_help_string` are globals that nothing in this
	-- file assigns, so `help` currently always ends up nil. The test used to read
	-- `help ~= "off" or help ~= "no"`, which is true for every value; `and` is what was meant.
	if help ~= "off" and help ~= "no"
		and custom_help_string == ""
	then
		help = true
	else
		help = nil
	end
end

--- Entry point called by Template:gl: formats one gloss abbreviation.
-- Positional arguments (taken from the parent frame only): 1 = gloss, 2 = label, 3 = wikilink.
-- Named arguments: glossing, glossing-style (see set_glossing_settings).
-- Raises an error when no gloss is supplied.
function p.glf(frame) -- the function called by Template:gl
	local args = getArgs(frame, {
		trim = true,
		removeBlanks = true,
		parentOnly = true,
	})
	set_glossing_settings(args.glossing, args['glossing-style'])

	local raw_gloss = args[1]
	local label = args[2]
	local wikilink = args[3]
	if not raw_gloss then
		return error('No gloss supplied')
	end

	-- a wikilink was supplied and no glossing= parameter was given at all,
	-- so default to rendering the gloss as a link
	if wikilink and not args.glossing then
		conf.GlossingType = 'wikilink'
	end

	-- the lookup key is the gloss without markup; the original text is what gets displayed
	local lookup_key = mw.ustring.gsub(raw_gloss, "<.->", "") -- remove any html fluff
	lookup_key = mw.ustring.gsub(lookup_key, "%'+", "") -- remove wiki bold/italic formatting
	return format_gloss(lookup_key, label, wikilink, raw_gloss)
end

--- Entry point that renders interlinear glossed text.
-- Every unnamed argument is one line. With more than one line, the last line is taken to be the
-- free translation and is shown below the word-aligned display. With exactly one line, that line
-- is assumed to contain glosses and is returned as an inline <span>.
-- Named arguments read here: styleN, langN, lang, glossing-line, italics, glossing, glossing-style.
-- Raises an error when substituted or when no unnamed argument is supplied.
-- @return string: the serialised HTML
function p.interlinearise(frame)
	----Prepare arguments----
	if mw.isSubsting() then error('This template is not meant to be substituted.') end
	local args = getArgs(frame, { -- configuration for Module:Arguments
		trim = true,
		removeBlanks = false,
		parentFirst = true,
	})
	line = {} -- global on purpose: parse() reads line[currentline]
	for i,v in ipairs(args) do -- iterates over the numbered arguments from #invoke
		line[i] = {}
		v = normalise(v)
		line[i].whole = v
		line[i].length = mw.ustring.len(v)

		---prepare style arguments----
		local _style = args["style"..i]
		if not _style then _style = ""
		else _style = tidyCss(_style) end
		--line[i].attr holds the attributes for the <p> elements that enclose the words in line i
		line[i].attr = {style = conf.WordPstyle .. _style}

		local _lang = args["lang" .. i]
		if _lang and #_lang > 1 then
			line[i].attr.lang = _lang
		else _lang = args.lang
			if _lang and #_lang > 1 and i == 1 then -- if a lang= parameter is supplied, it's assumed to apply to line 1
				line[i].attr.lang = _lang
			end
		end
	end

	local line_count = #line
	local ft -- free translation; local so it cannot leak into the global environment
	if line_count > 1 then
		local gloss_line = tonumber(args['glossing-line'])
		if gloss_line then -- set up which line is the one containing grammatical glosses
			conf.GlossingLine = gloss_line
		else conf.GlossingLine = 2 end
		local italic = args.italics -- set up which line should be displayed in italics
		if not italic or italic == "1"  or italic == "" then -- by default the first line is in italics
			line[1].attr.style = line[1].attr.style .. "font-style: italic;"
		else
			local itanum = tonumber(italic)
			-- line[itanum] is checked so that an italics= value that does not correspond to a
			-- supplied line is ignored instead of raising "attempt to index nil"
			if itanum and itanum > 1 and line[itanum] then
				line[itanum].attr.style = line[itanum].attr.style .. "font-style: italic;"
			end
		end
		ft = line[line_count].whole -- the last unnamed parameter is assumed to be the free translation...
		line [line_count] = nil 	--... and is thus excluded from interlinearising
	-- if a single line is supplied, the assumption is that it contains grammatical glosses:
	elseif line_count == 1 then
		conf.GlossingLine = 1
	elseif line_count == 0 then
		return error('No arguments supplied')
	end
	set_glossing_settings(args.glossing, args['glossing-style'])

	----Segment lines into words----
	for i,v in ipairs(line) do
		currentline = i -- global on purpose: tells parse() which line it is working on
		local ifglossing = conf.GlossingType ~= "none"
							and i == conf.GlossingLine -- if true the parser will attempt to format gloss abbreviations in the current line
		local wc, n = 1, 1
		line[i].words = {}
		while n <= line[i].length do
			tmpres = "" -- global on purpose: parse() accumulates the current word in it
			n = parse(n, 0, ifglossing)+2 -- parse() returns the index of the word's last character; +2 skips the separator
			line[i].words[wc] = tmpres
			wc = wc + 1
		end
	end

	----Check for mismatches in number of words across lines----
	local number_of_words, mismatch_found = 0, false
	for i,v in ipairs(line) do -- find the maximum number of words in any line
		local wc = #line[i].words
		if wc ~= number_of_words then
			if i ~= 1 then
				mismatch_found = true
			end
			if wc > number_of_words then
				number_of_words = wc
			end
		end
	end
	----Deal with mismatches---
	if mismatch_found then
		-- NOTE: the warning text is assembled but not yet added to the output (see below)
		local error_text = "Warning: lines don't have the same number of words: "
		for i,v in ipairs(line) do
			local wc = #line[i].words
			error_text = error_text .. wc .. " words in line " .. i .. "; "
			if wc ~= number_of_words then
				-- pad shorter lines so that every column has a cell
				for current_word = wc+1, number_of_words do
					line[i].words[current_word] = "&nbsp;"
				end
			end
		end
		error_text = error_text .. "if formatting or a template is applied to a piece of text then this text is treated as a single word; non-breaking spaces are not treated as word separators."
	end

	----Build the HTML----
	--insert error messages here
	---- If only a single line was supplied, format it as inline text:
	if line_count == 1 then
		local span = mw.html.create('span')
		span:attr(line[1].attr)
		for wi = 1, number_of_words do
			local space
			if wi < number_of_words then space = " " else space = "" end
			span:wikitext(line[1].words[wi] .. space)
		end
		return tostring(span)
	end

	---- more than one line supplied, so produce interlinear display
	local div = mw.html.create("div")
	div:addClass(conf.InterlinearClass)
	for wi = 1, number_of_words do
		local div2 = div:tag("div")
					:attr("style", conf.WordDivStyle)
		for i,_ in ipairs (line) do
			if line[i].whole ~= "" then -- skipping empty lines
				local para = div2:tag("p") -- not named `p`, to avoid shadowing the module table
				para:attr(line[i].attr)
				para:wikitext(line[i].words[wi])
			end
		end
	end

	-- each whole line is also included as hidden text
	for i,v in ipairs(line) do
		local hidden_line = div:tag("p")
		hidden_line:attr("style", conf.HiddenTextStyle)
					:wikitext(v.whole)
	end

	if ft and ft ~= "" then -- "ft" for "free translation" or "footer"
		local ft_line = div:tag("p")
		ft_line:attr("style", conf.FtStyle)
				:wikitext(ft)
	end
	local end_div = div:tag("div")
		end_div:attr("style", conf.EndDivStyle)
	div:newline()
	return tostring(div)
end

function parse(i,tags_found,ifglossing)
	--[[ This function is so monstrous because of the lack of proper regular expressions in Lua.
	The function goes through the string and ends when it finds a space outside of html tags. It makes sure any block within tags
	is treated as one word (ex. "<b>once only</b>" would count as one word). The reason for this behaviour is to avoid producing
	bad html in the interlinear display. It also calls the find_gloss function to go over any eligible pieces of text.
	It modifies the global tmpres variable and returns the index of the end of the word.

	Parameters:
	  i          - character index in line[currentline].whole at which to continue
	  tags_found - number of currently open html tags (0 when outside any tag)
	  ifglossing - true if gloss abbreviations should be formatted in the text that follows
	Reads the globals line, currentline and conf; appends to the global tmpres. ]]--
	if i > line[currentline].length then return i end --this will only be triggered if the current line has less words than line 1
	local probe = mw.ustring.sub(line[currentline].whole,i,i)
	if mw.ustring.match(probe,conf.WordSeparator) and tags_found == 0
		then return  i-1 end -- separator outside of any tag: the word ended on the previous character
	if probe == "<" then -- We've encountered an HTML tag. What do we do now?
		local _,j,chunk = mw.ustring.find(line[currentline].whole,"(<.->)",i)
		if not j then
			-- There is no ">" anywhere after this "<", so it cannot be a tag. Treat it as a
			-- literal character instead of failing on the concatenation of a nil chunk.
			tmpres = tmpres .. probe
			return parse(i+1,tags_found,ifglossing)
		end
		if mw.ustring.sub(line[currentline].whole,i,i+1) == "</" then -- It's a CLOSING tag
			-- glossing was switched off by an already formatted gloss; its </abbr> switches it back on
			if conf.GlossingLine == currentline
				and ifglossing==false
				and mw.ustring.match(chunk,"</abbr>")
				then ifglossing=true end
			tags_found = tags_found - 1
		elseif not mw.ustring.match(chunk, "/>$") -- It's an OPENING tag, unless it opens a self-closing element (in which case the element is ignored)
			then if ifglossing == true
					and mw.ustring.match(chunk,conf.GlossClass) -- checking for output of {{ggl}}
					then ifglossing = false end
			tags_found = tags_found + 1
		end
		tmpres = tmpres .. chunk
		return parse(j+1,tags_found,ifglossing)
	else -- No HTML tags, so we only need to find where the word ends
		-- shortest non-empty run of characters that is followed by a space or the start of a tag
		local _,k,chunk = mw.ustring.find(line[currentline].whole,"(..-)([ <])",i)
		if k then --ordinary text
			if ifglossing==true then
				tmpres = tmpres .. find_gloss(chunk)
			else tmpres = tmpres .. chunk
			end
			return parse(k,tags_found,ifglossing) -- k is the index of the space or "<" itself
		else -- reached end of string
			if ifglossing == true then
				tmpres = tmpres .. find_gloss(mw.ustring.sub(line[currentline].whole,i))
			else tmpres = tmpres .. mw.ustring.sub(line[currentline].whole,i)
			end
			return line[currentline].length
		end
	end
end

--=====================================================================

------- the following need to be rewritten or discarded:

--- Collector for error messages produced while formatting glosses.
-- counter: number of messages added so far; errors: the messages, separated by single spaces.
WikiError = {["counter"]=0, errors = ""}

--- Records one more error message.
-- @param message string: the text to append
function WikiError:add(message)
	self.counter = self.counter+1
	if self.errors ~= "" then
		-- keep successive messages from running into each other
		self.errors = self.errors .. " "
	end
	self.errors = self.errors .. message
	--self[self.counter]=message
end

--- Returns the collected messages wrapped in an error <strong> element.
-- @return string|nil: the markup, or nil when no message has been added
function WikiError:print()
	if self.counter == 0 then return nil end
	-- the element is closed explicitly so it cannot swallow the markup that follows it
	return '<strong class="error">' .. self.errors .. '</strong>'
end

--- Wraps a message in an error <strong> element.
-- @param message string: the text to display
-- @return string: '<strong class="error">' .. message .. '</strong>'
function wiki_error(message)
	-- the stray opening quotation mark that used to follow the tag has been removed
	return '<strong class="error">' .. message .. '</strong>'
end
--End
return p
--[[
Will break if:
- > appears within the contents of the attribute of a tag in the supplied string
- < appears in wikitext
- there are nested <abbr> elements
]]--
-- The best road to progress is freedom's road. - JFK

-- Texas

-- Revision as of 13:30, 29 August 2016