-- Module:Text

--[=[ 2014-09-27 Text utilities ]=]

-- Library table; the Text.* functions below are attached to it.
local Text = { }

-- Lazily built match patterns. Each stays false until the function that
-- needs it runs for the first time and stores the pattern string here.
local patternCJK, patternLatin, patternTerminated = false, false, false

Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --    args   -- table with numKey=string; entries under
    --              non-numeric keys are ignored
    --    apply  -- string (optional); separator (default: "|")
    --    adapt  -- string (optional); format including "%s"
    -- Returns: string; items are trimmed, empty items are dropped,
    --          the remaining ones are joined in ascending key order
    local keys = { }
    for k in pairs( args ) do
        if type( k ) == "number" then
            keys[ #keys + 1 ] = k
        end
    end
    -- pairs() delivers keys in unspecified order; sort them so that the
    -- result does not depend on the iteration order of the table
    table.sort( keys )
    local collect = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end
    return table.concat( collect, apply or "|" )
end -- Text.concatParams

Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --    analyse  -- string
    -- Returns: true, if a code point in U+3400..U+9FFF or
    --          U+20000..U+2B81F has been found; false otherwise
    if not patternCJK then
        -- Character set built once: "[" <13312> "-" <40959>
        --                               <131072> "-" <178207> "]"
        patternCJK = mw.ustring.char( 91,
                                      13312, 45, 40959,
                                      131072, 45, 178207,
                                      93 )
    end
    return mw.ustring.find( analyse, patternCJK ) ~= nil
end -- Text.containsCJK

Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText
    -- Parameter:
    --    args   -- table with numKey=string; entries under
    --              non-numeric keys are ignored
    --    adapt  -- string (optional); format including "%s"
    -- Returns: string; items are trimmed, empty items are dropped,
    --          the remaining ones are passed to mw.text.listToText
    --          in ascending key order
    local keys = { }
    for k in pairs( args ) do
        if type( k ) == "number" then
            keys[ #keys + 1 ] = k
        end
    end
    -- pairs() delivers keys in unspecified order; sort them so that the
    -- list keeps the numeric order of its items
    table.sort( keys )
    local collect = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end
    return mw.text.listToText( collect )
end -- Text.listToText

Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --    Trailing quotation marks, apostrophes and "]" after the
    --    terminating character are tolerated
    -- Parameter:
    --    analyse  -- string
    -- Returns: true, if sentence terminated; false otherwise
    if not patternTerminated then
        -- Opening "[" followed by the code points 12290, 65281, 65294
        -- and 65311; the literal part closes the set and appends the
        -- set of tolerated trailing characters
        local opening = mw.ustring.char( 91,
                                         12290,
                                         65281,
                                         65294,
                                         65311 )
        patternTerminated = opening .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, patternTerminated ) ~= nil
end -- Text.sentenceTerminated

Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --    adjust  -- string
    -- Returns: string with all first letters in upper case
    -- Note: Only the nine named entities listed below are protected
    --       against capitalization; any other named entity in the
    --       input is treated like ordinary text.
    local r = " " .. adjust    -- leading blank: first word matches, too
    local protected = false
    if adjust:find( "&", 1, true ) then
        -- Switch to numeric entities: "&" is a non-word character, so
        -- the letter following it would be capitalized otherwise
        r = r:gsub( "&amp;",    "&#38;" )
             :gsub( "&lt;",     "&#60;" )
             :gsub( "&gt;",     "&#62;" )
             :gsub( "&nbsp;",   "&#160;" )
             :gsub( "&thinsp;", "&#8201;" )
             :gsub( "&zwnj;",   "&#8204;" )
             :gsub( "&zwj;",    "&#8205;" )
             :gsub( "&lrm;",    "&#8206;" )
             :gsub( "&rlm;",    "&#8207;" )
        protected = true
    end
    local i = 1
    while i do
        -- non-word character followed by a lower case letter
        i = mw.ustring.find( r, "%W%l", i )
        if i then
            local j = i + 1
            local c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, i ),
                               c,
                               mw.ustring.sub( r, i + 2 ) )
            i = j
        end
    end -- while i
    r = r:sub( 2 )    -- drop the leading blank again
    if protected then
        r = r:gsub( "&#38;",   "&amp;" )
             :gsub( "&#60;",   "&lt;" )
             :gsub( "&#62;",   "&gt;" )
             :gsub( "&#160;",  "&nbsp;" )
             :gsub( "&#8201;", "&thinsp;" )
             :gsub( "&#8204;", "&zwnj;" )
             :gsub( "&#8205;", "&zwj;" )
             :gsub( "&#8206;", "&lrm;" )
             :gsub( "&#8207;", "&rlm;" )
             -- the "x" of a hexadecimal entity follows "#" and has
             -- been capitalized by the loop above
             :gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll

Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --    One single greek letter might be granted
    -- Precondition:
    --    adjust  -- string
    -- Returns: string with non-latin parts enclosed in
    --          the wrapper defined by the local format string 'span'
    -- NOTE(review): 'span' currently inserts blanks only, which does not
    --               match "enclosed in"; markup may have been lost from
    --               that string literal -- confirm against the
    --               maintained version of this module.
    local r
    if not patternLatin then
        -- Built once: "^[" <7> "-" <591> <8194> "-" <8250> "]*$"
        patternLatin = mw.ustring.char(  94, 91,
                                          7, 45,  591,
                                       8194, 45, 8250,
                                         93, 42, 36 )
    end
    if mw.ustring.match( adjust, patternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local j    = false    -- start index of the open non-latin run
        local k    = 1        -- start index of text not yet copied to r
        local m    = false    -- index expected right after a single
                              -- greek letter (plus skipped characters)
        local n    = mw.ustring.len( adjust )
        local span = "%s%s %s "
        local flat = function ( a )
                         -- isLatin
                         return a <= 591   or   ( a >= 8194  and  a <= 8250 )
                     end -- flat
        local form = function ( a )
                         -- Result so far, then the pending text k..j-1,
                         -- then the non-latin run j..a, all fed to 'span'
                         return string.format( span,
                                               r,
                                               mw.ustring.sub( adjust, k, j - 1 ),
                                               mw.ustring.sub( adjust, j, a ) )
                     end -- form
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            -- Only code points above 64 and the two listed below are
            -- classified; any other one neither opens nor closes a run
            if c > 64 or  c == 38  or  c == 60 then    -- '&' '<'
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Close the run; blanks and "(" directly
                            -- before the current position are moved
                            -- out of the run and appended after it
                            local nx = i - 1
                            local s = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " " or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- first non-latin character opens a run
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- unclassified character after a greek letter:
                -- keep the single-letter candidate alive
                m = m + 1
            end
        end -- for i
        if j  and  ( not m  or  m < n ) then
            -- run still open at the end of the string
            r = form( n )
        else
            -- copy the remainder unchanged
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin

-- Export
-- Table returned by this module; the functions below are attached to it.
local p = { }

function p.concatParams( frame )
    -- Frame wrapper for Text.concatParams
    -- Parameter (frame.args):
    --    template   -- "1" (blanks around it are ignored): take the
    --                  items from the parent frame rather than from
    --                  this frame
    --    separator  -- string (optional); handed on as separator
    --    format     -- string (optional); handed on as item format
    -- Returns: string
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        -- getParent is a method and has to be called before indexing
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.concatParams( args,
                              frame.args.separator,
                              frame.args.format )
end

function p.containsCJK( frame )
    -- Frame wrapper for Text.containsCJK
    -- Parameter (frame.args):
    --    1  -- string to be analysed (default: "")
    -- Returns: "1", if CJK detected; else ""
    local subject = frame.args[ 1 ] or ""
    if Text.containsCJK( subject ) then
        return "1"
    end
    return ""
end

function p.listToText( frame )
    -- Frame wrapper for Text.listToText
    -- Parameter (frame.args):
    --    template  -- "1" (blanks around it are ignored): take the
    --                 items from the parent frame rather than from
    --                 this frame
    --    format    -- string (optional); handed on as item format
    -- Returns: string
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        -- getParent is a method and has to be called before indexing
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.listToText( args, frame.args.format )
end

function p.sentenceTerminated( frame )
    -- Frame wrapper for Text.sentenceTerminated
    -- Parameter (frame.args):
    --    1  -- string to be analysed (default: "")
    -- Returns: "1", if sentence terminated; else ""
    local subject = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( subject ) then
        return "1"
    end
    return ""
end

function p.ucfirstAll( frame )
    -- Frame wrapper for Text.ucfirstAll
    -- Parameter (frame.args):
    --    1  -- string to be capitalized (default: "")
    -- Returns: string
    local subject = frame.args[ 1 ] or ""
    return Text.ucfirstAll( subject )
end

function p.uprightNonlatin( frame )
    -- Frame wrapper for Text.uprightNonlatin
    -- Parameter (frame.args):
    --    1  -- string to be processed (default: "")
    -- Returns: string
    local subject = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( subject )
end

function p.zip(frame)
	-- Merge several delimited lists item by item
	-- Parameter (frame.args):
	--    1, 2, ...       -- strings; the lists. Only the consecutive
	--                       run starting at 1 is processed
	--    sep             -- string (optional); default separator used
	--                       to split a list (default: "")
	--    sep1, sep2, ... -- string (optional); separator for list N
	--    isep            -- string (optional); placed between the items
	--                       taken from different lists in one round
	--    osep            -- string (optional); placed between rounds
	-- Returns: string; round i contributes item i of every list,
	--          a list which is too short contributes ""
	-- Note: Separators are handed to mw.text.split without its third
	--       argument.
	local lists = {}
	local seps = {}
	local defaultsep = frame.args["sep"] or ""
	local innersep = frame.args["isep"] or ""
	local outersep = frame.args["osep"] or ""

	-- Parse parameters
	for k, v in pairs(frame.args) do
		local knum = tonumber(k)
		if knum then
			lists[knum] = v
		elseif string.sub(k, 1, 3) == "sep" then
			local sepnum = tonumber(string.sub(k, 4))
			if sepnum then
				seps[sepnum] = v
			end
		end
	end

	-- Count the lists explicitly: the length operator is not reliable
	-- for a table which has been filled from arbitrary keys and might
	-- contain holes
	local listCount = 0
	while lists[listCount + 1] ~= nil do
		listCount = listCount + 1
	end

	-- Split the lists; use the default separator unless an explicit one
	-- has been provided for that list
	local maxListLen = 0
	for i = 1, listCount do
		lists[i] = mw.text.split(lists[i], seps[i] or defaultsep)
		if #lists[i] > maxListLen then
			maxListLen = #lists[i]
		end
	end

	-- Collect the pieces and join once, rather than growing a string
	local pieces = {}
	for i = 1, maxListLen do
		if i ~= 1 then
			pieces[#pieces + 1] = outersep
		end
		for j = 1, listCount do
			if j ~= 1 then
				pieces[#pieces + 1] = innersep
			end
			pieces[#pieces + 1] = lists[j][i] or ""
		end
	end
	return table.concat(pieces)
end

-- Strips diacritics from frame.args[1] (default: ""): the text is
-- decomposed (NFD), every character in U+0300..U+036F and U+1DC0..U+1DFF
-- is deleted, and the remainder is composed again (NFC).
function p.removeDiacritics(frame)
	local char = mw.ustring.char
	local combiningDiacriticalMarks = "["
		.. char(0x0300) .. "-" .. char(0x036F)
		.. char(0x1DC0) .. "-" .. char(0x1DFF)
		.. "]"
	local decomposed = mw.ustring.toNFD(frame.args[1] or "")
	local stripped = mw.ustring.gsub(decomposed, combiningDiacriticalMarks, "")
	return mw.ustring.toNFC(stripped)
end

p.Text = function ()
    -- Access to the library table from other modules
    -- Returns: table 'Text' with the Text.* functions
    return Text
end -- p.Text

-- Module result: the export table
return p