-
Notifications
You must be signed in to change notification settings - Fork 0
Create DateI18n.lua for date string processing #16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+364
−0
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,364 @@ | ||
| --[[ | ||
| __ __ _ _ ____ _ ___ _ ___ | ||
| | \/ | ___ __| |_ _| | ___ _| _ \ __ _| |_ ___|_ _/ |( _ ) _ __ | ||
| | |\/| |/ _ \ / _` | | | | |/ _ (_) | | |/ _` | __/ _ \| || |/ _ \| '_ \ | ||
| | | | | (_) | (_| | |_| | | __/_| |_| | (_| | || __/| || | (_) | | | | | ||
| |_| |_|\___/ \__,_|\__,_|_|\___(_)____/ \__,_|\__\___|___|_|\___/|_| |_| | ||
|
|
||
| This module is intended for processing of date strings. | ||
|
|
||
| Please do not modify this code without applying the changes first at Module:Date/sandbox and testing | ||
| at Module:Date/sandbox/testcases and Module talk:Date/sandbox/testcases. | ||
|
|
||
| Authors and maintainers: | ||
| * User:Parent5446 - original version of the function mimicking template:ISOdate | ||
| * User:Jarekt - original version of the functions mimicking template:Date | ||
| ]] | ||
| require('strict') | ||
|
|
||
| -- ================================================== | ||
| -- === Internal functions =========================== | ||
| -- ================================================== | ||
|
|
||
-- Interpret boolean-like wikitext input, in the manner of Module:Yesno.
-- INPUT:
--  * val     - value to interpret
--  * default - value returned when "val" is not recognised
-- OUTPUT:
--  * booleans are returned unchanged
--  * the numbers 1 and 0 give true and false
--  * strings are lower-cased first; 'yes', 'y', 'true' or anything numerically equal to 1
--    gives true, 'no', 'n', 'false' or anything numerically equal to 0 gives false
--  * everything else (nil, other numbers, other strings, other types) gives "default"
local function yesno(val, default)
	local kind = type(val)
	if kind == 'boolean' then
		return val
	end
	if kind == 'number' then
		if val == 1 then
			return true
		end
		if val == 0 then
			return false
		end
		return default
	end
	if kind == 'string' then
		local text = mw.ustring.lower(val) -- comparison is case-insensitive
		local asNumber = tonumber(text)
		if text == 'no' or text == 'n' or text == 'false' or asNumber == 0 then
			return false
		end
		if text == 'yes' or text == 'y' or text == 'true' or asNumber == 1 then
			return true
		end
	end
	return default
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- String replacement that ignores the parts of the string enclosed in "...".
-- INPUT:
--  * String - text to process (for example a {{#time}} format string)
--  * old    - Lua pattern to look for (passed unchanged to string.gsub)
--  * new    - replacement (passed unchanged to string.gsub, so "%" keeps its special meaning)
-- OUTPUT:
--  * exactly one value: the string with every match outside of double quotes replaced
local function strReplace(String, old, new)
	-- Appending a closing quote lets a single pattern walk over every quote-delimited
	-- segment, including the last one. Odd segments lie outside quotes and get the
	-- replacement, even segments are quoted text and are copied verbatim.
	local pieces = {}
	local index = 0
	for segment in (String .. '"'):gmatch('([^"]*)"') do
		index = index + 1
		if index % 2 == 1 then
			-- assignment keeps only the string and drops gsub's match count
			segment = segment:gsub(old, new)
		end
		pieces[index] = segment
	end
	return table.concat(pieces, '"')
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- Convert a vector of date-time components into numbers and a code describing which
-- components are usable.
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken
--              down date-time component strings or numbers. The array is not modified.
-- OUTPUT:
--  * datecode - a code specifying the content of the array, where 'Y' is year, 'M' is month,
--               'D' is day, 'H' is hour, 'M' is minute and 'S' is second. A letter is added only
--               if the component is a number within its allowed range (the year has no range).
--               Hour, minute and second are added only as an unbroken run directly after a
--               complete 'YMD', so the codes produced are '', Y, M, D, YM, YD, MD, YMD, YMDH,
--               YMDHM and YMDHMS and the letter 'M' in a code shorter than 'YMDHM' always
--               means month.
--  * datenum  - same array but holding only numbers or nils (out-of-range numbers are kept)
local function parserDatevec(datevec)
	-- work on a private copy so the caller's table is left untouched
	local fields = {}
	for i = 1, 8 do
		fields[i] = datevec[i]
	end

	-- if month is not a number then check if it is a month name in project's language
	local month = fields[2]
	if month and month ~= '' and not tonumber(month) then
		fields[2] = mw.getContentLanguage():formatDate('n', month)
	end

	-- create datecode based on which variables are provided and check for out-of-bound values
	local maxval = {nil, 12, 31, 23, 59, 59, 23, 59} -- max values for year, month, ...
	local minval = {nil, 1, 1, 0, 0, 0, -23, 0} -- min values for year, month, ...
	local c = {'Y', 'M', 'D', 'H', 'M', 'S', '', ''}
	local datecode = '' -- a string signifying which combination of variables was provided
	local datenum = {} -- date-time encoded as a vector = [year, month, ... , second]
	for i = 1, 8 do
		local value = tonumber(fields[i])
		datenum[i] = value
		local inRange = value ~= nil and (i == 1 or (value >= minval[i] and value <= maxval[i]))
		-- Month and minute share the letter 'M'. Without this check a year plus a minute
		-- would give 'YM' and a lone minute would give 'M', both of which are later read
		-- as month codes although no month exists.
		local contiguous = i < 4 or i > 6 or #datecode == i - 1
		if inRange and contiguous then
			datecode = datecode .. c[i]
		end
	end
	return datecode, datenum
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- Build the time stamp string matching a datecode.
-- INPUT:
--  * datecode - a code specifying content of the array where 'Y' is year, 'M' is month,
--               'D' is day, 'H' is hour, 'M' minute, 'S' is second
--  * datenum  - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nils
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and
--                {{#time}} parser function, or nil if the combination is not supported
--       https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--       https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode  - with possible corrections: codes starting with 'YMD' other than 'YMDHMS' and
--                'YMDHM' become 'YMD', other codes starting with 'Y' (except a real 'YM')
--                become 'Y'
local function getTimestamp(datecode, datenum)
	local year, month, day = datenum[1], datenum[2], datenum[3]
	-- create time stamp string (for example 2000-02-20 02:20:20) based on which variables were provided
	local timeStamp
	if datecode == 'YMDHMS' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', year, month, day, datenum[4], datenum[5], datenum[6])
	elseif datecode == 'YMDHM' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i', year, month, day, datenum[4], datenum[5])
	elseif datecode:sub(1, 3) == 'YMD' then
		timeStamp = string.format('%04i-%02i-%02i', year, month, day)
		datecode = 'YMD' -- 'YMD', 'YMDHMS' and 'YMDHM' are the only supported formats starting with 'YMD'
	elseif datecode == 'YM' and month then
		-- "month" is checked because the second letter may stand for a minute, in which
		-- case there is no month to format and the year-only branch below applies
		timeStamp = string.format('%04i-%02i', year, month)
	elseif datecode:sub(1, 1) == 'Y' then
		timeStamp = string.format('%04i', year)
		datecode = 'Y'
	elseif datecode == 'M' and month then
		-- month without a year: year 2000 and day 1 are placeholders
		timeStamp = string.format('%04i-%02i-%02i', 2000, month, 1)
	elseif datecode == 'MD' and month and day then
		-- month and day without a year: year 2000 is a placeholder
		timeStamp = string.format('%04i-%02i-%02i', 2000, month, day)
	else
		timeStamp = nil -- format not supported
	end
	return timeStamp, datecode
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- Trim leading zeros in years prior to year 1000.
-- INPUT:
--  * datestr - translated date string containing the year padded to 4 digits
--  * year    - year as a number
--  * lang    - language of translation
-- OUTPUT:
--  * the results of string.gsub: the date string with the padded year replaced by the
--    trimmed one, followed by the number of replacements made
local function trimYear(datestr, year, lang)
	local padded = string.format('%04i', year) -- 4 digit year in standard form "0123"
	local localized = mw.language.new(lang):formatDate('Y', padded) -- same as calling {{#time}} parser function
	local trimmed
	if localized == padded then
		-- most languages use the standard digits, so the plain number is the trimmed year
		trimmed = tostring(year)
	else
		-- some languages use different characters for numbers; the first character of
		-- the padded year is "0" in whatever script the language uses
		trimmed = localized
		local zero = mw.ustring.sub(localized, 1, 1)
		local pass = 1
		while pass <= 3 and mw.ustring.sub(trimmed, 1, 1) == zero do
			trimmed = mw.ustring.sub(trimmed, 2, 5 - pass) -- drop one leading zero
			pass = pass + 1
		end
	end
	return string.gsub(datestr, localized, trimmed) -- in datestr replace long year with trimmed one
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
--  * datecode: YMDHMS, YMDHM, YMD, YM, MD, Y, or M
--  * day     : Number between 1 and 31 (only used when the stored format depends on the day)
--  * lang    : language
-- OUTPUT:
--  * dFormat : input to {{#time}} function
local function getDateFormat(datecode, day, lang)
	-- Resolve a stored format. A format containing the word "default" and longer than 10
	-- characters is treated as JSON with a "default" field and 2 digit day keys such as
	-- "d01"; the entry for the requested day is used, or "default" when there is none.
	local function resolve(fmt, dayNum)
		local dayDependent = fmt:find('default') and #fmt > 10
		if not dayDependent then
			return fmt
		end
		-- fancy double quotes are used for json marking and have to become straight ones
		local decoded = mw.text.jsonDecode((fmt:gsub('”', '"')))
		local key = string.format('d%02i', dayNum) -- create day key
		local chosen = decoded[key] or decoded.default
		-- single quotes are used for {{#time}} marking and have to become double quotes
		return (chosen:gsub("'", '"'))
	end

	-- convert the rows of the data page into a dictionary: first column -> third column
	local formats = {}
	local tab = mw.ext.data.get('DateI18n.tab', lang)
	for _, row in pairs(tab.data) do
		local id, _, msg = unpack(row)
		formats[id] = msg
	end

	local withTime = (datecode == 'YMDHMS' or datecode == 'YMDHM')
	if withTime and formats[datecode] == 'default' then
		-- the word "default" means: take the day precision format and append the time to it
		local suffix = ', H:i'
		if datecode == 'YMDHMS' then
			suffix = suffix .. ':s'
		end
		return resolve(formats['YMD'], day) .. suffix
	end
	return resolve(formats[datecode], day)
end
|
|
||
| --------------------------------------------------------------------------------------- | ||
-- Look up the name of a month in a given grammatical case
-- INPUTS:
--  * month : month number
--  * case  : grammatical case abbreviation, like "ins", "loc"
--  * lang  : language
-- OUTPUT:
--  * the message stored on the 'I18n/MonthCases.tab' data page for that month and case,
--    or nil when "month" is not a number, "case" is nil, or no row matches
local function MonthCase(month, case, lang)
	-- a date without a month (for example year only) has nothing to look up
	if type(month) ~= 'number' or case == nil then
		return nil
	end
	local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
	local found = nil
	for _, row in pairs(tab.data) do
		-- each row is read as {month number, case, message}
		local mth, cs, msg = row[1], row[2], row[3]
		if mth == month and cs == case then
			found = msg -- no early return: a later matching row takes precedence
		end
	end
	return found
end
|
|
||
| -- ================================================== | ||
| -- === External functions =========================== | ||
| -- ================================================== | ||
| local p = {} | ||
|
|
||
--[[ ========================================================================================
_Date

This function is the core part of the ISOdate template. It is meant to be called from
other Lua modules.

Usage:
local Date = require('Module:DateI18n')._Date
local dateStr = Date({2020, 12, 30, 12, 20, 11}, lang)

Parameters:
* {year,month,day,hour,minute,second, tzhour, tzmin}: broken down date-time component strings or numbers
  tzhour, tzmin are timezone offsets from UTC, hours and minutes
* lang: The language to display it in; when missing or not a valid language code the
  user's interface language is used
* case: Language format (genitive, etc.) for some languages
* class: CSS class for the <time> node, use "" for no metadata at all
* trim_year: controls trimming of leading zeros of years below 1000. Either a boolean-like
  value (yes/no/1/0/true/false) or a range such as "100-999", meaning that only years within
  the range are trimmed. Defaults to "100-999".

Returns:
* the formatted date string, optionally wrapped in <span><time> tags, or an empty string
  when the components do not form a supported date
]]
function p._Date(datevec, lang, case, class, trim_year)
	-- make sure inputs are in the right format
	if not lang or not mw.language.isValidCode( lang ) then
		lang = mw.getCurrentFrame():callParserFunction( "int", "lang" ) -- get user's chosen language
	end
	-- NOTE(review): the original test was for 'be-tarsk', which looks like a typo for the
	-- language code 'be-tarask'. Both spellings are accepted - confirm and drop the misspelt one.
	if lang == 'be-tarask' or lang == 'be-tarsk' then
		lang = 'be-x-old'
	end

	-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
	local datecode, datenum = parserDatevec(datevec)
	local year, month, day = datenum[1], datenum[2], datenum[3]
	local timeStamp
	timeStamp, datecode = getTimestamp(datecode, datenum)
	if not timeStamp then -- the components do not form a supported date
		return ''
	end
	-- Commons [[Data:DateI18n.tab]] page stores preferred formats for different
	-- languages and datecodes (specifying year-month-day or just year or month-day, etc)
	-- Look up country specific format input to {{#time}} function
	local dFormat = getDateFormat(datecode, day, lang)

	-- By default the grammatical case is not specified (case=='') allowing the format to be specified
	-- in [[Data:DateI18n.tab]]. You can overwrite the default grammatical case of the month by
	-- specifying "case" variable. This is needed mostly by Slavic languages to create more complex
	-- phrases as it is done in [[c:Module:Complex date]]
	case = case or ''
	if (lang=='qu' or lang=='qug') and case=='nom' then
		-- Special case related to Quechua and Kichwa languages. The form in the I18n is
		-- Genitive case with suffix "pi" added to month names provided by {{#time}}
		-- in Nominative case that "pi" should be removed
		-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
		dFormat = dFormat:gsub('F"pi"', 'F')
	elseif case=='gen' then
		dFormat = strReplace(dFormat, "F", "xg")
	elseif case=='nom' then
		dFormat = strReplace(dFormat, "xg", "F")
	elseif case ~= '' and month and month >= 1 and month <= 12 then
		-- The month check matters: a year-only date has no month, and looking up a month
		-- name for it used to raise an error inside MonthCase.
		-- See if page [[Data:I18n/MonthCases.tab]] on Commons has the name of the month
		-- in specific grammatical case in desired language. If we have it then replace
		-- "F" and "xg" in dFormat
		local monthMsg = MonthCase(month, case, lang)
		if monthMsg and monthMsg ~= '' then -- make sure it exists
			dFormat = strReplace(dFormat, 'F', '"'..monthMsg..'"') -- replace default month with month name we already looked up
			dFormat = strReplace(dFormat, 'xg', '"'..monthMsg..'"')
		end
	end

	-- Translate the date using specified format
	-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and
	-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
	local datestr = mw.language.new(lang):formatDate( dFormat, timeStamp) -- same as using {{#time}} parser function

	-- Special case related to Thai solar calendar: prior to 1940 new-year was at different
	-- time of year, so just year (datecode=='Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
	if lang=='th' and datecode=='Y' and year<=1940 then
		datestr = string.format('%04i หรือ %04i', year+542, year+543 )
	end

	-- If year<1000 then either keep the date padded to the length of 4 digits or trim it
	-- decide if the year will stay padded with zeros (for years in 0-999 range)
	if year and year<1000 then
		if type(trim_year)=='nil' then
			trim_year = '100-999'
		end
		local trim = yesno(trim_year,nil) -- convert to boolean
		if trim==nil and type(trim_year)=='string' then
			-- if "trim_year" is not a simple True/False then it is a range of years, for example
			-- '100-999' means to pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
			local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
			trim = (YMin~=nil and year>=tonumber(YMin) and year<=tonumber(YMax))
		end
		if trim==true then
			datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
		end
	end

	-- append timezone if present
	if datenum[7] and (datecode == 'YMDHMS' or datecode == 'YMDHM') then
		-- use {{#time}} parser function to create timezone string, so that we use correct character set
		local sign = (datenum[7]<0) and '−' or '+'
		-- The offset is formatted through a dummy date kept in its own variable, so that
		-- "timeStamp" still holds the real date when it is written to the datetime attribute below.
		local tzStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
		local timezone = mw.language.new(lang):formatDate( 'H:i', tzStamp) -- same as using {{#time}} parser function
		datestr = string.format("%s %s%s", datestr, sign, timezone )
	end

	-- html formatting and tagging of date string
	-- (skipped for dates without a year, whose time stamp only holds a placeholder year)
	if class and class ~= '' and datecode~='M' and datecode~='MD' then
		local DateHtmlTags = '<span style="white-space:nowrap"><time class="%s" datetime="%s">%s</time></span>'
		datestr = DateHtmlTags:format(class, timeStamp, datestr)
	end
	return datestr
end
|
|
||
--[[ ========================================================================================
Date

Entry point for calls from wikitext. It collects the template arguments and hands them
over to p._Date, which is the core part of the ISOdate template.

Usage:
{{#invoke:DateI18n|Date|year=|month=|day=|hour=|minute=|second=|tzhour=|tzmin=|lang=en}}

Parameters:
* year, month, day, hour, minute, second: broken down date-time component strings
* tzhour, tzmin: timezone offset from UTC, hours and minutes
* lang: The language to display it in
* case: Language format (genitive, etc.) for some languages
* class: CSS class for the <time> node, use "" for no metadata at all; "dtstart" when not given
* trim_year: handling of leading zeros in years below 1000; "100-999" when not given

Argument names are matched case-insensitively and spaces in them are read as underscores.
]]
function p.Date(frame)
	-- normalise argument names: lower case, spaces replaced with underscores
	local normalized = {}
	for key, val in pairs( frame.args ) do
		local canonical = string.lower(key):gsub(' ', '_')
		normalized[canonical] = val
	end

	local datevec = {
		normalized.year, normalized.month, normalized.day,
		normalized.hour, normalized.minute, normalized.second,
		normalized.tzhour, normalized.tzmin,
	}
	-- html class of the time node where the date is included, useful for microformats
	local class = normalized.class or 'dtstart'
	-- by default pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
	local trim_year = normalized.trim_year or '100-999'

	return p._Date(datevec, normalized.lang, normalized.case, class, trim_year)
end
|
|
||
| return p | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Crash when `month` is nil. If `datecode` is 'Y' (year only), `month` will be nil (from line 252: `datenum[2]`). However, if a `case` parameter is provided (line 277 check passes), the code calls `MonthCase(month, case, lang)` at line 281 with nil month. This causes `T[month][case]` at line 217 to attempt indexing `T[nil]`, resulting in a crash.
Spotted by Graphite Agent

Is this helpful? React 👍 or 👎 to let us know.
This comment came from an experimental review—please leave feedback if it was helpful/unhelpful. Learn more about experimental comments here.