Module:Language/data/ISO 639-5/make: Difference between revisions
Jump to navigation
Jump to search
en>Legoktm (Replace Module:No globals with require( "strict" )) |
m (1 revision imported) |
(No difference)
|
Latest revision as of 09:10, 18 December 2022
Reads a local copy of data from the table at https://www.loc.gov/standards/iso639-5/id.php, extracts the ISO 639-5 codes and their associated language names
Usage
To use this tool:
- open a blank sandbox page and paste this {{#invoke:}} into it at the top line:
{{#invoke:Language/data/ISO 639-5/make|ISO_5_name_extract|file-date=YYYY-MM-DD}}
- go to the current Codes for the Representation of Names of Languages Part 5 page. Copy the content of the table on that page and paste it into the sandbox page below the {{#invoke:}}.
- click Show preview
- wait
- get result
require('strict');
--[[--------------------------< I S O _ 5 _ N A M E _ E X T R A C T >------------------------------------------
{{#invoke:Language/data/ISO 639-5/make|ISO_5_name_extract|file-date=2013-02-11}}
reads a local copy of data from the table at https://www.loc.gov/standards/iso639-5/id.php, extracts
the ISO 639-5 codes and their associated language names
useful lines in the source table have the form:
<Identifier>\t<English name>\t<French name>\t<639-2>\t<Hierarchy>\t<Notes>\n
where:
<Identifier> is the 639-5 language code
<English name> is the English name
<French name> is the French name (not used here)
<639-2> is language group or remainder group or blank (not used here)
<Hierarchy> (not used here)
<Notes> (not used here)
for the file date use the date listed at the bottom of the source page, written in YYYY-MM-DD format as in the example above; the value is copied unchanged into the 'File-Date:' line of the output
]]
-- Builds the text of a Lua data table of ISO 639-5 codes and English names from
-- the tab-separated rows found in the unparsed content of the current page.
--
-- frame:   Scribunto frame object; frame.args["file-date"] is required and is
--          copied unchanged after 'File-Date: ' in the output header comment.
-- returns: a string holding a <pre> block: a '-- File-Date: ...' comment line
--          followed by 'return { ... }' with one '["code"] = {"name"}' entry per
--          matching row; entries are sorted as strings, so effectively by code.
-- raises:  an error when |file-date= was not supplied.
local function ISO_5_name_extract (frame)
	local date_arg = frame.args["file-date"];
	if nil == date_arg then											-- fail with a usable message instead of a nil-concatenation error
		error ('ISO_5_name_extract: |file-date= is required (for example |file-date=2013-02-11)', 0);
	end
	local file_date = 'File-Date: ' .. date_arg;					-- header line built from |file-date=

	local content = mw.title.getCurrentTitle():getContent() or '';	-- unparsed page text; getContent() may return nil (e.g. no such page), treated here as empty
	local out_table = {};											-- one formatted entry per extracted row

	for line in content:gmatch ('[^\r\n]+') do						-- visit every non-empty line; empty lines could never match the row pattern
		-- three lowercase letters, tab, English name, tab; \226\128\145 is non-breaking hyphen U+2011 (E2 80 91)
		local code, name = line:match ('^(%l%l%l)\t([%a %-,%(%)\226\128\145]+)\t');
		if code then
			name = name:gsub ('\226\128\145', '-');					-- replace non-breaking hyphens with hyphen-minus
			out_table[#out_table + 1] = '["' .. code .. '"] = {"' .. name .. '"}';
		end
	end

	table.sort (out_table);
	return "<br /><pre>-- " .. file_date .. "<br />return {<br />\t" .. table.concat (out_table, ',<br />\t') .. "<br />\t}<br />" .. "</pre>";
end
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
]]
local exports = {};
exports.ISO_5_name_extract = ISO_5_name_extract;				-- the function named in {{#invoke:Language/data/ISO 639-5/make|ISO_5_name_extract|...}}
return exports;