Module:Technique/Comparison
Jump to navigation
Jump to search
CodeDiscussionEditHistoryLinksLink count Subpages:DocumentationTestsResultsSandboxLive code All modules
Lua
Documentation for this module may be created at Module:Technique/Comparison/doc
Code
--[=[
TODO:
* for performance: consider skipping preprocessing and instead parsing templates like
{{c}} (in en), {{wf}} (in fr), {{w}} (in nl) with rules
* consider putting the information of the wikitable into a Lua table and concatenating it,
e.g. have a header table with headers['wd1'] = '[[blah]]' etc.
]=]
require('strict') -- used for debugging purposes as it detects cases of unintended global variables
local getProductQid = require('Module:Technique').getProductQid
local material_LUT = require('Module:Artwork/Technique LUT')
local material_LUT1 = require('Module:Technique/WikidataLUT')
local synonyms = require('Module:Technique/synonyms')
local plurals = material_LUT1.plurals
local declension = require('Module:Declension')
local p = {}
-- Return a list holding every key of `inTable`, ordered by the default
-- `table.sort` comparison.
local function getSortedKeys(inTable)
    local keys = {}
    for k in pairs(inTable) do
        keys[#keys + 1] = k
    end
    table.sort(keys)
    return keys
end
-- Expand {{LanguageSelector}} for a fixed list of language codes and return
-- the resulting wikitext.
-- NOTE(review): `notpiol` is accepted but never read in this function;
-- confirm whether it was meant to be forwarded to the template.
local function getlanguageSelector(notpiol)
    local selectorArgs = {
        'ar', 'ca', 'cs', 'da', 'de', 'el', 'en', 'en-ca', 'es', 'et', 'fi', 'fr',
        'gl', 'he', 'hu', 'it', 'ja', 'mk', 'nds', 'nl', 'no', 'pl', 'pt',
        'ro', 'ru', 'scn', 'sk', 'sl', 'sr', 'sv', 'sw', 'vec', 'vi', 'zh-hans',
        'zh-hant',
        icons = 'anything else',
    }
    local frame = mw.getCurrentFrame()
    return frame:expandTemplate{title = 'LanguageSelector', args = selectorArgs}
end
-- Wrap `code` in a <syntaxhighlight lang="lua"> tag and let the current
-- frame preprocess it, returning the preprocessed wikitext.
local function syntaxHighlightLuaCode(code)
    local frame = mw.getCurrentFrame()
    return frame:preprocess('<syntaxhighlight lang="lua">' .. code .. '</syntaxhighlight>')
end
-- Build a small search-icon image link that opens a Commons search for
-- uses of `term` as a {{technique}} parameter value.
-- `wordtype` ('nouns' or 'adjectives') selects the regex fragment matching
-- the parameter name; any other value leaves that fragment empty.
local function termSearchLink(term, wordtype)
    local paramRegexByWordtype = {
        nouns = '((1|2|on|and[1-5]?|mounted|over) *=)?',
        adjectives = '(adj|color)(1|2|on|and[1-5]?|mounted|over)? *=',
    }
    local paramRegex = paramRegexByWordtype[wordtype] or ''
    -- the search query: a quoted phrase plus an insource regex
    local query = 'insource:"' .. term
        .. '" hastemplate:technique insource:/\\|'
        .. paramRegex .. ' *' .. term .. ' *(}|\\|)/i'
    return '[[File:Vector search icon.svg|10x10px|link='
        .. 'https://commons.wikimedia.org/w/index.php?search='
        .. mw.uri.encode(query, 'PATH')
        .. ']]'
end
-- Remove every <!-- ... --> span from `inText` by splitting on the comment
-- pattern and gluing the remaining pieces back together.
local function stripHtmlComments(inText)
    local pieces = mw.text.split(inText, '<!%-%-.-%-%->')
    return table.concat(pieces)
end
-- Escape `inText` so it can be placed inside a single-quoted Lua string
-- literal: backslashes are doubled and single quotes get a leading backslash.
-- Returns exactly one value (the escaped string); gsub's replacement count
-- is deliberately dropped so the result is safe to use as the last argument
-- of another call, e.g. table.insert(t, escapeForLua(x)).
local function escapeForLua(inText)
    local escaped = inText:gsub('\\', '\\\\'):gsub("'", "\\'")
    return escaped
end
-- Return the part of `inText` captured after a `{{#switch: {{{1|}}}` opener:
-- from a `|` up to and including the character following `#default =`.
-- Returns nil when the pattern does not match.
local function findTermsArea(inText)
    return (mw.ustring.match(inText, '{{#switch:%s*{{{1|}}}.-(|.*#default%s*=.)'))
end
-- Parse one `| key1 | key2 = value` line of a #switch body.
-- An HTML comment on the line is cut out and kept separately.
-- Returns a table {left, synonyms, right, comment} or nothing when the line
-- has no `key = value` shape or its key is one of the standard keys.
local function analyzeTermLine(line)
    local head, note, tail = mw.ustring.match(line, '^(.-)%s*<!%-%-%s*(.-)%s*%-%->%s*(.*)$')
    if head then
        -- drop the comment, keep what surrounded it
        line = head .. tail
    end
    local key, value = mw.ustring.match(line, '^%s*|%s*(.-)%s*=%s*(.-)%s*$')
    if not (key and value) then
        return
    end
    -- get rid of standard keys
    local reservedKeys = {['']=1, adj=1, basic=1, over=1, on=1, mounted=1, order=1, case=1, ['#default']=1}
    if reservedKeys[key] then
        return
    end
    return {
        left = key,
        synonyms = mw.text.split(key, '%s*|%s*'),
        right = value,
        comment = note,
    }
end
-- Parse a language subpage of [[Template:Technique]] and return its mapping.
-- `wordtype` is 'nouns' (page Template:Technique/<lang>) or 'adjectives'
-- (page Template:Technique/<lang>/adjectives); `selectFunction`, when given,
-- filters which keys end up in the mapping.
-- Returns, in this order:
--   the key -> translation table,
--   the number of parsed mapping lines,
--   the number of lines whose value contains a parser function,
--   the number of lines whose value contains a template,
--   the number of keys stored (synonyms counted individually),
--   a table mapping every non-first synonym to the first key of its line.
local function analyzeLangSubtemplate(wordtype, lang, selectFunction)
    local subpageSuffix
    if wordtype == 'adjectives' then
        subpageSuffix = '/adjectives'
    elseif wordtype == 'nouns' then
        subpageSuffix = ''
    end
    local title = mw.title.new('Technique/' .. lang .. subpageSuffix, 'Template')
    local wikitext = title:getContent()
    local translations = {}
    local firstSynonymOf = {}
    local lineCount, keyCount, pfCount, tplCount = 0, 0, 0, 0
    wikitext = stripHtmlComments(wikitext)
    -- some languages don't have translations yet for adjectives
    local switchBody = findTermsArea(wikitext) or ''
    -- for some reason that gets slow with a simpler pattern, gsplit is even worse
    for rawLine in mw.ustring.gmatch(switchBody, '|[^\n]-\n') do
        local entry = analyzeTermLine(rawLine)
        if entry then
            lineCount = lineCount + 1
            -- a '{{' in the value indicates a template or a parser function,
            -- so the value has to be preprocessed
            if mw.ustring.find(entry.right, '^[^{]*{{[^{]') then
                if mw.ustring.find(entry.right, '{{#') then
                    pfCount = pfCount + 1
                end
                -- the leading '_' lets the pattern also catch a '{{' at the very start
                if mw.ustring.find('_' .. entry.right, '[^{]{{[^{#]') then
                    tplCount = tplCount + 1
                end
                entry.right = mw.getCurrentFrame():preprocess(entry.right)
            end
            local keys = entry.synonyms
            for idx = 1, #keys do
                local key = keys[idx]
                if not selectFunction or selectFunction(key) then
                    translations[key] = entry.right
                    keyCount = keyCount + 1
                end
            end
            -- map every further synonym to the first term given
            for idx = 2, #keys do
                firstSynonymOf[keys[idx]] = keys[1]
            end
        end
    end
    return translations, lineCount, pfCount, tplCount, keyCount, firstSynonymOf
end
-- Read the `wordtype` table of Module:Technique/<lang> and return its mapping
-- in the same return shape as analyzeLangSubtemplate.
-- Table entries contribute their `default` field, string entries themselves.
-- Only the mapping and the mapping count are filled here; the parser-function,
-- template and total counts stay 0 and the synonym table stays empty.
local function analyzeLangSubmodule(wordtype, lang, selectFunction)
    local moduleTerms = require('Module:Technique/' .. lang)[wordtype]
    local translations, synTerms = {}, {}
    local mappingCount = 0
    local totalMappingCount, parserFunctionCount, templateCount = 0, 0, 0
    for term, entry in pairs(moduleTerms) do
        if not selectFunction or selectFunction(term) then
            mappingCount = mappingCount + 1
            local entryType = type(entry)
            if entryType == 'table' then
                translations[term] = entry.default -- or nounData.n -- nominative necessary?
            elseif entryType == 'string' then
                translations[term] = entry
            end
        end
    end
    return translations, mappingCount, parserFunctionCount, templateCount, totalMappingCount, synTerms
end
-- Parse [[Template:Technique/lang]] and return, in page order, every language
-- code that appears in a `{{fullurl:Template:Technique/<code>}} ` link there.
-- (The list could also simply have been copied by hand.)
local function subpageLangs()
    local wikitext = mw.title.new('Technique/lang', 'Template'):getContent()
    local codes = {}
    for code in mw.ustring.gmatch(wikitext, '{{fullurl:Template:Technique/(.-)}} ') do
        codes[#codes + 1] = code
    end
    return codes
end
-- Gather the `qid1` value of every entry in `terms` that has one.
-- The order of the returned list follows `pairs` and is therefore unspecified.
local function collectQIDs(terms)
    local found = {}
    for _, termData in pairs(terms) do
        local qid = termData.qid1
        if qid then
            found[#found + 1] = qid
        end
    end
    return found
end
-- Build a Wikidata Query Service URL whose SPARQL query lists, for every item
-- in `qids`, the label and the Wikipedia sitelink in each of `languages`.
-- @param languages list of language codes, e.g. {'en', 'zh-hans'}
-- @param qids list of Wikidata item IDs, e.g. {'Q207849'}
-- @return the query URL (https://query.wikidata.org/#<encoded query>)
local function constructTechniqueSPARQL(languages, qids)
    local optionalform = [=[
OPTIONAL { ?item rdfs:label ?label%s. FILTER(LANG(?label%s)='%s') }
OPTIONAL { ?link%s schema:about ?item;
schema:isPartOf <https://%s.wikipedia.org/> . }
]=]
    local varParts, optionalParts = {}, {}
    for _, lang in ipairs(languages) do
        -- remove "-" for variable names only, e.g. use "zhhans", not "zh-hans";
        -- the language filter and the host name keep the real code, otherwise
        -- hyphenated codes could never match anything
        local varSuffix = mw.ustring.gsub(lang, '%-', '')
        varParts[#varParts + 1] = '?label' .. varSuffix .. ' ?link' .. varSuffix .. ' '
        optionalParts[#optionalParts + 1] = string.format(
            optionalform, varSuffix, varSuffix, lang, varSuffix, lang)
    end
    local valueParts = {}
    for _, q in ipairs(qids) do
        valueParts[#valueParts + 1] = ' wd:' .. q
    end
    local queryform = [=[
SELECT ?item
%s
WHERE {
VALUES ?item { %s }
%s
}
LIMIT 10000
]=]
    local sparqlQ = string.format(queryform,
        table.concat(varParts), table.concat(valueParts), table.concat(optionalParts))
    return 'https://query.wikidata.org/#' .. mw.uri.encode(sparqlQ, 'PATH')
end
--[=[ for debugging:
local wdMap = {
Q207849 = {fr = {label = "peinture acrylique", link = "Peinture acrylique"}, en = {label = "acrylic paint", link = "Acrylic paint"}, },
Q123314 = {fr = {label = "agate", link = "Agate"}, en = {label = "agate", link = "Agate"}, },
Q143447 = {},
Q2144394 = {fr = {label = "", link = ""}, en = {label = "", link = ""}, },
Q127583 = {sk={label='zafír', link='barLink'}}, -- same label
Q42329 = {sk={label='vlna', link=nil}}, -- same label, no link
Q14298 = {sk={label='satén DIFFERENT', link='barLink'}}, -- different label
}
wdMap.Q207849.sk = {label='fooLabel', link='barLink'}
wdMap.Q123314.sk = {label=nil, link='barLink'}
wdMap.Q143447.sk = {label='fooLabel', link=nil}
wdMap.Q2144394.sk = {label='fooLabel', link='barLink'}
--]=]
-- Build an interwiki link `[[:prefix:pagename|label]]`.
-- Without `pagename` the bare `label` (or '') is returned; without `label`
-- the link is emitted unpiped.
-- `prefix` must be a string whenever `pagename` is given.
-- @treturn string linkedLabel
local function getLinkedLabel(pagename, prefix, label)
    if not pagename then
        return label or ''
    end
    local link = '[[:' .. prefix .. ':' .. pagename
    if label then
        link = link .. '|' .. label
    end
    return link .. ']]'
end
-- Take apart a piped wikilink to a Wikipedia with a 2 or 3 letter prefix.
-- Returns prefix, link target (first character uppercased) and label.
-- When `wikitext` is not such a link, prefix and link are nil and the label
-- is the unchanged `wikitext`.
-- Known pattern issues:
-- * something like '[[::foo|bar]]' would get matched
-- * something like '[[:en:foo[1]|bar]]' wouldn't get matched
local function analyzeWikilink(wikitext)
    local prefix, link, label = mw.ustring.match(wikitext,
        '^%[%[:?w?:(%l%l%l?):([^%[%]]-)%|([^%[%]]-)%]%]%s*$')
    -- no link label detected: fall back to the bare text
    label = label or wikitext
    if link then
        -- Wikipedia titles (and the titles stored in Wikidata) start uppercase
        local firstChar = mw.ustring.sub(link, 1, 1)
        link = mw.ustring.upper(firstChar) .. mw.ustring.sub(link, 2)
    end
    return prefix, link, label
end
-- Look up the `<lang>wiki` sitelink of `qid`; if it has none, try the items
-- in `altQids` in order and take the first sitelink found.
-- Returns the sitelink (or nil) and a boolean telling whether it came from
-- one of the alternative items.
local function getBestSitelink(lang, qid, altQids)
    local site = lang .. 'wiki'
    local primary = mw.wikibase.getSitelink(qid, site)
    if primary or not altQids then
        return primary, false
    end
    for _, altQid in ipairs(altQids) do
        local altLink = mw.wikibase.getSitelink(altQid, site)
        if altLink then
            return altLink, true
        end
    end
    return primary, false
end
-- Compare the Commons translation of one term with Wikidata, per language.
-- For every language in `languages`, the sub-table `termTable[lang]` (which
-- must exist and may carry the Commons wikitext in its `com` field) is
-- filled IN PLACE with:
--   wdSitelink, wdSitelinkIsAlt, wdLabel, wdLinkedLabel,
--   linksComparisonColor, labelsComparisonColor,
--   exportWdLabel, exportWdLabelLowercase
-- (the last four only when the respective condition holds).
-- `termTable` needs the fields `term` and `qid1`, optionally `altQids`.
-- @return the same `termTable` that was passed in
local function compareToWD(termTable, languages)
    -- UTF-8 bytes of U+00AD (soft hyphen), written as an escape so the
    -- invisible character cannot get lost when the source is copied
    local SOFT_HYPHEN = '\194\173'
    -- these lookups do not depend on the language, so do them once
    local map = material_LUT1.nouns[termTable.term]
    local productQid = map and getProductQid(map)
    local labelQid = productQid or termTable.qid1
    for _, lang in ipairs(languages) do
        local translation = termTable[lang]
        translation.wdSitelink, translation.wdSitelinkIsAlt =
            getBestSitelink(lang, termTable.qid1, termTable.altQids)
        translation.wdLabel = mw.wikibase.getLabelByLang(labelQid, lang)
        translation.wdLinkedLabel = getLinkedLabel(
            translation.wdSitelink, lang, translation.wdLabel)
        if translation.wdSitelinkIsAlt then
            translation.wdLinkedLabel = "''" .. translation.wdLinkedLabel
                .. [['' (<span title="link from an alternative item">alt</span>)]]
        end
        if productQid then
            translation.wdLinkedLabel = translation.wdLinkedLabel
                .. ' (<span title="label from item for the product of the process">prl</span>)'
        end
        local prefix, link, commonsLabel = analyzeWikilink(translation['com'] or '')
        -- only a link into the Wikipedia of this very language is comparable
        local commonsLink
        if prefix == lang then
            commonsLink = link
        end
        -- get colors for marking comparison with Wikidata
        -- (colors from https://en.wikipedia.org/wiki/Help:Using_colours#Wikimedia_colour_schemes)
        -- note: commonsLink (if found) won't be ''!
        if commonsLink and translation.wdSitelink == commonsLink then
            -- they are same -> get a GREEN
            translation.linksComparisonColor = '#CEF2E0'
        elseif commonsLink then
            -- they are different -> get a RED
            translation.linksComparisonColor = '#F2CEE0'
        end
        local hasCommonsLabel = commonsLabel and commonsLabel ~= ''
        if translation.wdLabel and translation.wdLabel ~= '' and hasCommonsLabel then
            -- ignore soft hyphens in comparison; the parentheses drop gsub's count
            local plainCommonsLabel = (commonsLabel:gsub(SOFT_HYPHEN, ''))
            if translation.wdLabel ~= plainCommonsLabel then
                -- they are different -> get a YELLOW
                translation.labelsComparisonColor = '#faecc8'
            else
                -- they are same -> get a GREEN
                translation.labelsComparisonColor = '#CEF2E0'
            end
        end
        -- Commons has a label, Wikidata has none: offer it for export
        if hasCommonsLabel and not translation.wdLabel then
            translation.exportWdLabel = commonsLabel
        end
        -- labels differ only by the case of the first letter of the Wikidata label
        if hasCommonsLabel and translation.wdLabel
            and commonsLabel ~= translation.wdLabel
            and commonsLabel == (mw.ustring.gsub(translation.wdLabel, '^%u', mw.ustring.lower)) then
            translation.exportWdLabelLowercase = commonsLabel
        end
    end
    return termTable
end
-- source: [[Template:Technique/list]]
-- Add every term listed there to `terms`, flagged with isInListTemplate.
-- For 'nouns' the first <table> of the page is read, for 'adjectives' the
-- second one; only terms accepted by `selectFunction` are added.
-- Returns `terms`.
local function getTechniqueList(terms, wordtype, selectFunction)
    local listWikitext = mw.title.new('Technique/list', 'Template'):getContent()
    local tablePatterns = {
        nouns = '<table(.-)</table>',
        adjectives = '<table.-</table>.-<table(.-)</table>',
    }
    local tableBody
    local tablePattern = tablePatterns[wordtype]
    if tablePattern then
        tableBody = mw.ustring.match(listWikitext, tablePattern)
    end
    for term in mw.ustring.gmatch(tableBody, '<tr><td>(.-)</td><td>{{technique/{{int:lang}}') do
        if selectFunction(term) then
            terms[term] = {isInListTemplate='true'}
        end
    end
    return terms
end
-- source: [[Module:Technique/WikidataLUT]]
-- Copy qid, altQids and deprecated from the noun lookup table onto the
-- matching entries of `terms`.
-- Returns `terms` and the number of matched, non-deprecated entries.
local function getQIDs1(terms, selectFunction)
    local matchedCount = 0
    for term, lutEntry in pairs(material_LUT1.nouns) do
        if selectFunction(term) then
            local known = terms[term]
            if known then
                known['qid1'] = lutEntry['qid']
                known['altQids'] = lutEntry['altQids']
                known['deprecated'] = lutEntry['deprecated']
                if not lutEntry['deprecated'] then
                    matchedCount = matchedCount + 1
                end
            elseif not lutEntry.deprecated then
                -- stuff in [[Module:Technique/WikidataLUT]] that perhaps shouldn't be there:
                -- log it in the format for [[Template:Technique/list]]
                mw.log('<tr><td>' .. term
                    .. '</td><td>{{technique/{{int:lang}} | '
                    .. term .. '}}</td></tr>')
            end
        end
    end
    return terms, matchedCount
end
-- source: [[Module:Technique/WikidataLUT]]
-- Copy qid and deprecated from the adjective lookup table onto the matching
-- entries of `terms`.
-- Returns `terms` and the number of matched, non-deprecated entries.
-- TODO: this could become a general wordtype agnostic function
local function getQIDs1adjectives(terms, selectFunction)
    local matchedCount = 0
    for term, lutEntry in pairs(material_LUT1.adjectives) do
        if selectFunction(term) then
            local known = terms[term]
            if known then
                known['qid1'] = lutEntry['qid']
                known['deprecated'] = lutEntry['deprecated']
                if not lutEntry['deprecated'] then
                    matchedCount = matchedCount + 1
                end
            elseif not lutEntry.deprecated then
                -- stuff in [[Module:Technique/WikidataLUT]] that perhaps shouldn't be there:
                -- log it in the format for [[Template:Technique/list]]
                mw.log('<tr><td>' .. term
                    .. '</td><td>{{technique/{{int:lang}}/adjectives | '
                    .. term .. '}}</td></tr>')
            end
        end
    end
    return terms, matchedCount
end
-- source: [[Module:Artwork/Technique LUT]]
-- For every qid -> term pair of that table, store the qid in the `qid` field
-- of the (lowercased) term's entry in `terms`, creating the entry if needed.
-- Returns `terms`.
local function getQIDs2(terms)
    for qid, rawTerm in pairs(material_LUT) do
        -- fix some uppercase terms in [[Module:Artwork/Technique LUT]]
        local key = mw.ustring.lower(rawTerm)
        local entry = terms[key]
        if not entry then
            entry = {}
            terms[key] = entry
        end
        entry['qid'] = qid
    end
    return terms
end
-- source: language subpages of [[Template:Technique]]
-- Run the analysis selected by `system` ('templates' or 'modules') for every
-- language and merge the found terms into `terms`.
-- Returns:
--   terms        - with an (empty) entry added for each newly found term
--   langMappings - per language: term -> translation
--   synTerms     - per language: synonym -> first term of its line
--   counters     - per-language figures in the sub-tables mappings,
--                  totalMappings, parserfunction, template and unlistedTerm
local function getTechniqueLangPages(terms, wordtype, languages, selectFunction, system)
    local langMappings, synTerms = {}, {}
    local counters = {
        mappings = {},
        totalMappings = {},
        parserfunction = {},
        template = {},
        unlistedTerm = {},
    }
    local analyzeFunction
    if system == 'templates' then
        analyzeFunction = analyzeLangSubtemplate
    elseif system == 'modules' then
        analyzeFunction = analyzeLangSubmodule
    end
    for _, lang in ipairs(languages) do
        local mapping, mappingCount, pfCount, tplCount, totalCount, syns =
            analyzeFunction(wordtype, lang, selectFunction)
        langMappings[lang] = mapping
        counters.mappings[lang] = mappingCount
        counters.parserfunction[lang] = pfCount
        counters.template[lang] = tplCount
        counters.totalMappings[lang] = totalCount
        synTerms[lang] = syns
        -- count the terms of this language that are missing from the list template
        local unlisted = 0
        for term in pairs(mapping) do
            if selectFunction(term) and not terms[term] then
                terms[term] = {}
            end
            if selectFunction(term) and not terms[term]['isInListTemplate'] then
                unlisted = unlisted + 1
            end
        end
        counters.unlistedTerm[lang] = unlisted
    end
    return terms, langMappings, synTerms, counters
end
-- Render a synonym -> term table as Lua table-entry source lines
-- (`['synonym'] = 'term',`), sorted by synonym, wrapped in <pre>.
local function makeLuaSynonyms(synonyms)
    local entryLines = {}
    for idx, key in ipairs(getSortedKeys(synonyms)) do
        entryLines[idx] = "['" .. escapeForLua(key) .. "'] = '" .. escapeForLua(synonyms[key]) .. "',"
    end
    -- nicer, but slower: syntaxHighlightLuaCode(...) instead of <pre>
    return '<pre>' .. table.concat(entryLines, '\n') .. '</pre>'
end
-- Wrap `text` in a <span> whose background tells how it compares with
-- `comparisonText`: green when equal, yellow when different, and no colour
-- (with empty content) when `text` is missing.
local function colorCodeDifference(text, comparisonText)
    local background = ''
    if text then
        if text == comparisonText then
            background = '#CEF2E0' -- same -> GREEN
        else
            background = '#faecc8' -- different -> YELLOW
        end
    end
    return '<span style="background-color:' .. background .. '">' .. (text or '') .. '</span>'
end
-- Build the wikitext of the comparison page: an introduction followed by a
-- sortable table with one row per technique term and one column (two when
-- comparing with Wikidata) per language.
-- @param languages      list of language codes to show
-- @param wordtype       'nouns' or 'adjectives'
-- @param compare_to_WD  truthy: add the Wikidata columns and description
--                       (expensive, see compareToWD)
-- @param selectFunction optional predicate term -> boolean; default accepts all
-- @param showQs         truthy: show QuickStatements export links
-- @param system         'templates' (default) or 'modules'
-- @return the wikitext as a string
local function termsVisualizer(languages, wordtype, compare_to_WD, selectFunction, showQs, system)
    selectFunction = selectFunction or function(term) return true end
    system = system or 'templates'
    -- collect terms from some sources
    local terms = {} -- the term (noun) table for all sources
    terms = getTechniqueList(terms, wordtype, selectFunction)
    -- terms = getQIDs2(terms)
    local langMappings, synTerms, counters
    terms, langMappings, synTerms, counters = getTechniqueLangPages(terms, wordtype, languages, selectFunction, system)
    local counterWD1
    if wordtype == 'nouns' then
        terms, counterWD1 = getQIDs1(terms, selectFunction)
    elseif wordtype == 'adjectives' then
        terms, counterWD1 = getQIDs1adjectives(terms, selectFunction)
    end
    -- construct a SPARQL query showing information from Wikidata
    local qids = collectQIDs(terms)
    local queryURL = constructTechniqueSPARQL(languages, qids)
    -- get a table of the keys of terms in sorted order to access them in that order
    local sortedTerms = getSortedKeys(terms)
    -- prepare the parts for the output wikitext which need language looping
    local allHeaderCells = ''
    local figuresCells = ''
    local manyPFCount = 0
    for _, lang in ipairs(languages) do
        if compare_to_WD then
            for _, term in ipairs(sortedTerms) do
                terms[term][lang] = {['com'] = langMappings[lang][term]}
                --[[ CAVE: seems to use a lot of memory
                that's why langMappings[lang][term] is used directly and
                this is only assigned here if compare_to_WD and thus needed
                to hand the value over to function compareToWD() ]]
            end
        end
        local pages = {templates = {}, modules = {}}
        pages.templates.name = 'Template:Technique/' .. lang
            .. ((wordtype == 'adjectives' and '/adjectives') or '')
        pages.modules.name = 'Module:Technique/' .. lang
        -- age difference in days between the last revisions of module and template
        local timeDiff = math.ceil(tonumber(
            mw.getCurrentFrame():preprocess(string.format(
                '{{#expr: ({{#time: U | {{REVISIONTIMESTAMP:%s}} }}-{{#time: U | {{REVISIONTIMESTAMP:%s}} }})/(60*60*24)}}',
                pages.modules.name, pages.templates.name))))
        local timeColor = (timeDiff > 0 and '#CEF2E0') or '#F2CEE0'
        local timeDiffOperator = (timeDiff > 0 and '>') or '<'
        local timeText = string.format(
            '\n<div style="background:%s">[[%s|M]]%s[[%s|T]]: %s <abbr title="days">d</abbr></div>',
            timeColor, pages.modules.name, timeDiffOperator,
            pages.templates.name,
            tostring(math.abs(timeDiff)))
        local langHeaderCells = ('! [[' .. pages[system].name .. '|' .. lang
            .. ']] [[File:Commons-logo.svg|15x15px|link=]]\n')
        if compare_to_WD then
            langHeaderCells = langHeaderCells .. '!' .. lang ..
                ' [[File:Wikidata-logo.svg|15x15px|link=]]\n'
        end
        allHeaderCells = allHeaderCells .. langHeaderCells
        local synsPrint = string.format([[
{| class="wikitable mw-collapsible mw-collapsed"
|+ class="nowrap" style="text-align:left;font-weight:normal;" | synonyms:
|-
| %s
|}
]], makeLuaSynonyms(synTerms[lang]))
        -- ratios in percent, rounded up to one decimal place
        local parserfunctionRatio = math.ceil(counters.parserfunction[lang] / counters.mappings[lang] * 1000) / 10
        local templateRatio = math.ceil(counters.template[lang] / counters.mappings[lang] * 1000) / 10
        local manyPFColor = ''
        if parserfunctionRatio > 20 then
            manyPFColor = '#d0e5f5' -- a blue
            manyPFCount = manyPFCount + 1
        end
        local cellTrailing = timeText .. '\n'
        if system == 'templates' then
            cellTrailing = ([=[ + ''<abbr title="synonyms">s</abbr>'' = ]=] ..
                tostring(counters.totalMappings[lang]) .. '<br />' ..
                '<abbr title="parser function ratio">PF</abbr>: ' ..
                tostring(parserfunctionRatio) .. '<br />' ..
                '<abbr title="template ratio">TP</abbr>: ' ..
                tostring(templateRatio) .. '<br />' ..
                '<abbr title="number of unlisted terms">UL</abbr>: ' ..
                tostring(counters.unlistedTerm[lang]) .. '\n' .. synsPrint)
        end
        figuresCells = figuresCells ..
            '!style="text-align:left;font-weight:normal;background:' ..
            manyPFColor .. ';"| ' ..
            '<abbr title="number of mappings">MP</abbr>: ' ..
            tostring(counters.mappings[lang]) .. cellTrailing .. '\n'
        if compare_to_WD then
            figuresCells = figuresCells .. '!\n'
        end
    end
    local counterWD1synonyms = 0
    local counterWD1deprecated = 0
    -- rows are collected in a list and joined once at the end
    local termRows = {}
    -- ipairs: the rows must come out in the sorted order
    for _, term in ipairs(sortedTerms) do
        local wdLink = ''
        local qsString = ''
        local qsStringLowercase = ''
        if terms[term]['deprecated'] then
            wdLink = 'deprecated'
            counterWD1deprecated = counterWD1deprecated + 1
        elseif terms[term]['qid1'] then
            wdLink = '[[d:' .. terms[term]['qid1'] .. '|' .. terms[term]['qid1'] .. ']]'
            -- CAVE: this is expensive!!!
            if compare_to_WD then
                terms[term].term = term
                -- compareToWD writes its results (linksComparisonColor,
                -- labelsComparisonColor, wdLinkedLabel, ...) directly into
                -- terms[term][lang]; they are read from there below, so
                -- nothing has to be copied from the return value
                compareToWD(terms[term], languages)
            end
        elseif synonyms[wordtype][term] then
            wdLink = "see ''" .. synonyms[wordtype][term] .. "''"
            counterWD1synonyms = counterWD1synonyms + 1
        elseif plurals[term] then
            wdLink = "see singular ''" .. plurals[term] .. "''"
            counterWD1synonyms = counterWD1synonyms + 1
        end
        -- construct cells for the languages
        local langCells = ''
        terms[term].counterItemsFromTranslations = 0
        for _, lang in ipairs(languages) do
            -- only set (and only used) when compare_to_WD
            local translation = terms[term][lang]
            local labelsComparisonColor = ''
            local wdCell = ''
            if compare_to_WD then
                wdCell = '\n| '
                if translation.wdLinkedLabel then
                    labelsComparisonColor = translation.labelsComparisonColor or ''
                    wdCell = wdCell .. 'style="background:' ..
                        (translation.linksComparisonColor or '') .. '"| ' ..
                        (translation.wdLinkedLabel)
                elseif not synonyms[wordtype][term] and not plurals[term] and not terms[term]['deprecated'] then
                    -- term without a known item: try to suggest one via the
                    -- Wikipedia link found in the Commons translation
                    local prefix, link, commonsLabel = analyzeWikilink(translation['com'] or '')
                    if prefix == lang then
                        local qid = mw.wikibase.getEntityIdForTitle(link, lang .. 'wiki')
                        if qid then
                            local wdLabel = mw.wikibase.getLabelByLang(qid, lang)
                            local diffedWdLabel = colorCodeDifference(wdLabel, commonsLabel)
                            local wdLabelString = '<abbr title="label">l</abbr>: ' .. diffedWdLabel
                            local productQid
                            local productStatements = mw.wikibase.getBestStatements(qid, 'P1056')
                            if #productStatements == 1 then
                                -- a statement without a concrete value has no datavalue
                                local snak = productStatements[1].mainsnak
                                productQid = snak.datavalue and snak.datavalue.value.id
                            end
                            local productLabel = productQid and mw.wikibase.getLabelByLang(productQid, lang)
                            local diffedProductLabel = colorCodeDifference(productLabel, commonsLabel)
                            local productLabelString = productLabel
                                and ', <span title="label from item for the product of the process">prl</span>: '
                                .. diffedProductLabel
                                or ''
                            wdCell = wdCell .. 'data-sort-value="<' .. term
                                .. '"|<' .. qid .. '?> ([[:d:' .. qid .. '|d]]; '
                                .. wdLabelString .. productLabelString .. ')'
                            terms[term].counterItemsFromTranslations = terms[term].counterItemsFromTranslations + 1
                        end
                    end
                else
                    wdCell = wdCell .. ' style="background-color:lightgray" |'
                end
                if translation.exportWdLabel then
                    qsString = (qsString .. '||' .. terms[term]['qid1']
                        .. '|L' .. lang .. '|'
                        .. mw.uri.encode('"' .. translation.exportWdLabel .. '"', 'PATH'))
                end
                if translation.exportWdLabelLowercase then
                    qsStringLowercase = (qsStringLowercase .. '||'
                        .. terms[term]['qid1'] .. '|L' .. lang .. '|'
                        .. mw.uri.encode('"' .. translation.exportWdLabelLowercase .. '"', 'PATH'))
                end
            end
            langCells = langCells .. string.format('\n|style="background:%s" title="' .. lang .. '" | %s%s',
                labelsComparisonColor, (langMappings[lang][term] or ''), wdCell)
        end
        local inListYesNo = ''
        if terms[term].isInListTemplate then
            inListYesNo = 'yes'
        else
            inListYesNo = 'no'
        end
        local qsLink = ''
        local qsLinkLowercase = ''
        if showQs and qsString ~= '' and qsString ~= terms[term]['qid1'] then
            qsLink = [=[ aqs:[[File:Commons to Wikidata QuickStatements.svg|15x15px|link=https://quickstatements.toolforge.org/index_old.html#v1=]=] .. qsString .. ']]'
        end
        if showQs and qsStringLowercase ~= '' and qsStringLowercase ~= terms[term]['qid1'] then
            qsLinkLowercase = [=[ lcqs:[[File:Commons to Wikidata QuickStatements.svg|15x15px|link=https://quickstatements.toolforge.org/index_old.html#v1=]=] .. qsStringLowercase .. ']]'
        end
        local termRow ='|-' ..
            '\n| ' .. term ..
            '\n| ' .. inListYesNo .. ' ' .. termSearchLink(term, wordtype) ..
            '\n| ' .. wdLink .. qsLink .. qsLinkLowercase ..
            ((wdLink == '' and compare_to_WD and ('<span title="number of suggestions">' ..
            terms[term].counterItemsFromTranslations .. '</span>')) or '') ..
            langCells ..
            '\n'
        termRows[#termRows + 1] = termRow
    end
    -- construct wikitext for output
    local languageSelector = ''
    local comparisonDescription = ''
    if compare_to_WD then
        languageSelector = '<small>' .. getlanguageSelector() .. '</small>\n\n'
        local languageSelectorNotpiol = getlanguageSelector('1')
        local userLang = mw.getCurrentFrame():callParserFunction('int', 'lang')
        comparisonDescription = string.format([=[
The [[File:Wikidata-logo.svg|15x15px|link=]] Wikidata columns show
how Wikidata IDs are automatically processed to nice linked text.
The Commons and Wikidata columns compare the labels and links from
[[Template:Technique]] subtemplates:
* If for a given language the '''labels''' match the <!--
-->[[File:Commons-logo.svg|15x15px|link=]] Commons column has a <!--
--><span style="background-color:#CEF2E0">green background</span>, <!--
-->if they don't a <!--
--><span style="background-color:#faecc8">yellow background</span>.
* If for a given language the '''links''' match the <!--
-->[[File:Wikidata-logo.svg|15x15px|link=]] Wikidata column has a <!--
--><span style="background-color:#CEF2E0">green background</span>, <!--
-->if Commons holds a link, but it doesn't match the one from Wikidata a <!--
--><span style="background-color:#F2CEE0">red background</span>.
* If the term isn't in [[Module:Technique/WikidataLUT]] or <!--
-->[[Module:Technique/synonyms]], but the language subtemplate contains a link <!--
-->to a Wikipedia page for the term, <!--
-->and if a Wikidata item can be found for the link, <!--
-->the item's ID is shown <in brackets>. <!--
-->It is followed by a link to the Wikidata item and the label it holds <!--
-->with the label's color signifying if it matches the label on Commons or not.
'''You can help by aligning information on Commons and Wikidata'''
which improves quality of both and helps
to migrate as much of the data for [[Template:Technique]] as possible to Wikidata
where it is easier to maintain and serves a wider use.
That way we get translations and links for more terms and for more languages.
Follow these steps:
# '''Select the language''' in which you want to align translations and links <!--
-->in the language selector: <small>%s</small>
# '''Sort the table''' by the Commons or Wikidata column for the chosen language.
# Now scan the cells of the columns and if a one is yellow or red <!--
-->'''fix the label or link/sitelink''' either on the language subtemplate <!--
-->[[Template:Technique/%s]] on Commons or <!--
-->at the Wikidata item that translates the term which is linked in the "WD1" column. <!--
--><small>(It is possible that the Wikidata item isn't a good choice for the term, <!--
-->it can be changed on [[Module:Technique/WikidataLUT]]. <!--
-->It will probably be a good idea to consult <!--
-->[[Template:Technique/translation dashboard]] <!--
-->for the translation in other languages before changing it.)</small>
]=], languageSelectorNotpiol, userLang)
    end
    local headerRow = string.format([=[
|-
! term
! [[Template:Technique/list|listed]]?
! [[Module:Technique/WikidataLUT|WD1]]
%s]=], allHeaderCells)
    local counterWD1all = counterWD1 + counterWD1synonyms + counterWD1deprecated
    local templatesCounterDescription = (system == 'templates') and [=[<br />parser function ratio (PF):<br />
template ratio (TP):<br />
number of unlisted terms (UL):]=] or ''
    local outText = string.format([=[
== %s ==
%sThe first column of the table below shows
%s
translatable technique terms from [[Template:Technique/list]],
[[Module:Technique/WikidataLUT]] and
%s
language subtemplates of [[Template:Technique]]
(of those linked from [[Template:Technique/lang]] like [[Template:Technique/en]] or
[[Template:Technique/en/adjectives]] respectively).
The [[File:Commons-logo.svg|15x15px|link=]] Commons columns show
how the language subtemplates process the terms.
%s
<div style="font-size:smaller">In %s out of %s language subtemplates in more than 20%% of the mappings
parser functions were found.
That means that for those it is not possible to replace them
completely with Wikidata querying before
[[phab:T212843|Lua access to lexemes is possible]].
It should be possible to update the data from Wikidata with [%s this query].</div>
{|class="wikitable sortable" style="font-size:smaller;"
|+ Comparison of different renderings of techniques
%s
|-
! style="text-align:left;font-weight:normal;" | number of mappings (MP): %s
!
! style="text-align:left;font-weight:normal;" | <abbr title="number of mappings">MP</abbr>: %s + %s <abbr title="synonyms">s</abbr> + %s <abbr title="deprecated">d</abbr> = %s
%s
%s
%s
|}
]=],
        wordtype, languageSelector, tostring(#sortedTerms),
        tostring(#languages), comparisonDescription, manyPFCount,
        tostring(#languages), queryURL,
        headerRow, templatesCounterDescription,
        counterWD1, counterWD1synonyms, counterWD1deprecated,
        counterWD1all, figuresCells, table.concat(termRows), headerRow
    )
    return outText
end
-- ===========================================================================
-- === functions to convert the language subpages of
-- === [[Template:Technique]] to Lua versions
-- ===========================================================================
-- Right-pad inString with spaces so that it is at least minimalLength
-- characters long (length as counted by mw.ustring.len).
-- Returns the padded string.
local function fillToLength(inString, minimalLength)
	local missing = minimalLength - mw.ustring.len(inString)
	local padding = mw.ustring.rep(' ', missing)
	return inString .. padding
end
-- Serialize inList as the source text of a Lua table constructor.
-- A plain table.concat cannot be used because the list may have gaps
-- (missing indexes) and non-numeric keys.
--   inList: table whose values are strings (already valid Lua source)
--   keys:   optional list giving the keys to emit and their order;
--           defaults to the sorted keys of inList
-- Keys that continue the sequence 1, 2, 3, ... (and the key '') are written
-- positionally, other numeric keys as "[key]=value" and all remaining keys
-- as "key=value".
local function makeLuaTable(inList, keys)
	local orderedKeys = keys or getSortedKeys(inList)
	local entries = {}
	local nextPosition = 1
	for _, key in ipairs(orderedKeys) do
		local value = inList[key]
		local entry
		if key == nextPosition or key == '' then
			-- positional entry: the key can be left implicit
			nextPosition = nextPosition + 1
			entry = string.format("%s", value)
		elseif type(key) == 'number' then
			entry = string.format("[%s]=%s", key, value)
		else
			entry = string.format("%s=%s", key, value)
		end
		entries[#entries + 1] = entry
	end
	return '{' .. table.concat(entries, ', ') .. '}'
end
-- Serialize a table of grammatical forms as Lua source text.
--   formsTable:              forms (strings of Lua source) by key
--   usedGrammaticalFeatures: optional list of the keys to look at, in output
--                            order; defaults to the sorted keys of formsTable
-- If every listed key holds the same form the result collapses to "{form}",
-- otherwise the full table is written with makeLuaTable.
local function makeGrammarFormsLuaTable(formsTable, usedGrammaticalFeatures)
	local keys = usedGrammaticalFeatures or getSortedKeys(formsTable)
	local referenceForm = formsTable[keys[1]]
	local allSame = true
	for _, key in pairs(keys) do
		if formsTable[key] ~= referenceForm then
			allSame = false
			break
		end
	end
	if allSame then
		return string.format("{%s}", referenceForm)
	end
	return makeLuaTable(formsTable, keys)
end
-- Parse the case list of a wikitext {{#switch:}} parser function.
--   cases: the text behind the first "|" of the switch, i.e.
--          "key1 = value1 | key2 | key3 = value3 | default value";
--          nil is treated like the empty string
-- Returns a table mapping every (trimmed) key to its (trimmed) value.
-- Fall-through keys ("a | b = x") all receive the value that follows them.
-- A trailing value without a key is stored under the key '#default'.
-- "|" and "=" are only treated as delimiters outside of {...}, [...] and
-- <!-- ... --> comments. Brackets inside a comment are ignored so that an
-- unbalanced bracket in a comment cannot disturb the nesting counters.
local function analyzeParserFunctionSwitch(cases)
	cases = cases or ''
	local casesLength = mw.ustring.len(cases)
	local curlyBracketCount, squareBracketCount = 0, 0
	local isComment = false
	local currentPosition = 0
	local startPosition = 1 -- start of the key or value currently being read
	local keys = {}         -- keys still waiting for their value
	local lastDelimiter = '|'
	local switchData = {}

	-- text between the last delimiter and the current character
	local function currentSegment()
		return mw.text.trim(mw.ustring.sub(cases, startPosition, currentPosition - 1))
	end

	-- a '|' is appended as sentinel so that the last value gets flushed
	for c in mw.ustring.gmatch(cases .. '|', '.') do
		currentPosition = currentPosition + 1
		-- nesting state *before* the current character is processed
		local isFirstLevel = (curlyBracketCount == 0
			and squareBracketCount == 0
			and not isComment)
		if isComment then
			-- inside a comment only its end is of interest
			if c == '>' and mw.ustring.sub(cases, currentPosition - 2, currentPosition) == '-->' then
				isComment = false
			end
		elseif c == '{' then
			curlyBracketCount = curlyBracketCount + 1
		elseif c == '}' then
			curlyBracketCount = curlyBracketCount - 1
		elseif c == '[' then
			squareBracketCount = squareBracketCount + 1
		elseif c == ']' then
			squareBracketCount = squareBracketCount - 1
		elseif c == '<' and mw.ustring.sub(cases, currentPosition, currentPosition + 3) == '<!--' then
			isComment = true
		elseif isFirstLevel and c == '|' and lastDelimiter == '|' and currentPosition == casesLength + 1 then
			-- default value as last value without a key "#default" given
			switchData['#default'] = currentSegment()
		elseif isFirstLevel and (c == '=' or c == '|') and lastDelimiter == '|' then
			-- set key before '=' or in multi-assignment
			keys[#keys + 1] = currentSegment()
			startPosition = currentPosition + 1
			lastDelimiter = c
		elseif isFirstLevel and c == '|' then
			-- set value before '|' for all keys collected so far
			if #keys > 0 then
				local value = currentSegment()
				for _, key in ipairs(keys) do
					switchData[key] = value
				end
				keys = {}
			end
			startPosition = currentPosition + 1
			lastDelimiter = c
		end
	end
	return switchData
end
-- Look up the link that the Wikidata lookup table provides for a noun.
--   prefix: interwiki prefix that is handed to getBestSitelink and put in
--           front of the resulting link
--   term:   technique term; plural forms and synonyms are normalized first
-- Returns "prefix:link", or a false value if no link could be determined.
local function getDefaultPrefixedLink(prefix, term)
	local singular = plurals[term] or term
	local canonical = synonyms.nouns[singular] or singular
	local termData = material_LUT1.nouns[canonical]
	local link = termData and termData.qid
		and getBestSitelink(prefix, termData.qid, termData.altQids)
	if link then
		return prefix .. ':' .. link
	end
	return link
end
-- Convert the wikitext that a language subtemplate holds for one noun into
-- the Lua source text for the corresponding module entry.
--   wikiTextNounData: wikitext value of the term; either a
--                     {{#switch: {{{query|}}} | ...}} with grammatical forms
--                     or a plain label/wikilink
--   term:             the term (key) the value belongs to
--   lang:             language code of the subtemplate, e.g. 'de' or 'en-ca'
-- Returns two values:
--   1. the Lua source of the value (a quoted string or a table constructor),
--      or nil if nothing has to be stored because the label and link equal
--      the fallbacks
--   2. warnings joined with ', ' ('' if there are none)
local function makeLuaNounData(wikiTextNounData, term, lang)
	local termData, prefix, prefixedLink, right
	local warnings = {}
	local canonicalTerm = synonyms.nouns[term] or term
	local wdTermData = material_LUT1.nouns[canonicalTerm]
	local primaryLangSubtag = mw.text.split(lang, '-', true)[1]
	local primaryLangSubtemplate, primaryLangLink, primaryLangData
	if primaryLangSubtag ~= lang then
		-- language variant like "en-ca": data of the primary language ("en")
		-- serves as fallback
		primaryLangSubtemplate = 'Module:Technique/' .. primaryLangSubtag
		primaryLangData = mw.title.new(primaryLangSubtemplate).exists and require(primaryLangSubtemplate).nouns[canonicalTerm]
		-- NOTE(review): primaryLangData may be a plain string; indexing it then
		-- yields nil through the string metatable - confirm this is intended
		primaryLangLink = primaryLangData and primaryLangData.link
	end
	local cases = mw.ustring.match(wikiTextNounData,'^{{#switch:%s*{{{query|}}}%s*|(.-)%s*}}%s*$')
	if cases then
		termData = analyzeParserFunctionSwitch(cases)
	end
	if termData then
		local expectedKeys = {['#default']=1, L=1, l=1, d=1, p=1, a=1, ad=1, al=1, gender=1}
		for k,_ in pairs(termData) do
			if not expectedKeys[k] then
				warnings[#warnings+1] = 'FIXME: unexpected key "' .. k .. '"'
			end
		end
		termData.default = termData['#default'] --or 'WARNING: NO DEFAULT!!!'
		-- split a combined gender value like "fp" into gender and number
		if termData.gender == 'p' then
			termData.gender = nil
			termData.number = 'p'
		elseif termData.gender then
			local gender, number = mw.ustring.match(termData.gender,'^([mfn])([sp])$')
			if gender then
				termData.gender = gender
				termData.number = number
			end
		end
		-- from here on expectedKeys lists the keys that hold labels
		local lastPrefixedLink
		expectedKeys['#default'] = nil
		expectedKeys['gender'] = nil
		expectedKeys['default'] = 1
		for k,_ in pairs(expectedKeys) do
			if termData[k] then
				local casePrefix, unprefixedCaseLink, label = analyzeWikilink(termData[k])
				if casePrefix then
					local prefixedCaseLink = casePrefix .. ':' .. unprefixedCaseLink
					if lastPrefixedLink and prefixedCaseLink ~= lastPrefixedLink then
						warnings[#warnings+1] = 'FIXME: mismatching links'
					end
					prefix = casePrefix
					prefixedLink = prefixedCaseLink
					lastPrefixedLink = prefixedCaseLink
					-- keep only the label; the link is stored once for the term
					termData[k] = label
				end
			end
		end
		-- store the link only if it deviates from the link that would be
		-- used anyway
		if prefix and (prefixedLink ~= (primaryLangLink or getDefaultPrefixedLink(prefix, term))) then
			termData.link = prefixedLink
		end
	else
		-- look what label [[Module:Technique/WikidataLUT]] provides
		-- if available use product item for label provision
		local wdLabelQid = wdTermData and (getProductQid(wdTermData) or wdTermData.qid)
		local wdLabel = wdLabelQid and mw.wikibase.getLabelByLang(wdLabelQid, lang)
		local primaryLangLabel = primaryLangData and primaryLangData.default or primaryLangData
		local primaryLangWdLabel = wdLabelQid and mw.wikibase.getLabelByLang(wdLabelQid, primaryLangSubtag)
		local fallbackLabel = wdLabel or primaryLangLabel or primaryLangWdLabel
		local commonsPrefix, link, label = analyzeWikilink(wikiTextNounData)
		if commonsPrefix then
			prefix = commonsPrefix
			prefixedLink = commonsPrefix .. ':' .. link
			-- label becomes false if it does not differ from the fallback
			label = (label ~= fallbackLabel) and label
			-- The parentheses matter: without them "==" binds tighter than
			-- "or" and every term with a default link would take this branch,
			-- silently dropping a deviating link from the subtemplate.
			if prefixedLink == (primaryLangLink or getDefaultPrefixedLink(commonsPrefix, term)) then
				if label then
					return "'" .. escapeForLua(label) .. "'", ''
				end
			else
				termData = {default = label, link = prefixedLink}
			end
		else
			if wikiTextNounData ~= fallbackLabel then
				return "'" .. escapeForLua(wikiTextNounData) .. "'", ''
			end
		end
	end
	if termData then
		local termDataPairs = {}
		for _,k in ipairs({'link', 'default', 'L', 'l', 'd', 'p', 'a', 'ad', 'al', 'gender', 'number'}) do
			if termData[k] then
				-- NOTE(review): 'L' and 'l' are both written with the key "l" - confirm
				termDataPairs[#termDataPairs+1] = mw.ustring.lower(k) .. " = '" .. escapeForLua(termData[k]) .. "'"
			end
		end
		right = '{' .. table.concat(termDataPairs, ', ') .. '}'
	end
	return right, table.concat(warnings, ', ')
end
-- Convert the wikitext that a language subtemplate holds for one adjective
-- into the Lua source text for the corresponding module entry.
--   wikiTextData: wikitext value of the term; the function only acts if it
--                 contains a {{#switch: {{{agreement|}}} | ...}}
--   lang:         language code of the subtemplate
-- Returns two values:
--   1. a string of the form "{parts = {...}}" holding the text before the
--      switch (if any), the forms (or 'regular()') and the text behind the
--      switch (if any); nil if no agreement switch was found
--   2. warnings joined with ', ' ('' if there are none)
local function makeLuaAdjectiveData(wikiTextData, lang)
local warnings = {}
local luaAdjectiveData
local before, cases, behind = mw.ustring.match(wikiTextData,'^(.-){{%s*#switch:%s*{{{agreement|?}}}%s*|(.-)%s*}}(.*)$')
if cases then
local caseData = analyzeParserFunctionSwitch(cases)
-- Per language: the gender, number and case markers from which the switch
-- keys are composed (see the nested loops below). An "out" subtable
-- overrides the numbers and genders used for writing the output.
local usedGrammaticalFeatures = {
default = {numbers={'', 'p'}, genders={'m', 'f'}, cases={''}},
ar = {numbers={''}},
da = {numbers={''}, genders={'c', 'n', 'p'},
out = {numbers={'', 'p'}, genders={'c', 'n'}}
},
de = {numbers={''}, genders={'m', 'f', 'n', 'p'}, cases={'', 'd'},
out = {numbers={'', 'p'}, genders={'m', 'f', 'n'}}
},
et = {numbers={''}, genders={''}, cases={'', 'ad', 'al'}},
mk = {genders={'m', 'f', 'n'}},
nds = {numbers={''}, genders={'m', 'f', 'n', 'p'}, cases={'', 'd'},
out = {numbers={'', 'p'}, genders={'m', 'f', 'n'}}
},
pl = {numbers={'s', 'p'}, genders={'m', 'f', 'n'}, cases={'', 'l'}},
ro = {genders={'m', 'f', 'n'}},
ru = {numbers={''}, genders={'m', 'f', 'n', 'p'}, cases={'', 'd', 'p', 'a'},
out = {numbers={'', 'p'}, genders={'m', 'f', 'n'}}
},
}
-- Resolve the features for lang, falling back to the defaults, and store
-- them under the keys 'numbers', 'genders' and 'cases' of the same table.
for _,feature in ipairs({'numbers', 'genders', 'cases'}) do
if usedGrammaticalFeatures[lang] and usedGrammaticalFeatures[lang][feature] then
usedGrammaticalFeatures[feature] = usedGrammaticalFeatures[lang][feature]
else
usedGrammaticalFeatures[feature] = usedGrammaticalFeatures.default[feature]
end
end
-- caseTable[number][case][gender index] = form
local caseTable = {}
for key,value in pairs(caseData) do
local number, gender, case
-- Build every possible key as gender .. number .. case and compare it with
-- the key found in the switch. Each loop level shadows keystring with its
-- own extended copy.
local keystring = ''
for _,g in ipairs(usedGrammaticalFeatures.genders) do
local keystring = keystring .. g
-- gender markers become positions in the forms list; 'p' stays 'p' and
-- marks a plural form that is valid for all genders
local genderConvert = {['']=1, m=1, f=2, n=3, c=1, p='p'}
g = genderConvert[g]
for _,n in ipairs(usedGrammaticalFeatures.numbers) do
local keystring = keystring .. n
if n == '' then n = 's' end
if g == 'p' then n = 'p' end
for _,c in ipairs(usedGrammaticalFeatures.cases) do
local keystring = keystring .. c
if key == keystring then
caseTable[n] = caseTable[n] or {}
caseTable[n][c] = caseTable[n][c] or {}
if g ~= 'p' then
if caseTable[n][c][g] then
-- theoretically possible e.g. in Danish
warnings[#warnings+1] = 'ERROR: VALUE GIVEN TWICE'
end
caseTable[n][c][g] = value
else
-- one plural form for all three gender positions
caseTable[n][c] = {value, value, value}
end
number = n
gender = g
case = c
end
end
end
end
-- number, gender and case are only set if the key matched a combination
if (not number or not gender or not case) and key ~= '#default' then
warnings[#warnings+1] = 'FIXME: unexpected key "' .. key .. '"'
end
end
local numberStrings = {}
-- regular declension data of the language, if Module:Declension has some
local regular = declension.langlist[lang] and declension.langlist[lang].adjective
--[[
if regular then -- was needed for pl
for number,_ in pairs(regular) do
for case,v in pairs(regular[number]) do
regular[number][type(case)=='string' and mw.ustring.lower(case) or case] = v
end
end
end
--]]
-- switch to the features that are used for writing the output
local out = usedGrammaticalFeatures[lang] and usedGrammaticalFeatures[lang].out
usedGrammaticalFeatures.numbers = out and out.numbers or usedGrammaticalFeatures.numbers
-- NOTE(review): caseTable[n] is keyed by case marker and then by gender
-- index, so the lookups with 'm' and 'n' below do not seem to reach a form;
-- fallbackCaseTableTail then stays a table (or '') and the comparison with
-- the #default string is true for every non-empty #default - confirm intent
local fallbackCaseTableTail
fallbackCaseTableTail = caseTable['s'] or caseTable[1] or ''
fallbackCaseTableTail = fallbackCaseTableTail['m'] or fallbackCaseTableTail[1] or fallbackCaseTableTail
fallbackCaseTableTail = fallbackCaseTableTail['n'] or fallbackCaseTableTail[1] or fallbackCaseTableTail
if caseData['#default'] and caseData['#default'] ~= fallbackCaseTableTail then
-- write an additional "default=" entry next to the number entries
table.insert(usedGrammaticalFeatures.numbers, 'default')
end
usedGrammaticalFeatures.genders = out and out.genders or usedGrammaticalFeatures.genders
-- stays true only if every form equals the regular form
local isRegular = true
for _,n in ipairs(usedGrammaticalFeatures.numbers) do
if n == '' then n = 's' end
local caseStrings = {}
for _,c in ipairs(usedGrammaticalFeatures.cases) do
local genderStrings = {}
for _,g in ipairs(usedGrammaticalFeatures.genders) do
local genderConvert = {['']=1, m=1, f=2, n=3, c=1}
g = genderConvert[g]
local form
-- walk down caseTable[n][c][g] as far as entries exist
local caseTableTail
if caseTable[n] then caseTableTail = caseTable[n] end
if caseTableTail and caseTableTail[c or ''] then caseTableTail = caseTableTail[c or ''] end
if caseTableTail and caseTableTail[g] then form = caseTableTail[g] end
-- missing forms fall back to the #default value of the switch
genderStrings[g] = form or caseData['#default'] or ''
local numberTranslate = {s='singular', p='plural', default='default'}
-- the empty case marker is passed to Module:Declension as case 'n'
local regularForm = regular and declension.selectAdjectiveForm(regular, {number=numberTranslate[n], case=mw.ustring.lower((c=='' and 'n') or c), gender=g})
if type(regularForm) == 'string' then
regularForm = escapeForLua(regularForm)
end
if genderStrings[g] ~= regularForm then
-- mw.log(before,n,c,g)
isRegular = false
end
genderStrings[g] = "'" .. genderStrings[g] .. "'"
end
local gendersLuaTableString = makeGrammarFormsLuaTable(genderStrings)
caseStrings[mw.ustring.lower(c)] = gendersLuaTableString
end
local casesLuaTableString = makeGrammarFormsLuaTable(caseStrings, usedGrammaticalFeatures.cases)
numberStrings[#numberStrings+1] = ((n~='' and (n .. '=')) or '') .. casesLuaTableString
end
local luaSwitchString
if isRegular then
luaSwitchString = 'regular()'
else
luaSwitchString = '{' .. table.concat(numberStrings, ', ') .. '}'
end
-- text before and behind the switch is only added if it is not empty
local parts = {before ~= '' and "'" .. escapeForLua(before) .. "'" or nil}
parts[#parts+1] = luaSwitchString
parts[#parts+1] = behind ~= '' and "'" .. escapeForLua(behind) .. "'" or nil
luaAdjectiveData = mw.ustring.format("{parts = {%s}}", table.concat(parts, ', '))
end
return luaAdjectiveData, table.concat(warnings, ', ')
end
-- Build one line "['key'] = value, -- comment" of the generated Lua module.
--   lineData: table with the fields key, value (wikitext) and comment
--             (string or a false value)
--   wordtype: 'nouns' or 'adjectives'
--   lang:     language code of the subtemplate
-- Warnings of the conversion are appended as a Lua comment in a line of
-- their own. Nothing is returned if there is no value to store for the term.
local function makeLuaLine(lineData, wordtype, lang)
	local value, warning
	if wordtype == 'nouns' then
		value, warning = makeLuaNounData(lineData.value, lineData.key, lang)
	elseif wordtype == 'adjectives' then
		value, warning = makeLuaAdjectiveData(lineData.value, lang)
		-- adjectives without agreement switch are stored as plain strings
		value = value or ("'" .. escapeForLua(lineData.value) .. "'")
	end
	local commentString = lineData.comment and (' -- ' .. lineData.comment) or ''
	local warningString = (warning ~= '') and ('\n-- ' .. warning) or ''
	if not value then
		return
	end
	local left = fillToLength("['" .. escapeForLua(lineData.key) .. "']", 20)
	return left .. ' = ' .. value .. ',' .. commentString .. warningString
end
-- Separate wikitext from the HTML comments it contains.
-- Returns two values: the trimmed text with all <!-- ... --> comments
-- removed, and the list of the comment texts (without the comment markers
-- and without surrounding whitespace).
local function findComments(inText)
	local found = {}
	for commentText in mw.ustring.gmatch(inText, '<!%-%-%s*(.-)%s*%-%->') do
		table.insert(found, commentText)
	end
	-- splitting at the comments and joining again removes them
	local pieces = mw.text.split(inText, '<!%-%-.-%-%->')
	local bareText = mw.text.trim(table.concat(pieces))
	return bareText, found
end
-- Resolve a nested "{{#switch: {{{1|}}} ...}}" at the start of an adjective
-- value for the given term.
--   inText: wikitext value of the term
--   key:    the term to look up in the nested switch
-- Returns the value the nested switch holds for key (or an error text if it
-- has none) followed by the text behind the switch. Returns nothing if
-- inText does not start with such a switch.
local function processSecondAdjectiveSwitch(inText, key)
	local area, rest = mw.ustring.match(inText, '^{{#switch: {{{1|}}}(.-)}}(.*)')
	if not area then
		return
	end
	-- strip up to two leading comments
	for _ = 1, 2 do
		area = mw.ustring.match(area, '^%s*<!%-%-.-%-%->(.*)$') or area
	end
	local nestedMappings = analyzeParserFunctionSwitch(area)
	local found = nestedMappings[key] or 'ERROR: TERM NOT FOUND IN SWITCH'
	return found .. rest
end
-- Convert the term mappings of a language subtemplate into the lines of a
-- Lua table.
--   inText:                   wikitext of the language subtemplate
--   wordtype:                 'nouns' or 'adjectives'
--   lang:                     language code of the subtemplate
--   suppressundifferentiated: if set, no "UNDIFFERENTIATED" errors are
--                             written for different terms sharing a mapping
-- Returns the generated lines joined with newlines. Problems are reported
-- as Lua comments ("-- ERROR: ...") within that text.
local function processTermsArea(inText, wordtype, lang, suppressundifferentiated)
	-- everything between the first "|" of the main switch and its #default
	local switchBody = mw.ustring.match(inText, '{{#switch: {{{1|}}}.-|(.*)\n%s*|%s*#default%s*=') or ''
	-- drop comments that start a line
	switchBody = table.concat(mw.text.split(switchBody, '\n%s*<!%-%-.-%-%->'))
	local mappings = analyzeParserFunctionSwitch(switchBody)

	-- ugly hotfix for nds adjectives
	if lang == 'nds' and wordtype == 'adjectives' then
		local regularsText = mw.ustring.match(inText, '{{#switch: {{{1|}}}<!%-%- regular words %-%->.-|(.*)\n%s*|%s*#default%s*=')
		for k, v in pairs(analyzeParserFunctionSwitch(regularsText)) do
			mappings[k] = v .. '{{#switch: {{{agreement|}}}|m=en|f=e|n=e|md=en|fd=e|nd=en|pd=en|#default=en }}'
		end
	end

	-- keys of the switch that are no translatable terms
	for _, standardKey in ipairs({'', 'adj', 'basic', 'over', 'on', 'mounted', 'order', 'case', '#default'}) do
		mappings[standardKey] = nil
	end

	local outLines = {}

	-- merge synonyms into their normalized term
	local canonicalMappings = {}
	for key, value in pairs(mappings) do
		if wordtype == 'adjectives' then
			value = processSecondAdjectiveSwitch(value, key) or value
		end
		local canonicalTerm = synonyms[wordtype][key] or key
		local known = canonicalMappings[canonicalTerm]
		if not known then
			canonicalMappings[canonicalTerm] = value
		elseif known ~= value then
			outLines[#outLines+1] = '-- ERROR: MISMATCH: DATA FOR SYNONYM "' .. key
				.. '" AND NORMALIZED TERM "'
				.. canonicalTerm .. '" DIFFERS:'
				.. '\n-- synonym: ' .. value
				.. '\n-- normalized: ' .. known
		end
	end

	-- write one line per term in alphabetical order
	for _, key in ipairs(getSortedKeys(canonicalMappings)) do
		local value = canonicalMappings[key]
		local bareKey, comments = findComments(key)
		local bareValue, valueComments = findComments(value)
		for _, valueComment in ipairs(valueComments or {}) do
			comments[#comments+1] = valueComment
		end
		local commentsString = table.concat(comments, '; ')
		commentsString = (commentsString ~= '') and commentsString
		outLines[#outLines+1] = makeLuaLine(
			{key=bareKey, value=bareValue, comment=commentsString},
			wordtype, lang)
		if not suppressundifferentiated then
			-- report other terms that use exactly the same mapping
			for otherKey, otherValue in pairs(canonicalMappings) do
				if value == otherValue
					and (synonyms[wordtype][otherKey] or otherKey) ~= (synonyms[wordtype][key] or key)
				then
					outLines[#outLines+1] = mw.ustring.format(
						'-- ERROR: UNDIFFERENTIATED: for the terms "%s" and "%s" the same mapping is used:\n-- %s',
						key, otherKey, otherValue)
				end
			end
		end
	end
	return table.concat(outLines, '\n')
end
-- Generate the Lua version of the language subtemplates
-- [[Template:Technique/<lang>]] and [[Template:Technique/<lang>/adjectives]].
--   args.lang:                     language code of the subtemplates
--   args.suppressundifferentiated: passed on to processTermsArea
-- Returns the syntax highlighted source of the generated module. For a
-- subtemplate that does not exist an "-- ERROR:" comment line is written
-- into the respective table instead of raising a script error.
local function convert(args)
	local lang = args.lang

	-- converts one subtemplate; pageName is the name without namespace
	local function convertPage(pageName, wordtype)
		local title = mw.title.new(pageName, 'Template')
		-- title is nil for invalid names, the content is nil for missing pages
		local content = title and title:getContent()
		if not content then
			return '-- ERROR: PAGE NOT FOUND: Template:' .. pageName
		end
		return processTermsArea(content, wordtype, lang, args.suppressundifferentiated)
	end

	local nouns = convertPage('Technique/' .. lang, 'nouns')
	local adjectives = convertPage('Technique/' .. lang .. '/adjectives', 'adjectives')
	local res = syntaxHighlightLuaCode(string.format(
		'p.nouns = {\n%s\n}\n\np.adjectives = {\n%s\n}\n\nreturn p',
		nouns, adjectives))
	return res
end
-- Entry point for wikitext: shows a language selector followed by the Lua
-- version of the language subtemplates.
-- Frame arguments:
--   lang:                     language to convert; defaults to the
--                             interface language of the user
--   suppressundifferentiated: see processTermsArea
function p.convert(frame)
	local args = frame.args
	local userLang = frame:callParserFunction('int', 'lang')
	-- for debugging the options can be hard-coded here,
	-- e.g. lang = 'pl' or suppressundifferentiated = true
	local options = {
		lang = args.lang or userLang,
		suppressundifferentiated = args.suppressundifferentiated,
	}
	return getlanguageSelector() .. convert(options)
end
-- ===========================================================================
-- === functions to be called from wikitext
-- === and small direct helper functions for them
-- ===========================================================================
-- Return a new list holding at most the first expressionNumber elements
-- of inTable.
--   inTable:          list to cut; it is not modified
--   expressionNumber: maximum number of elements to keep; defaults to 25
--                     (the limit that used to be hard-coded)
local function cutTable(inTable, expressionNumber)
	local limit = tonumber(expressionNumber) or 25
	local outTable = {}
	for i, v in ipairs(inTable) do
		if i > limit then
			break
		end
		outTable[#outTable + 1] = v
	end
	return outTable
end
-- Return a copy of the list inTable without the elements equal to value.
-- The input list is left untouched.
local function deleteValue(inTable, value)
	local kept = {}
	for _, element in ipairs(inTable) do
		if element == value then
			-- dropped
		else
			kept[#kept + 1] = element
		end
	end
	return kept
end
-- Build an external link to the Wikidata Query Service with a SPARQL query
-- that generates the lines of a language subtemplate from lexemes.
--   languageCode: code used in the query to select the language of the
--                 lexemes; defaults to 'ca'
--   system:       not used yet (TODO: implement usage of "system",
--                 intended default 'templates')
-- Returns the wikitext of the external link.
local function getSubtemplateFromLexemes(languageCode, system)
	languageCode = languageCode or 'ca'

	-- The VALUES line should look like:
	-- VALUES (?term ?item) { ("foo" wd:Q39782) ("bar" wd:Q123314) }
	local valuePairs = {}
	for term, termData in pairs(material_LUT1.nouns) do
		if termData.qid then
			valuePairs[#valuePairs + 1] = ' ("' .. term .. '" wd:' .. termData.qid .. ')'
		end
	end
	local values = table.concat(valuePairs)

	local queryform = [=[
SELECT
# ?term ?lexeme ?lemma ?gender ?genderLabel ?item ?itemLabel
?out
WHERE
{
VALUES (?term ?item) { %s }
?lexeme ontolex:sense/wdt:P5137 ?item .
?lang wdt:P218 "%s" .
?lexeme dct:language ?lang .
?lexeme wikibase:lemma ?lemma .
?lexeme wdt:P5185 ?gender .
VALUES (?gender ?genderLetter) { (wd:Q499327 "m") (wd:Q1775415 "f") (wd:Q1775461 "n") }
BIND(CONCAT("| ", ?term, " = \u007B\u007B#switch:\u007B\u007B\u007Bquery|\u007D\u007D\u007D | gender=", ?genderLetter, " | #default=", ?lemma, "\u007D\u007D") AS ?out)
# TODO: consider pluralia tantum!
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY ?out
]=]
	local sparqlQ = string.format(queryform, values, languageCode)
	local queryURL = 'https://query.wikidata.org/#' .. mw.uri.encode(sparqlQ, 'PATH')
	return '[' .. queryURL .. ' Create a language subtemplate by querying lexemes (download CSV)]'
	-- alternative that needs pipe escaping:
	-- return mw.getCurrentFrame():preprocess('{{SPARQL |query=' .. sparqlQ .. '}}')
end
-- Entry point for wikitext: shows a language selector followed by a link to
-- a query that creates a language subtemplate from lexemes.
-- Frame arguments:
--   language: language code for the query; defaults to the interface
--             language of the user
--   system:   passed on to getSubtemplateFromLexemes
function p.getSubtemplateFromLexemes(frame)
	local args = frame.args
	local userLang = frame:callParserFunction('int', 'lang')
	local selector = getlanguageSelector()
	return selector .. getSubtemplateFromLexemes(args.language or userLang, args.system)
end
-- Entry point for wikitext: returns what makeLuaSynonyms produces for the
-- synonym terms of a language subtemplate.
-- Frame arguments:
--   lang:     language of the subtemplate; defaults to the interface
--             language of the user (for debugging it can be hard-coded
--             here, e.g. 'en')
--   wordtype: 'nouns' or 'adjectives' (default)
function p.getSynonyms(frame)
	local args = frame.args
	local userLang = frame:callParserFunction('int', 'lang')
	local lang = args.lang or userLang
	local wordtype = args.wordtype or 'adjectives'
	-- only the sixth return value is of interest here
	local synTerms = select(6, analyzeLangSubtemplate(wordtype, lang, nil))
	return makeLuaSynonyms(synTerms)
end
-- Entry point for wikitext: table of contents followed by the comparison
-- tables for nouns and for adjectives.
-- Frame arguments:
--   languages:         comma separated language codes; defaults to 'en'
--   uselangfirst:      'yes' puts the interface language(s) of the user
--                      in the first columns
--   startTerm/endTerm: restrict the tables to a range of terms; if only one
--                      of them is given the other one defaults to
--                      'a' or 'al' respectively
--   showQs, system:    passed on to termsVisualizer
function p.comparison(frame)
	local args = frame.args
	local userLangs = mw.text.split(frame:callParserFunction('int', 'lang'), '%s*,%s*')
	local languages = {'en'}
	--[[ for debugging
	local allLanguages = {'ar','ca','cs','da','de','el','en','es','et','fi','fr',
	'gl', 'he','hu','it','ja','mk','nds','nl','nb','pl','pt',
	'ro','ru','scn','sk','sl','sr','sv','sw','vec','vi','zh'}
	local lowercaseLanguages = {'ca','cs','da','el','en','es','et','fi','fr',
	'gl', 'hu','it','mk','nl','nb','pl','pt',
	'ro','ru','scn','sk','sl','sr','sv'}
	local languages = cutTable(allLanguages, 4)
	local languages = {'en', 'fr', 'pl', 'ru'}
	local languages = {'en', 'de', 'fr', 'es', 'it', 'pl', 'ru'}
	args.uselangfirst = 'yes'
	local languages = allLanguages
	--]]
	if args.languages then
		languages = mw.text.split(args.languages, '%s*,%s*')
	end

	-- put the user language in the first columns
	if args.uselangfirst == 'yes' then
		for i = #userLangs, 1, -1 do
			languages = deleteValue(languages, userLangs[i])
			table.insert(languages, 1, userLangs[i])
		end
	end

	-- filter that decides which terms get a row
	local startTerm, endTerm = args.startTerm, args.endTerm
	local selectFunction
	if startTerm or endTerm then
		local lowerBound = startTerm or 'a'
		local upperBound = endTerm or 'al'
		selectFunction = function (term)
			local lowered = mw.ustring.lower(term)
			-- the term is cut to the length of the upper bound so that all
			-- terms starting with the upper bound are included
			return (lowered >= lowerBound
				and string.sub(lowered, 1, #upperBound) <= upperBound)
		end
	else
		selectFunction = function(term) return true end
	end

	-- for debugging: showQs = true, system = 'modules'
	local showQs = args.showQs
	local system = args.system
	local nounsTable = termsVisualizer(languages, 'nouns', true, selectFunction, showQs, system)
	local adjectivesTable = termsVisualizer(languages, 'adjectives', true, selectFunction, showQs, system)
	return '__TOC__\n' .. nounsTable .. '\n' .. adjectivesTable
end
-- Entry point for wikitext: the translation dashboard, i.e. the table of
-- termsVisualizer for the languages returned by subpageLangs.
-- Frame arguments:
--   uselangfirst: 'yes' puts the interface language of the user in the
--                 first column
--   wordtype:     'nouns' (default) or 'adjectives'
--   system:       passed on to termsVisualizer; defaults to 'templates'
function p.translation_dashboard(frame)
	local args = frame.args
	-- for debugging the list can be shortened here,
	-- e.g. with cutTable(subpageLangs(), 25)
	local languages = subpageLangs()
	if args.uselangfirst == 'yes' then
		local userLang = frame:callParserFunction('int', 'lang')
		languages = deleteValue(languages, userLang)
		table.insert(languages, 1, userLang)
	end
	return termsVisualizer(
		languages,
		args.wordtype or 'nouns',
		false,
		nil,
		false,
		args.system or 'templates')
end
return p