Module:Interwiki from P460

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Lua
CodeDiscussionEditHistoryLinksLink count Subpages:DocumentationTestsResultsSandboxLive code All modules

Documentation for this module may be created at Module:Interwiki from P460/doc

Code

-- Original at https://www.wikidata.org/wiki/Module:Interwiki_from_P460
-- in case of changes, please edit https://www.wikidata.org/wiki/Module:Interwiki_from_P460 then copy changes here.

-- adapted from https://pl.wikipedia.org/wiki/Modu%C5%82:%C5%81atki by [[pl:User:Paweł Ziemian]]

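-- Use with (each entry point reads the optional |id= argument of the #invoke call and passes it on as the entity id):
--      {{#invoke:Interwiki from P460|Interwiki}}  -- uses sitelinks on items used as values with P460 (same as)
--      {{#invoke:Interwiki from P460|InterwikiP1420}}  -- uses sitelinks on items used as values with P1420
--      {{#invoke:Interwiki from P460|InterwikiP1889}}  -- uses sitelinks on items used as values with P1889 (different from)
--      {{#invoke:Interwiki from P460|InterwikiP1889fn}}  -- uses sitelinks on items used as values with P1889 (different from) if qualified with P1013 ("criterion used") and value Q27924673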

-- Local aliases for the library functions used by the code below.
local insert = table.insert
local concat = table.concat
local sort = table.sort
-- NOTE(review): m.completeiw() below calls mw.wikibase.getEntityObject directly,
-- so this alias looks unused in the code visible here.
local getEntityObject = mw.wikibase.getEntityObject
-- Title object of the current page; m.completeiw() reads its content when filtering is enabled.
local currentTitle = mw.title.getCurrentTitle()
local htmldecode -- lazy initialisation, used conditionally in m.completeiw() below

-- Module table: the functions attached to it are what the final `return m` exports.
local m = {}

-- Interwiki prefixes accepted by this module. The list is turned into the
-- `knownLanguages` lookup set further down; it also contains legacy/alias codes
-- that `iwprefers` remaps to another prefix.
local langlist = {'aa', 'ab', 'ace', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'arc', 'ary', 'arz', 'as', 'ast', 'av', 'ay', 'az', 'ba', 'bar', 'bat-ltg', 'bat-smg', 'bcl', 'be', 'be-tarask', 'be-x-old', 'bej', 'bg', 'bh', 'bi', 'bjn', 'bm', 'bms', 'bn', 'bo', 'bpy', 'br', 'bs', 'bug', 'bxr', 'c', 'ca', 'cbk-zam', 'cdo', 'ce', 'ceb', 'ch', 'cho', 'chr', 'chy', 'ckb', 'co', 'cr', 'crh', 'crs', 'cs', 'csb', 'cu', 'cv', 'cy', 'd', 'da', 'dag', 'de', 'de-at', 'de-ch', 'diq', 'dsb', 'dv', 'dz', 'ee', 'egl', 'el', 'eml', 'en', 'en-ca', 'en-gb', 'en-us', 'enm', 'eo', 'es', 'ess', 'esu', 'et', 'eu', 'ext', 'fa', 'ff', 'fi', 'fiu-vro', 'fj', 'fo', 'fon', 'fr', 'frp', 'frr', 'fur', 'fy', 'ga', 'gaa', 'gag', 'gan', 'gcr', 'gd', 'gil', 'gl', 'glk', 'gn', 'got', 'grc', 'grc-koi', 'gsw', 'gu', 'gv', 'ha', 'hak', 'haw', 'he', 'hi', 'hif', 'hil', 'ho', 'hr', 'hsb', 'ht', 'hu', 'hy', 'hyw', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'ilo', 'inh', 'io', 'is', 'it', 'iu', 'ja', 'jam', 'jbo', 'jv', 'ka', 'kaa', 'kab', 'kbd', 'kg', 'khw', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', 'ko', 'koi', 'kr', 'krc', 'krj', 'ks', 'ksh', 'ku', 'kv', 'kw', 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'lld', 'lmo', 'ln', 'lo', 'lt', 'ltg', 'lv', 'lzh', 'lzz', 'map-bms', 'mdf', 'mg', 'mh', 'mhr', 'mi', 'min', 'mk', 'ml', 'mn', 'mo', 'mr', 'mrj', 'ms', 'ms-arab', 'mt', 'mus', 'mwl', 'my', 'myv', 'mzn', 'na', 'nah', 'nan', 'nap', 'nb', 'nds', 'nds-nl', 'ne', 'new', 'ng', 'nl', 'nn', 'no', 'non', 'nov', 'nrf', 'nqo', 'nrm', 'nso', 'nv', 'ny', 'oc', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap', 'paw', 'pcd', 'pdc', 'pdt', 'peo', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'ps', 'pt', 'pt-br', 'qu', 'qya', 'rgn', 'rki', 'rm', 'rmy', 'rn', 'ro', 'roa-rup', 'roa-tara', 'ru', 'rue', 'rup', 'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'se', 'sg', 'sgs', 'sh', 'shi', 'si', 'sid', 'simple', 'sk', 'sl', 'sla', 'sli', 'sm', 'smn', 'sms', 'sn', 'so', 'sq', 'sr', 'sr-cyrl', 'sr-ec', 'sr-el', 
'sr-latn', 'srn', 'ss', 'st', 'stq', 'su', 'sv', 'sw', 'syc', 'szl', 'ta', 'te', 'tet', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tok', 'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tt-cyrl', 'tt-latn', 'tum', 'tvl', 'tw', 'ty', 'udm', 'ug', 'uk', 'ur', 'uz', 've', 'vec', 'vep', 'vi', 'vls', 'vo', 'w', 'wa', 'war', 'wbl', 'wo', 'wuu', 'wym', 'xal', 'xh', 'xmf', 'yai', 'yi', 'yo', 'yue', 'za', 'zea', 'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant', 'zh-hk', 'zh-min-nan', 'zh-mo', 'zh-simple', 'zh-sg', 'zh-tw', 'zh-wuu', 'zh-yue', 'zu', 'zun'}
-- list from [[Module:Lang/data]]

-- Prefix remapping applied by addiw() in m.completeiw(): a lower-cased prefix found
-- as a key here is replaced by the mapped value BEFORE the knownLanguages filter runs,
-- so several source codes can collapse onto one output prefix.
-- NOTE(review): 'fiu-vro' is remapped to 'vro', but 'vro' does not appear in langlist,
-- so with filtering enabled those links are dropped — confirm whether that is intended.
local iwprefers = { -- remaps preferred codes for interwiki links
	['bat-smg'] = 'sgs', -- standard BCP47 code is working now, and preferred
	['be-x-old'] = 'be-tarask', -- standard BCP47 code is working now, and preferred
	['bho'] = 'bh', -- both are correct, but Wikimedia assumes that 'bh' refers to just 'bho'
	['bms'] = 'map-bms', -- incorrect/conflicting code 'map-bms' according ISO 639 and BCP47, but still required by Wikimedia
	['commons'] = 'c',
	['de-at'] = 'de', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['de-ch'] = 'de', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-ca'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-gb'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-us'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['egl'] = 'eml', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['fiu-vro'] = 'vro', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['gsw'] = 'als', -- incorrect/conflicting code 'als' according ISO 639 and BCP47, but still required by Wikimedia
	['ms-arab'] = 'ms', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['nb'] = 'no', -- both are correct, but Wikimedia assumes that 'no' refers to just 'nb', separating 'nn' (there are fallbacks between 'nb' and 'nn')
	['nrf'] = 'nrm', -- incorrect/conflicting code 'nrm' according ISO 639 and BCP47, but still required by Wikimedia
	['pt-br'] = 'pt', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['rgn'] = 'eml', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['rki'] = 'my', -- both are correct, but only 'my' is working for now (merged Wikis)
	['roa-rup'] = 'rup', -- standard BCP47 code is working now, and preferred
	['tokipona'] = 'tok', -- standard BCP47 code is working now, and preferred
	['sr-cyrl'] = 'sr', -- both are standard (merged Wikis)
	['sr-ec'] = 'sr', -- legacy code of Wikimedia, 'sr-cyrl' is standard (merged Wikis)
	['sr-el'] = 'sr', -- legacy code of Wikimedia, 'sr-cyrl' is standard (merged Wikis)
	['sr-latn'] = 'sr', -- both are standard (merged Wikis)
	['tt-cyrl'] = 'tt', -- both are standard (merged Wikis)
	['tt-latn'] = 'tt', -- both are standard (merged Wikis)
	['w'] = 'en',
	['wikidata'] = 'd',
	['wikipedia'] = 'en',
	['zh-classical'] = 'lzh', -- standard BCP47 code is working now, and preferred
	['zh-cn'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hans'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hant'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hk'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-min-nan'] = 'nan', -- standard BCP47 code is working now, and preferred
	['zh-mo'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-simple'] = 'zh', -- only the standard BCP47 code is working
	['zh-sg'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-tw'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-wuu'] = 'wuu', -- standard BCP47 code is working now, and preferred
	['zh-yue'] = 'yue', -- standard BCP47 code is working now, and preferred
}

-- Sort-key overrides used by the comparator in m.completeiw(): a prefix listed
-- here is compared through its replacement key instead of its own text, which
-- places these four prefixes ahead of the ordinary alphabetical ones.
local iwsort = {
	['d'] = '',
	['c'] = ' ',
	['en'] = '0',
	['simple'] = '1',
}

-- Lookup set built from langlist: knownLanguages[prefix] is true for every
-- accepted prefix. Two entries are removed again right after the build.
local knownLanguages = {}
for _, code in ipairs(langlist) do
	knownLanguages[code] = true
end
knownLanguages.c = nil -- disable local project
knownLanguages.d = nil -- disable links to Wikidata (that do not generate an interwiki in sidebar)

--- Build the wikitext of interwiki links for an entity and the items it points to.
--
-- The sitelinks of the entity `qid` are collected first. Depending on `prop`,
-- the sitelinks of linked items are added:
--   * 'P1420', 'P1889', 'P460': every item used as a value of that property;
--   * 'P1889P1013Q27924673': items used as a value of P1889 whose statement has
--     a P1013 qualifier with value Q27924673;
--   * any other value: nothing beyond the entity's own sitelinks.
-- When `filterknown` is truthy, the wikitext of the current page is additionally
-- scanned for `[[prefix:title]]` links, and only prefixes present in
-- `knownLanguages` (after remapping through `iwprefers`) are kept.
-- For each prefix only the first title collected is emitted.
--
-- @param qid         entity id handed to mw.wikibase.getEntityObject (passed through unchanged, may be nil)
-- @param prop        selector string, see the list above
-- @param linking     truthy: emit `[[:iw:Title|iw:<bdi>Title</bdi>]]` joined with ', ';
--                    falsy: emit `[[iw:Title]]` joined without separator
-- @param filterknown truthy: filter prefixes and scan the current page (see above)
-- @return wikitext string; '' when the entity cannot be loaded
function m.completeiw(qid, prop, linking, filterknown)
	local entity = mw.wikibase.getEntityObject(qid)
	if not entity then
		return '' -- no data -> category?
	end

	local iws = {}    -- distinct prefixes, in first-seen order until sorted below
	local titles = {} -- prefix -> list of titles collected for it

	-- Normalise a prefix, remap it, and record the title under it.
	local function addiw(iw, title)
		iw = iw:lower():gsub('_', '-') -- single-target assignment keeps only gsub's string result
		iw = iwprefers[iw] or iw
		if filterknown and not knownLanguages[iw] then
			return
		end
		local collected = titles[iw]
		if collected then
			insert(collected, title)
		else
			insert(iws, iw)
			titles[iw] = { title }
		end
	end

	-- Record every '...wiki' sitelink of an entity (nil entities are ignored).
	local function extractLinks(item)
		if not (item and item.sitelinks) then
			return
		end
		for site, link in pairs(item.sitelinks) do
			if site == 'wikidata' then
				addiw('d', link.title)
			elseif site:sub(-4) == 'wiki' then
				addiw(site:sub(1, -5), link.title) -- strip the 'wiki' suffix to get the prefix
			end
		end
	end

	-- Return the entity id carried by a snak, or nil if it has no such value.
	local function linkedItemId(snak)
		if snak.snaktype ~= 'value' then
			return nil
		end
		local value = snak.datavalue and snak.datavalue.value
		if type(value) ~= 'table' then
			return nil
		end
		return value.id
	end

	-- Tell whether a statement has a qualifier `qprop` whose value is the item `qvalue`.
	local function hasQualifier(statement, qprop, qvalue)
		local snaks = statement.qualifiers and statement.qualifiers[qprop]
		if snaks then
			for _, snak in ipairs(snaks) do
				if linkedItemId(snak) == qvalue then
					return true
				end
			end
		end
		return false
	end

	extractLinks(entity)

	-- Translate the selector into "which property to follow" and "is the qualifier required".
	local followed, needsCriterion
	if prop == 'P1420' or prop == 'P1889' or prop == 'P460' then
		followed = prop
	elseif prop == 'P1889P1013Q27924673' then
		followed, needsCriterion = 'P1889', true
	end

	local statements = followed and entity.claims and entity.claims[followed]
	if statements then
		for _, statement in ipairs(statements) do
			local seeid = linkedItemId(statement.mainsnak)
			if seeid and (not needsCriterion or hasQualifier(statement, 'P1013', 'Q27924673')) then
				extractLinks(mw.wikibase.getEntityObject(seeid))
			end
		end
	end

	if filterknown then
		-- lazy initialization
		htmldecode = htmldecode or require('Module:HTMLEntities').htmldecode
		-- Parse (partially) the page content to detect interwiki links that may still not be in Wikidata
		-- FIXME: may still detect false positives (incomplete parsing) and omit transcluded/generated interwikis

		local raw = currentTitle:getContent() -- may be costly; nil when there is no page content to read
		if raw then
			-- NOTE(review): the last gsub also returns its replacement count, which is
			-- forwarded to htmldecode as a second argument (unchanged from the previous
			-- code) — confirm whether htmldecode takes a second parameter.
			local content = htmldecode(
					raw
					:gsub('<!%-%-.-%-%->', ''):gsub('<!%-%-.*$', '') -- strip HTML comments (closed, then unclosed)
					:gsub('<includeonly%s*>.-</includeonly%s*>', ''):gsub('<includeonly%s*>.*$', '') -- strip "includeonly" sections
			)
			for iw, title in content:gmatch('%[%[([%-0-9A-Za-z]+):([^%|%]]+)%|?[^%]]*%]%]') do
				addiw(iw, title)
			end
		end
	end

	sort(iws, function(a, b)
			return (iwsort[a] or a) < (iwsort[b] or b)
		end)

	-- A leading colon turns the interwiki into an ordinary inline link.
	local colon = linking and ':' or ''
	for i = 1, #iws do
		local iw = iws[i]
		local title = titles[iw][1]
		local label = linking and ('|' .. iw .. ':<bdi>' .. title .. '</bdi>') or ''
		iws[i] = '[[' .. colon .. iw .. ':' .. title .. label .. ']]'
	end
	return concat(iws, linking and ', ' or '')
end

--- #invoke entry point: interwiki links for the entity given by the `id`
-- argument plus the items it references through P1420.
-- Calls m.completeiw with linking disabled and filtering enabled.
function m.InterwikiP1420(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1420', false, true)
end

--- #invoke entry point: interwiki links for the entity given by the `id`
-- argument plus the items it references through P1889.
-- Calls m.completeiw with linking disabled and filtering enabled.
function m.InterwikiP1889(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1889', false, true)
end

--- #invoke entry point: like InterwikiP1889, but only P1889 statements
-- qualified with P1013 = Q27924673 are followed.
-- Calls m.completeiw with linking disabled and filtering enabled.
function m.InterwikiP1889fn(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1889P1013Q27924673', false, true)
end

--- #invoke entry point: interwiki links for the entity given by the `id`
-- argument plus the items it references through P460.
-- Calls m.completeiw with linking disabled and filtering enabled.
function m.Interwiki(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P460', false, true)
end

-- Export the module table so that its functions can be called via #invoke.
return m