-- Tài liệu mô đun [tạo] (wiki page header, "Module documentation [create]"; not part of the module code)
-- Shorthand for turning Unicode code points into a string.
local U = mw.ustring.char

-- Combining diacritical marks, named so the replacement patterns that strip
-- them stay readable.
local grave, acute     = U(0x0300), U(0x0301)  -- combining grave / acute
local tilde, macron    = U(0x0303), U(0x0304)  -- combining tilde / macron
local double_acute     = U(0x030B)             -- combining double acute
local dgrave, invbreve = U(0x030F), U(0x0311)  -- combining double grave / inverted breve

--[[ Name is the "canonical name" used on Wiktionary. Article is the Wikipedia article. Script is the ISO 15924 code. ]]

-- Language data table returned by this module.
--
-- data.languages maps a language code to a table with these fields:
--   name          English canonical name.
--   [1]           Optional second name stored at array index 1. In the entries
--                 below it looks like the Vietnamese name (presumably for
--                 display; TODO confirm whether any consumer reads it). It is
--                 NOT part of "name".
--   article       Title of the Wikipedia article about the language (a string).
--   scripts       List of script codes.
--   direction     Text direction, where given (only "rtl" is used here).
--   type          "reconstructed" for proto-languages.
--   replacements  Map from a pattern (single character or [...] set) to the
--                 text that replaces it.
--
-- NOTE(review): several entries spell the script key "script" rather than
-- "scripts". The keys are left exactly as found because the consumer is not
-- visible from this file; confirm which spelling is actually read.
--
-- data.redirects maps a Wikipedia language code to the equivalent code used
-- as a key for language data.
local data = {
	["languages"] = {
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["ang"] = {
			["name"] = "Old English",
			[1] = "Tiếng Anh cổ",
			-- Plain string, like every other entry (this one used to be
			-- wrapped in a table, which breaks string concatenation).
			["article"] = "Tiếng Anh cổ",
			["scripts"] = { "Latn" },
			-- Remove macrons, acutes, and overdots.
			["replacements"] = {
				["[ĀÁ]"] = "A",
				["[āá]"] = "a",
				["[ǢǼ]"] = "Æ",
				["[ǣǽ]"] = "æ",
				["Ċ"]    = "C",
				["ċ"]    = "c",
				["[ĒÉ]"] = "E",
				["[ēé]"] = "e",
				["Ġ"]    = "G",
				["ġ"]    = "g",
				["[ĪÍ]"] = "I",
				["[īí]"] = "i",
				["[ŌÓ]"] = "O",
				["[ōó]"] = "o",
				["[ŪÚ]"] = "U",
				["[ūú]"] = "u",
				["[ȲÝ]"] = "Y",
				["[ȳý]"] = "y",
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			[1] = "Ả Rập",
			["article"] = "Tiếng Ả Rập",
			["scripts"] = { "Arab" },
			["direction"] = "rtl", -- Should be in the script data module.
			-- ālif with wasla is replaced by ālif;
			-- taṭwīl, fatḥatan, ḍammatan, kasratan,
			-- fatḥa, ḍamma, kasra,
			-- shadda, sukūn, and superscript (dagger) ālif are removed.
			["replacements"] = {
				[U(0x0671)] = U(0x0627),
				["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
					.. U(0x064E) .. U(0x064F) .. U(0x0650)
					.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
			},
		},
		["bn"] = {
			["name"] = "Bengali",
			[1] = "Bengal",
			["article"] = "Tiếng Bengal",
			["scripts"] = { "Beng" },
		},
		["de"] = {
			["name"] = "German",
			[1] = "Đức",
			["article"] = "Tiếng Đức",
			["scripts"] = { "Latn" },
			-- Disabled replacements, kept for reference:
			--[[
			["replacements"] = {
				["ae"]    = "ä",
				["oe"]    = "ö",
				["ue"]    = "ü",
				["A[Ee]"] = "Ä",
				["O[Ee]"] = "Ö",
				["U[Ee]"] = "Ü",
			},
			]]
		},
		["en"] = {
			["name"] = "English",
			[1] = "Anh",
			["article"] = "Tiếng Anh",
			["scripts"] = { "Latn" },
		},
		["es"] = {
			["name"] = "Spanish",
			[1] = "Tây Ban Nha",
			["article"] = "Tiếng Tây Ban Nha",
			["scripts"] = { "Latn" },
		},
		["fr"] = {
			["name"] = "French",
			[1] = "Pháp",
			["article"] = "Tiếng Pháp",
			["scripts"] = { "Latn" },
		},
		["frm"] = {
			["name"] = "Middle French",
			[1] = "Pháp Trung Đại",
			["article"] = "Tiếng Pháp Trung Đại",
			["scripts"] = { "Latn" },
		},
		["gem-pro"] = {
			["name"] = "Proto-Germanic",
			[1] = "Tiền German",
			["article"] = "Ngữ chi Tiền German",
			["script"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			[1] = "Hy Lạp cổ đại",
			["article"] = "Tiếng Hy Lạp cổ đại",
			["scripts"] = { "Grek" },
			["replacements"] = {
				-- Vowels with macrons or breves are replaced with plain letters.
				["[ᾱᾰ]"] = "α",
				["[ᾹᾸ]"] = "Α",
				["[ῑῐ]"] = "ι",
				["[ῙῘ]"] = "Ι",
				["[ῡῠ]"] = "υ",
				["[ῩῨ]"] = "Υ",
				-- Variant letterforms are replaced with the ordinary letters.
				["ϐ"]    = "β",
				["ϵ"]    = "ε",
				["ϑ"]    = "θ",
				["ϰ"]    = "κ",
				["ϱ"]    = "ρ",
				["ϲ"]    = "σ",
				["ϕ"]    = "φ",
			},
		},
		["grk-pro"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["script"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
			["scripts"] = { "Deva" },
		},
		["ine-pro"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["script"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
			["scripts"] = { "Jpan" },
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			["scripts"] = { "Latn" },
			["replacements"] = {
				-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
				["[ĀĂ]"]  = "A",
				["[āă]"]  = "a",
				["[ĒĔ]"]  = "E",
				["[ēĕë]"] = "e",
				["[ĪĬÏ]"] = "I",
				["[īĭï]"] = "i",
				["[ŌŎ]"]  = "O",
				["[ōŏ]"]  = "o",
				["[ŪŬÜ]"] = "U",
				["[ūŭü]"] = "u",
				["Ȳ"]     = "Y",
				["ȳ"]     = "y",
			},
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
			["script"] = { "" },
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			["script"] = { "Cyrs" },
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
			["scripts"] = { "Guru", "Arab" },
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "" },
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			["scripts"] = { "Latn" },
			["replacements"] = {
				-- Precomposed accented vowels are replaced with the bare vowel.
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				-- Any of the combining marks that remain are removed.
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
			},
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
			["scripts"] = { "Arab" },
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			["script"] = { "Armn" },
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
	},

	-- Here, keys (for example, "gem") are Wikipedia language codes used in
	-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
	-- code.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["kjv"] = "sh",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

--[[
	Blank templates for a new language entry: the first without, the second
	with, a "replacements" table. They have the same shape as the entries in
	the "languages" table.

	NOTE(review): the templates use the key "script", while many existing
	entries use "scripts" -- confirm which key is actually read before copying.

	[""] = {
		["name"] = "",
		["article"] = "",
		["script"] = { "" },
		},

	[""] = {
		["name"] = "",
		["article"] = "",
		["script"] = { "" },
		["replacements"] = {
			},
		},

]]

-- The module's value is the data table itself.
return data