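-- Modül:ja (Module:ja)
--
-- From Vikisözlük (Turkish Wiktionary).
-- Page chrome captured with the source: "Modül belgelemesi [Düzenle] [Tazele]" (module documentation [Edit] [Refresh]).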


local export = {}
local find = mw.ustring.find
local length = mw.ustring.len
local trim = mw.text.trim
local split = mw.text.split
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
local match, gmatch = mw.ustring.match, mw.ustring.gmatch
local to_cp, to_char = mw.ustring.codepoint, mw.ustring.char

local Jpan = require("Modül:alfabeler").getirKodaGore("Jpan")
local lang = require("Modül:diller").getirKodaGore("ja")

-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local data = mw.loadData("Modül:ja/veri")

-- Character-class range U+F900-U+FAD9, assembled from code points because
-- Unicode normalization often converts these characters to the corresponding
-- CJK Unified Ideographs characters.
local compat_ideo = to_char(0xF900) .. "-" .. to_char(0xFAD9)

-- Re-export selected entries of the loaded data table under the same names.
export.data = {}
for _, list_name in ipairs({
	"joyo_kanji",
	"jinmeiyo_kanji",
	"grade1",
	"grade2",
	"grade3",
	"grade4",
	"grade5",
	"grade6",
}) do
	export.data[list_name] = data[list_name]
end

-- Converts hiragana to katakana.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Every character in the range ぁ-ゖ is replaced by the character whose code
-- point is 96 higher; all other characters are left untouched.
function export.hira_to_kata(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function shift_to_katakana(hiragana)
		return to_char(to_cp(hiragana) + 96)
	end

	-- keep only the first result of gsub (the string), not the match count
	local converted = gsub(text, '[ぁ-ゖ]', shift_to_katakana)
	return converted
end

-- Converts katakana to hiragana.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Every character in the range ァ-ヶ is replaced by the character whose code
-- point is 96 lower; all other characters are left untouched.
function export.kata_to_hira(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function shift_to_hiragana(katakana)
		return to_char(to_cp(katakana) - 96)
	end

	-- keep only the first result of gsub (the string), not the match count
	local converted = gsub(text, '[ァ-ヶ]', shift_to_hiragana)
	return converted
end

-- Converts fullwidth ASCII variants to their plain ASCII counterparts.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- U+3000 (ideographic space) becomes an ASCII space, and every character in
-- U+FF01-U+FF5E is shifted down by 65248 (0xFEE0) onto U+0021-U+007E.
--
-- The fullwidth characters are built from code points on purpose: written as
-- literals they are easily folded to plain ASCII by text tooling, which turns
-- the class into '[!-~]' and makes the subtraction produce negative code points.
function export.fullwidth_to_halfwidth(text)
	if type(text) == "table" then text = text.args[1] end

	local ideographic_space = to_char(0x3000)
	local fullwidth_ascii = '[' .. to_char(0xFF01) .. '-' .. to_char(0xFF5E) .. ']'

	text = gsub(text, ideographic_space, ' ')
	return (gsub(text, fullwidth_ascii, function(char) return to_char(to_cp(char) - 65248) end))
end

-- Romanizes kana text.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- options (optional table):
--   no_diacritics: keep doubled vowels instead of converting them to macrons
--   keep_period: keep "." markup and leave 。 unconverted
--   hist: apply the historical-kana handling (du/di/wo, o.u, ...)
-- Returns the romanized string. The result is also compared with the output
-- of Modül:ja/k2r-old; when they differ both are written with mw.log.
--
-- Markup that must survive the conversion (bold, <u>, fullwidth percent sign,
-- existing latin text) is swapped for ㊟...㊟ placeholders first and restored
-- at the end.
function export.kana_to_romaji(text, options)
	-- options: no_diacritics, keep_period, hist

	if type(text) == "table" then
		text = text.args[1]
	end

	if not options then options = {} end

	local tracking_has_percent = find(text, '%%')
	local text_old = trim(require('Modül:ja/k2r-old').kana_to_romaji(text, options.no_diacritics, options.keep_period))

	-- U+FF05 FULLWIDTH PERCENT SIGN, built from its code point: a bare ASCII '%'
	-- is a malformed pattern and an invalid replacement string for gsub
	local fullwidth_percent = to_char(0xFF05)

	-- conversions
	text = gsub(text, '(%-)([はハ])$', '%1㊟㈛㊟%2') -- は as suffix and appearing at the end of string
	text = gsub(text, '(%-)([はハ]) ', '%1㊟㈛㊟%2 ') -- は as suffix and appearing mid-sentence
	text = gsub(text, fullwidth_percent, '㊟㌫㊟') -- at [[見込む]], for example; avoid collision with % used in our ruby syntax
	text = gsub(text, '\'\'\'', '㊟⒝㊟')
	text = gsub(text, '<u>', '㊟㋑⒰㊟')
	text = gsub(text, '</u>', '㊟㋺⒰㊟')

	-- avoid tampering with existing latin text: store it away
	-- (single pass: each run gets its own numbered placeholder, so a short run
	-- such as "a" can no longer be substituted inside a longer run such as "ab")
	local escape = {}
	local id = 0
	text = gsub(text, "[a-z]+", function(latin)
		local placeholder = "㊟㊕㊕㊟" .. id .. "㊟㊕㊕㊟"
		escape[id] = latin
		id = id + 1
		return placeholder
	end)

	-- special preformatting
	text = gsub(text, 'ヶげつ', 'かげつ')
	text = gsub(text, 'ヶ(㊟[㋑㋺⒝⒰]+㊟)げつ', 'か%1げつ') -- 「'''ヶ'''げつ」
	text = gsub(text, 'ヶ', 'が')
	text = gsub(text, '(.)ゝ', '%1%1')
	text = gsub(text, '(.)ゞ', function(char) return mw.ustring.toNFC(char .. char .. '゙') end) -- unicode hax

	-- [[Wiktionary:Grease_pit/2017/May#Formatting_for_individual_Japanese_readings]]
	if options.hist then
		text = gsub(text, 'づ', 'du')
		text = gsub(text, 'ぢ', 'di')
		text = gsub(text, 'を', 'wo')
		text = gsub(text, '([やゆよわゐゑを])', '㊟⒳㊟%1')
	end

	text = export.hira_to_kata(text)
	text = gsub(text, '.', function(char) return data.kr[char] or char end)
	text = export.fullwidth_to_halfwidth(text)

	if options.hist then
		text = gsub(text, 'ou', 'o.u')
		text = gsub(text, '([iu])㊟⒳㊟', '') -- くゐやう kwyau
		text = gsub(text, '㊟⒳㊟', '') -- ゑつ wetsu
	end

	-- markup
	text = gsub(text, '%%', '.') -- ruby "percent sign" syntax
	text = gsub(text, '([ッ¤])%.', '%1') -- 「し を ぼっ.す」; 「るい%じん%えん」→「rui.jin¤.en¤」

	-- 「テェェェ」→「テェーー」 (avoid funky romaji effected by the "(テュ→)teユ→tyu" line below)
	text = gsub(text, '(ァ)(ァ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ィ)(ィ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ゥ)(ゥ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ェ)(ェ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ォ)(ォ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)

	-- (ゲェ→)geェ→gee (note that this causes things like ウゥ→ū and ギィ→gī)
	text = gsub(text, '[aiueo][ァィゥェォ]', {['aァ']='aa',['iィ']='ii',['uゥ']='uu',['eェ']='ee',['oォ']='oo',})

	-- (クヮ→)kuヮ→kwa
	text = gsub(text, '[u]([ヮ])', {['ヮ']='wa',})

	-- (クァ→)kuァ→kwa, (トァ→)toァ→twa, (ウィ→)uィ→wi
	text = gsub(text, '[uo]([ァィェォ])', {['ァ']='wa',['ィ']='wi',['ェ']='we',['ォ']='wo',})
	-- (ツァ→)cwa→ca
	text = gsub(text, '([fvcsz])w', '%1')

	-- (テュ→)teユ→tyu, (ギェ→)giェ→gye
	text = gsub(text, '[aiueo]([ャュェョ])', {['ャ']='ya',['ュ']='yu',['ェ']='ye',['ョ']='yo',})
	-- (ジュ→)jyu→ju
	text = gsub(text, '([xjq])y', '%1')

	-- (ティ→)teィ→ti (essentially forget about the vowel in between)
	text = gsub(text, '[aiueo]([ァィゥェォ])', {['ァ']='a',['ィ']='i',['ゥ']='u',['ェ']='e',['ォ']='o',})

	-- chouonpu and sokuon
	-- (looped because each pass resolves only one level of stacked ー / ッ)
	while find(text, '[aiueo]ー') or find(text, 'ッ *[bcdfghjklmnpqrstvwxyz]') or find(text, 'ッ㊟[㋑㋺⒝⒰]+㊟[bcdfghjklmnpqrstvwxyz]') do
		text = gsub(text, '([aiueo])ー', '%1%1')
		text = gsub(text, 'ッ( *)([bcdfghjklmnpqrstvwxyz])', '%2%1%2')
		text = gsub(text, 'ッ(㊟[㋑㋺⒝⒰]+㊟)([bcdfghjklmnpqrstvwxyz])', '%2%1%2')
	end
	-- deal with leftover sokuon not used as geminate
	text = gsub(text, 'ッ', 'h')

	-- (ん→)n¤
	text = gsub(text, '¤([aiueoy])', "'%1")
	text = gsub(text, '¤', '')

	-- は
	text = gsub(text, "([^a-z.㊟])ha([^a-z.㊟])", "%1wa%2")
	text = gsub(text, "([^a-z.㊟])ha$", "%1wa")
	text = gsub(text, "^ha([^a-z.㊟])", "wa%1")
	-- へ
	text = gsub(text, "([^a-z.㊟])he([^a-z.㊟])", "%1e%2")
	text = gsub(text, "([^a-z.㊟])he$", "%1e")
	text = gsub(text, "^he([^a-z.㊟])", "e%1")
	-- change only when
	--   ① not flanked by a-z or a period ("^sore wa nani$", "^hyappou no .he hitotsu$")
	--   ② at the end of the string and not preceded by a-z or a period ("^are wa$")
	--   ③ at the beginning of the string and not followed by a-z or a period ("^he ikou$") [not sure this is actually necessary, but I suppose it is consistent with ②]
	-- this also means that "^ha$" becomes "ha"
	-- period can be used next to the kana (either side) to force the "dumb" romanization (i.e. "ha", "he")

	-- fix sh, ch, ts
	text = gsub(text, '([xqc]*)([xqc])', function(geminate,main)
		--「めちゃ」→「mecha」
		--「めっちゃ」→「metcha」
		--「めっっちゃ」→「mettcha」
		local corresp_geminate_form = {['x']='s',['q']='t',['c']='t'}
		local corresp_main = {['x']='sh',['q']='ch',['c']='ts'}
		return (geminate and mw.ustring.rep(corresp_geminate_form[main], length(geminate))) .. corresp_main[main]
	end
	)

	-- macrons
	if not options.no_diacritics then
		text = gsub(text, 'oo', 'ō')
		text = gsub(text, 'aa', 'ā')
		text = gsub(text, 'ee', 'ē')
		text = gsub(text, 'ou', 'ō')
		text = gsub(text, 'uu', 'ū')
		text = gsub(text, 'ii', 'ī')
	end

	-- remove markup and convert real periods
	if not options.keep_period then
		text = gsub(text, '%.', '')
		text = gsub(text, '。', '◆.◇')
	end

	-- resolve the ◆ / ◇ spacing markers: adjacent pairs cancel out,
	-- ◆ swallows surrounding spaces, ◇ collapses them into one space
	text = gsub(text, '◇◆', '')
	text = gsub(text, '◆◇', '')
	text = gsub(text, ' *◆ *', '')
	text = gsub(text, ' *◇ *', ' ')

	-- restore latin text
	text = gsub(text, "㊟㊕㊕㊟(%d+)㊟㊕㊕㊟", function(id) return escape[tonumber(id)] end)

	-- clean up spaces
	text = trim(text)
	text = gsub(text, ' +', ' ')

	-- uppercase markup
	text = gsub(text, "(%^)(㊟⒝㊟)", "%2%1") -- move ^ to an effective position if placed before bold markup
	text = gsub(text, "(%^)( )", "%2%1") -- same but with spaces
	text = gsub(text, '%^(.)', mw.ustring.upper) -- uppercase conversion

	-- clean up spaces again
	text = gsub(text, ' +', ' ')

	-- conversions
	text = gsub(text, '㊟⒝㊟', '\'\'\'')
	text = gsub(text, '㊟㋑⒰㊟', '<u>')
	text = gsub(text, '㊟㋺⒰㊟', '</u>')
	text = gsub(text, '㊟㈛㊟', '')
	text = gsub(text, '㊟㌫㊟', fullwidth_percent)

	-- comparison with old kana_to_romaji() code
	text_old = gsub(text_old, '%(ba%)', ' (ba)') -- avoid flooding the tracking template with na-adjectives. ← this really should be looked at though
	text_old = gsub(text_old, ' ”', '”') -- and spacing around quotation marks
	if text ~= text_old then
		-- the tracking calls are disabled; the branches only document the categories
		if mw.ustring.lower(text) == mw.ustring.lower(text_old) then
			--require('Modül:debug').track('ja/k2r diff caps')
		elseif find(text_old, 'ッ') then
			--require('Modül:debug').track('ja/k2r diff w xtu')
		elseif tracking_has_percent then
			--require('Modül:debug').track('ja/k2r diff pc')
		else
			--require('Modül:debug').track('ja/k2r diff')
		end
		mw.log('new]' .. text .. '[')
		mw.log('old]' .. text_old .. '[')
	end

	if find(text, '[ぁ-ー]') then
		--require('Modül:debug').track('ja/k2r failure')
	end

	return text
end

-- Strips spaces, hyphens, periods, "&nbsp;" entities and apostrophes from the input.
-- Meant for comparing manual romaji: editors may insert spaces or hyphens
-- into manual romaji without it being flagged as "wrong".
-- f: a string, or a frame-like table whose args[1] holds the string.
function export.rm_spaces_hyphens(f)
	local text
	if type(f) == 'table' and f.args[1] then
		text = f.args[1]
	else
		text = f
	end

	-- each entry is a pattern whose matches are deleted, applied in this order
	local removed_patterns = { ' ', '-', '%.', '&nbsp;', '\'' }
	for _, pattern in ipairs(removed_patterns) do
		text = gsub(text, pattern, '')
	end

	return text
end

-- Converts romaji to katakana by running an ordered list of substitutions.
-- f: a string, or a frame-like table whose args[1] holds the string.
-- The order of the steps matters: doubled consonants are turned into ッ before
-- syllables are converted, multi-letter syllables come before the generic
-- consonant+vowel lookup, and bare vowels are handled last.
function export.romaji_to_kata(f)
	local text
	if type(f) == 'table' and f.args[1] then
		text = f.args[1]
	else
		text = f
	end

	-- { pattern, replacement }; the replacement may be a string, table or function
	local steps = {
		-- per-character lookup in data.rd (contents not visible in this module)
		{ '.', function (char) return data.rd[char] or char end },
		-- doubled consonants → sokuon + consonant
		{ 'kk', 'ッk' },
		{ 'ss', 'ッs' },
		{ 'tt', 'ッt' },
		{ 'pp', 'ッp' },
		{ 'bb', 'ッb' },
		{ 'dd', 'ッd' },
		{ 'gg', 'ッg' },
		{ 'jj', 'ッj' },
		{ 'tc', 'ッc' },
		-- ts-, sh-, ch- syllables
		{ 'tsyu', 'ツュ' },
		{ 'ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'} },
		{ 'sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'} },
		{ 'ch[uoiea]', {['chu']='チュ',['cho']='チョ',['chi']='チ',['che']='チェ',['cha']='チャ'} },
		-- n-row, including the moraic n (optionally written n')
		{ "n[uoiea']?", {['nu']='ヌ',['no']='ノ',['ni']='ニ',['ne']='ネ',['na']='ナ',['n']='ン',["n'"]='ン'} },
		-- remaining consonant(+y/w)+vowel syllables via data.rk
		{ '[wvtrpsmlkjhgfdbzy][yw]?[uoiea]', function (char) return data.rk[char] or char end },
		-- leftover bare vowels
		{ 'u', 'ウ' },
		{ 'o', 'オ' },
		{ 'i', 'イ' },
		{ 'e', 'エ' },
		{ 'a', 'ア' },
	}

	for _, step in ipairs(steps) do
		text = gsub(text, step[1], step[2])
	end

	return text
end

-- expects: any mix of kanji and kana
-- determines the script types used
-- e.g. given イギリス人, it returns Kana+Hani
--
-- f: a string, or a frame-like table whose args[1] holds the string.
-- returns: the detected script names joined with '+', in the fixed order
-- Hira, Kana, Hani, Romaji, Number, Abbreviation ('' when nothing matches).
function export.script(f)
	-- both declared local: assigning to undeclared names would leak `text` and
	-- `script` into the global environment
	local text = type(f) == 'table' and f.args[1] or f
	local script = {}

	-- fullwidth ranges are built from code points so they cannot be folded
	-- into their ASCII counterparts by text tooling
	local fullwidth_lower = to_char(0xFF41) .. '-' .. to_char(0xFF5A)
	local fullwidth_upper = to_char(0xFF21) .. '-' .. to_char(0xFF3A)
	local fullwidth_digit = to_char(0xFF10) .. '-' .. to_char(0xFF19)

	if find(text, '[ぁ-ゖ]') or find(text, '[𛅐-𛅒]') then
		table.insert(script, 'Hira')
	end
	-- TODO: there are two kanas.  This should insert Kata.
	if find(text, '[ァ-ヺー]') or find(text, '[𛅤-𛅦]') then
		table.insert(script, 'Kana')
	end
	-- 一 is unicode 4e00, previously used 丁 is 4e01
	if find(text, '[㐀-䶵一-鿌' .. compat_ideo .. '𠀀-𯨟]') then
		table.insert(script, 'Hani')
	end
	-- matching %a should have worked but matched the end of every string
	if find(text, '[a-zA-ZāēīōūĀĒĪŌŪ' .. fullwidth_lower .. fullwidth_upper .. ']') then
		table.insert(script, 'Romaji')
	end
	if find(text, '[0-9' .. fullwidth_digit .. ']') then
		table.insert(script, 'Number')
	end
	if find(text, '[〆々]') then
		table.insert(script, 'Abbreviation')
	end

	return table.concat(script, '+')
end

-- Counts the morae in a kana string.
-- Most small kana belong to the preceding mora, so they are deleted and the
-- remaining characters are counted. Small tsu is the exception; per the
-- original author, data.nonmora_to_empty maps every small hiragana except
-- small tsu (the table itself is not visible in this module).
-- text: a string, or a frame-like table whose args[1] holds the string.
-- returns: the number of characters left after the removals.
function export.count_morae(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function drop_nonmora(char)
		local replacement = data.nonmora_to_empty[char]
		if replacement then
			return replacement
		end
		return char
	end

	-- katakana → hiragana so a single lookup table suffices (hiragana is untouched)
	local hiragana = export.kata_to_hira(text)
	-- delete the small kana that do not form a mora of their own
	local remaining = gsub(hiragana, '.', drop_nonmora)
	-- delete the invisible character held in the pattern literal
	-- (described by the original author as a zero-width space)
	remaining = gsub(remaining, '‎', '')

	return length(remaining)
end

-- accepts: any mix of kana (a string, or a frame-like table whose args[1]
-- holds the string)
-- returns: a hiragana sort key designed for WMF software
-- this is like sort() but doesn't return |sort=sortkey,
-- just the sort key itself, but unlike sort(), this
-- replaces the long vowel mark with its vowel
--
-- data.dakuten / data.handakuten are expected to map the marked kana to ''
-- and data.tenconv to map them to their unmarked form; those tables are not
-- visible in this module.
function export.jsort(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- remove western spaces, hyphens, and periods
	-- diff=41967612: also remove caret
	text = gsub(text, '[ %-%.%^]', '')

	text = export.kata_to_hira(text)

	-- if the first character has dakuten, replace it with the corresponding
	-- character without dakuten and add an apostrophe to the end, e.g.
	-- がす > かす'
	-- similar thing but with handakuten and two apostrophes, e.g. ぱす -> はす''
	local first = sub(text, 1, 1)
	-- an empty string has no first character; without this guard it would be
	-- treated as dakuten-initial and come back as a lone apostrophe
	if first ~= '' then
		local suffix
		if data.dakuten[first] == '' then
			suffix = "'"
		elseif data.handakuten[first] == '' then
			suffix = "''"
		end
		if suffix then
			text = (data.tenconv[first] or first) .. sub(text, 2) .. suffix
		end
	end

	-- replace the long vowel mark with the vowel that it stands for
	for key,value in pairs(data.longvowels) do
		text = gsub(text,key,value)
	end
	return text
end

-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
--
-- f: a string, or a frame-like table whose args[1] holds the string.
-- A key is produced only when one is needed:
--   * the first kana carries dakuten: it is replaced by its unmarked form and
--     an apostrophe is appended;
--   * the first kana carries handakuten: same, with two apostrophes;
--   * the text consists solely of characters in ァ-ヺ: the hiragana form is the key.
-- In every other case, and for empty input, '' is returned.
--
-- data.dakuten / data.handakuten are expected to map the marked kana to ''
-- and data.tenconv to map them to their unmarked form; those tables are not
-- visible in this module.
function export.sort(f)
	local text = type(f) == 'table' and f.args[1] or f

	-- nothing to sort; without this guard the empty string would be treated
	-- as dakuten-initial and yield "|sort='"
	if text == '' then
		return ''
	end

	-- true when nothing is left after deleting the katakana
	local all_katakana = gsub(text, '[ァ-ヺ]', '') == ''
	local prefix = '|' .. 'sort' .. '='

	text = export.kata_to_hira(text)
	local first = sub(text, 1, 1)

	local suffix
	if data.dakuten[first] == '' then
		suffix = "'"
	elseif data.handakuten[first] == '' then
		suffix = "''"
	end

	if suffix then
		return prefix .. (data.tenconv[first] or first) .. sub(text, 2) .. suffix
	end
	if all_katakana then
		return prefix .. text
	end
	return ''
end

-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- f: a frame-like table whose args[1] holds the term, or (like the other
-- functions of this module) the term itself as a plain string.
function export.definal(f)
	local term = type(f) == 'table' and f.args[1] or f
	return sub(term, 1, length(term) - 1)
end

-- Deletes the ruby markup characters ^ - . % and the space from text.
-- Uses the byte-oriented string.gsub: every character in the class is ASCII.
function export.remove_ruby_markup(text)
	local markup_chars = "[%^%-%. %%]"
	-- keep only the first result of gsub (the string), not the match count
	local stripped = string.gsub(text, markup_chars, "")
	return stripped
end

-- do the work of Template:ja-kanji

-- alternative spellings accepted for the grade parameter, mapped to the numeric form
local kanji_grade_aliases = { c = "7", n = "8", uc = "9", r = "0" }

-- description displayed for each grade
local kanji_grade_labels = {
	["7"] = "[[w:Jōyō kanji|günlük kullanım “Jōyō” kanji]]",
	["8"] = "[[w:Jinmeiyō kanji|kişi adları “Jinmeiyō” kanji]]",
	["9"] = "[[w:Hyōgai kanji|yaygın olmayan “Hyōgai” kanji]]",
	["0"] = "[[w:Radical|Radical]]",
}

-- category name for each grade; grade 0 (radicals) uses a fixed sort key and
-- is therefore handled inside export.kanji
local kanji_grade_categories = {
	["7"] = "Günlük kullanım kanjileri",
	["8"] = "Kişi adları kanjileri",
	["9"] = "Yaygın olmayan kanjiler",
}

-- grades 1-6 differ only in the number
for n = 1, 6 do
	kanji_grade_labels[tostring(n)] = "[[w:Kyōiku kanji|sınıf " .. n .. " “Kyōiku” kanji]]"
	kanji_grade_categories[tostring(n)] = "Sınıf " .. n .. " kanjileri"
end

-- frame: the invoking frame; the parameters are read from its parent frame:
--   grade / sınıf: kanji grade ("0"-"9" or r, c, n, uc); determined with
--                  export.kanji_grade when omitted
--   rs: category sort key (falls back to the page name; an error category is
--       added when missing)
--   shin / kyu: the shinjitai / kyūjitai counterpart to link to
--   head / baş: text displayed instead of the page name
-- returns: the headword wikitext followed by the categories, or nothing when
-- the current page title contains no kanji
function export.kanji(frame)
	local PAGENAME = mw.title.getCurrentTitle().text
	-- only do this if this entry is a kanji page and not some user's page
	if not match(PAGENAME, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]") then
		return
	end

	local args = frame:getParent().args
	local grade = args["grade"] or args["sınıf"] or ""
	local rs = args["rs"] or ""
	local shin = args["shin"] or ""
	local kyu = args["kyu"] or ""
	local head = args["head"] or args["baş"] or ""
	-- honour the "baş" alias and treat an empty parameter like a missing one
	if head == "" then
		head = PAGENAME
	end

	local wikitext = {}
	local categories = {}

	local catsort = (rs ~= "") and rs or PAGENAME

	-- display the kanji itself at the top at 275% size
	table.insert(wikitext, '<div><span lang="ja" class="Jpan" style="font-size:275%; line-height: 100%;">' .. head .. '</span></div>')

	-- display information for the grade

	-- if grade was not specified, determine it now
	-- (kanji_grade may return false, which becomes "false" and is reported
	-- through the error category below)
	if grade == "" then
		grade = tostring(export.kanji_grade(PAGENAME))
	end
	local grade_key = kanji_grade_aliases[grade] or grade

	table.insert(wikitext, "(''")
	local grade_label = kanji_grade_labels[grade_key]
	if grade_label then
		table.insert(wikitext, grade_label)
	else
		table.insert(categories, "[[Kategori:Japonca ilgilenmeyi bekleyen sözcükler/kanji sınıfı]]")
	end

	-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified

	if kyu ~= "" then
		table.insert(wikitext, ",&nbsp;")
		table.insert(wikitext, '[[shinjitai]] kanji, [[kyūjitai]] biçimi <span lang="ja" class="Jpan">[[' .. kyu .. '#Japonca|' .. kyu .. ']]</span>')
	elseif shin ~= "" then
		table.insert(wikitext, ",&nbsp;")
		table.insert(wikitext, '[[kyūjitai]] kanji, [[shinjitai]] biçimi <span lang="ja" class="Jpan">[[' .. shin .. '#Japonca|' .. shin .. ']]</span>')
	end
	table.insert(wikitext, "'')")

	-- add categories
	table.insert(categories, "[[Kategori:Han karakterleri|" .. catsort .. "]]")
	table.insert(categories, "[[Kategori:Japonca Han karakterleri|" .. catsort .. "]]")
	if grade_key == "0" then
		table.insert(categories, "[[Kategori:CJKV radicals| ]]")
	elseif kanji_grade_categories[grade_key] then
		table.insert(categories, "[[Kategori:" .. kanji_grade_categories[grade_key] .. "|" .. catsort .. "]]")
	end

	-- error category
	if rs == "" then table.insert(categories, "[[Kategori:Japonca ilgilenmeyi bekleyen sözcükler/radical and strokes]]") end

	return table.concat(wikitext, "") .. table.concat(categories, "\n")
end

-- Ordered { grade, pattern } checks for export.kanji_grade; the first pattern
-- found in the input decides the grade. Each pattern is a character class
-- built from the kanji lists in the loaded data table.
local kanji_grade_checks = {
	-- anything outside both the joyo and jinmeiyo lists
	{ 9, '[^' .. data.joyo_kanji .. data.jinmeiyo_kanji .. ']' },
	{ 8, '[' .. data.jinmeiyo_kanji .. ']' },
	{ 7, '[' .. data.secondary .. ']' },
	{ 6, '[' .. data.grade6 .. ']' },
	{ 5, '[' .. data.grade5 .. ']' },
	{ 4, '[' .. data.grade4 .. ']' },
	{ 3, '[' .. data.grade3 .. ']' },
	{ 2, '[' .. data.grade2 .. ']' },
	{ 1, '[' .. data.grade1 .. ']' },
}

-- Determines the grade of a kanji.
-- kanji: a string, or a frame-like table whose args[1] holds the string.
-- returns: a number from 9 down to 1 for the first check that matches,
-- or false when none of them does.
function export.kanji_grade(kanji)
	if type(kanji) == "table" then
		kanji = kanji.args[1]
	end

	for _, check in ipairs(kanji_grade_checks) do
		local grade, pattern = check[1], check[2]
		if find(kanji, pattern) then
			return grade
		end
	end

	return false
end

return export