-- Module:Categorizer
-- Documentation for this module may be created at Module:Categorizer/doc
-- version 202103111200 from master @kabwiki
-- Wikidata classes reported as the "grapheme" category (see `categories`).
local grapheme_ids = {
    'Q2545446',  -- grapheme
    'Q19776628', -- Latin-script letter
    'Q9788',     -- letter
    'Q9398093',  -- vowel letter
    'Q3841820',  -- consonant letter
    'Q19793459', -- Greek letter
    'Q19793988', -- Cyrillic letter
    'Q41713761', -- Arabic letter
    'Q19793980', -- Semitic letter
    'Q41799439', -- letter of the Ge'ez script
    'Q21092339', -- mathematical symbol
    'Q41885931', -- IPA symbol
    'Q41798684', -- kana character
    'Q2373910',  -- syllabogram
    'Q53764732', -- CJK character
    'Q10617810', -- punctuation mark
    'Q1668151',  -- punctuation mark
    'Q3241972',  -- character
    'Q188725',   -- ligature
    'Q36975',    -- glyph
}
-- Wikidata classes reported as the "number" category (see `categories`).
local number_ids = {
    'Q21199', -- natural number
    'Q12503', -- integer
}
-- Wikidata classes reported as the "year" category (see `categories`).
local year_ids = {
    'Q577',     -- year
    'Q3186692', -- calendar year
    'Q3311614', -- century leap year
    'Q19828',   -- leap year
    'Q2378962', -- century year
}
-- Wikidata classes reported as the "human" category (see `categories`).
-- Besides real people the list also covers fictional characters and deities.
local human_ids = {
    'Q5',        -- human
    'Q15632617', -- fictional human
    'Q15773317', -- television character
    'Q95074',    -- fictional character
    'Q21070598', -- narrative entity
    'Q21070568', -- human who may be fictional
    'Q178885',   -- deity
    'Q13405593', -- nature deity
    'Q235113',   -- angel
}
-- Wikidata classes reported as the "language" category (see `categories`).
local language_ids = {
    'Q34770',    -- language
    'Q33742',    -- natural language
    'Q38058796', -- extinct language
    'Q2315359',  -- historical language
    'Q45762',    -- dead language
    'Q33384',    -- dialect
    'Q17376908', -- languoid
    'Q4536530',  -- language
    'Q152559',   -- macrolanguage
}
-- Wikidata classes reported as the "language family" category (see `categories`).
local lanfamily_ids = {
    'Q25295',    -- language family
    'Q11820611', -- language subfamily
}
-- Wikidata classes reported as the "ethnic group" category (see `categories`).
local ethnic_ids = {
    'Q41710', -- ethnic group
}
-- Wikidata classes reported as the "taxon" category (see `categories`).
local taxon_ids = {
    'Q16521',    -- taxon
    'Q713623',   -- clade
    'Q23038290', -- fossil taxon
    'Q310890',   -- monotypic taxon
    'Q855769',   -- strain
    'Q75913269', -- group or class of strains
}
-- Wikidata classes reported as the "country" category (see `categories`).
-- The trailing "*" on some labels is kept from the original list; its meaning
-- is not documented in this module (presumably marks sub-national units -- verify).
local country_ids = {
    'Q6256',     -- country
    'Q3624078',  -- sovereign state
    'Q3024240',  -- historical country
    'Q417175',   -- kingdom
    'Q1048835',  -- political territorial entity
    'Q15634554', -- state with limited recognition
    'Q107390',   -- federated state
    'Q35657',    -- state of the United States*
    'Q34876',    -- province*
    'Q10864048', -- first-level administrative country subdivision*
    'Q6465',     -- department of France*
    'Q36784',    -- region of France*
    'Q859869',   -- region of Niger*
    'Q162620',   -- province of Spain*
    'Q467745',   -- union territory of India
    'Q15239622', -- disputed territory
    'Q1970725',  -- natural region
    'Q1620908',  -- historical region
    'Q3502482',  -- cultural region
    'Q2577883',  -- occupied territory
}
-- Wikidata classes reported as the "city" category (see `categories`).
-- "city" precedes "country" in `queryindex`, so an entity matching both lists
-- is reported as a city.
-- The trailing "*" on some labels is kept from the original list; its meaning
-- is not documented in this module.
local city_ids = {
    'Q5119',     -- capital
    'Q515',      -- city
    'Q2264924',  -- port city
    'Q1549591',  -- big city
    'Q15253706', -- like a city
    'Q7930989',  -- city/town
    'Q174844',   -- megacity
    'Q200250',   -- metropolis
    'Q149621',   -- district
    'Q1637706',  -- city with millions of inhabitants
    'Q123705',   -- neighborhood
    'Q532',      -- village
    'Q5084',     -- hamlet
    'Q21507948', -- former village
    'Q350895',   -- abandoned village
    'Q486972',   -- human settlement
    'Q22674925', -- former populated place
    'Q10354598', -- rural settlement
    'Q1093829',  -- city of the United States
    'Q3327870',  -- municipal corporation in the United States
    'Q21518270', -- state or insular area capital in the United States
    'Q54935504', -- city of Switzerland
    'Q137773',   -- ward of Japan
    'Q2989398',  -- commune of Algeria
    'Q484170',   -- commune of France
    'Q23759397', -- municipality of Bangladesh
    'Q328584',   -- municipality of Slovenia
    'Q203300',   -- municipality of Liechtenstein
    'Q2590631',  -- municipality of Hungary
    'Q2074737',  -- municipality of Spain
    'Q605291',   -- municipality of Niger
    'Q41067667', -- municipality of Tunisia
    'Q3327862',  -- urban commune of Morocco
    'Q2989470',  -- commune of Mauritania
    'Q17318027', -- rural commune of Morocco
    'Q15284',    -- municipality
    'Q3266850',  -- commune
    'Q515483',   -- baladiyah
    'Q30059',    -- arrondissement
    'Q1639634',  -- local government area of Nigeria*
    'Q2914501',  -- department of Niger*
    'Q240601',   -- province of Algeria
    'Q2614970',  -- district of Algeria
}
-- Properties whose presence on an entity marks it as "city" when no class
-- list matched (see `p_categories`).
local city_properties = {
    'P150',  -- contains administrative territorial entity
    'P1376', -- capital of
    'P190',  -- twinned administrative body
}
-- Wikidata classes reported as the "musical band" category (see `categories`).
local musical_band_ids = {
    'Q215380',   -- band
    'Q56816954', -- heavy metal band
    'Q2088357',  -- musical ensemble
}
-- Wikidata classes reported as the "album" category (see `categories`).
local album_ids = {
    'Q482994', -- album
    'Q208569', -- studio album
}
-- Wikidata classes reported as the "tv" category (see `categories`).
-- Covers films as well as television series.
local tv_ids = {
    'Q5398426',  -- television series
    'Q63952888', -- anime television series
    'Q581714',   -- animated series
    'Q11424',    -- film
    'Q17517379', -- animated short film
    'Q202866',   -- animated film
    'Q24862',    -- short film
}
-- Wikidata classes reported as the "book" category (see `categories`).
local book_ids = {
    'Q47461344', -- written work
    'Q179461',   -- religious text
    'Q7725634',  -- literary work
    'Q14406742', -- comic book series
    'Q21198342', -- manga series
    'Q10901350', -- anime and manga *
}
-- Wikidata classes reported as the "intellectual work" category (see `categories`).
local int_work_ids = {
    'Q15621286', -- intellectual work
    'Q196600',   -- media franchise
    'Q1344',     -- opera
    'Q58483083', -- dramatico-musical work
    'Q2188189',  -- musical work
    'Q838948',   -- work of art
    'Q7889',     -- video game
}
-- Wikidata classes reported as the "religion" category (see `categories`).
local religion_ids = {
    'Q9174',     -- religion
    'Q6957341',  -- major religious group
    'Q1189816',  -- ethnic religion
    'Q13414953', -- religious denomination
    'Q1192063',  -- Islamic schools and branches
    'Q19097',    -- sect
    'Q879146',   -- Christian denomination
    'Q1530022',  -- religious organization
    'Q3001185',  -- Jewish denomination
    'Q5839321',  -- religious school of thought
    'Q222516',   -- school of Buddhism
    'Q1826286',  -- religious movement
    'Q209928',   -- madhhab
}
-- Wikidata classes reported as the "script" category (see `categories`).
local script_ids = {
    'Q8192',     -- writing system
    'Q29517555', -- natural script
    'Q1191702',  -- constructed script
    'Q4004706',  -- unicase alphabet
    'Q65045986', -- bicameral alphabet
    'Q9779',     -- alphabet
    'Q185087',   -- abjad
    'Q1049394',  -- phonetic writing system
    'Q1191127',  -- featural writing system
}
-- Wikidata classes reported as the "school" category (see `categories`).
local school_ids = {
    'Q3918',     -- university
    'Q1663017',  -- engineering school
    'Q38723',    -- higher education institution
    'Q847027',   -- grande école
    'Q3354859',  -- collegiate university
    'Q875538',   -- public university
    'Q15936437', -- research university
    'Q265662',   -- national university
    'Q3914',     -- school
    'Q57775519', -- upper secondary school
    'Q2511322',  -- lycée
    'Q4671277',  -- academic institution
    'Q31855',    -- research institute
    'Q845392',   -- polytechnic
    'Q1371037',  -- institute of technology
}
-- Wikidata classes reported as the "hydrography" category (see `categories`).
local hydrography_ids = {
    'Q4022',    -- river
    'Q23397',   -- lake
    'Q165',     -- sea
    'Q949819',  -- ship canal
    'Q12284',   -- canal
    'Q1267889', -- waterway
    'Q1233637', -- river mouth
    'Q124714',  -- spring (water)
    'Q1322134', -- gulf
    'Q9430',    -- ocean
    'Q15324',   -- body of water
    -- The remaining ids carry no label in the original list.
    'Q9019918',
    'Q131681',
    'Q4862338',
    'Q3267675',
    'Q204324',
    'Q4366834',
    'Q8261440',
    'Q211302',
    'Q13586859',
    'Q3215290',
    'Q47053',
    'Q43197',
    'Q39594',
    'Q187223',
    'Q940023',
    'Q5926864',
    'Q37901',
    'Q1210950',
    'Q3705882',
    'Q2507626',
    'Q31615',
    'Q33837',
    'Q13137873',
    'Q2936105',
    'Q204894',
    'Q17018380',
    'Q6341928',
    'Q1140477',
    'Q491713',
}
-- Wikidata classes reported as the "orography" category (see `categories`).
local orography_ids = {
    'Q46831',    -- mountain range
    'Q8502',     -- mountain
    'Q1437459',  -- non-geologically related mountain range
    'Q39816',    -- valley
    'Q271669',   -- landform
    'Q75520',    -- plateau
    'Q34763',    -- peninsula
    'Q55818',    -- impact crater
    'Q3240715',  -- crater
    'Q5107',     -- continent
    'Q205895',   -- landmass
    'Q150784',   -- canyon
    'Q55462971', -- fluvial landform
    'Q133056',   -- mountain pass
}
-- Wikidata classes reported as the "software" category (see `categories`).
local software_ids = {
    'Q9135',     -- operating system
    'Q218616',   -- proprietary software
    'Q20983788', -- free operating system
    'Q7397',     -- software
    'Q341',      -- free software
    'Q6368',     -- web browser
    'Q20825628', -- GNU package
}
-- Wikidata classes reported as the "disease" category (see `categories`).
local disease_ids = {
    'Q12136',    -- disease
    'Q18123741', -- infectious disease
    'Q179630',   -- syndrome
    -- NOTE(review): the list used to repeat 'Q12136' at this position, labelled
    -- "notifiable disease". That id is "disease" (first entry), so the repeated
    -- entry matched nothing new and was dropped. Add the real id for
    -- "notifiable disease" here once it has been looked up on Wikidata.
    'Q506680',   -- endemic disease
    'Q169872',   -- symptom
    'Q1441305',  -- clinical sign
}
-- Properties whose presence on an entity marks it as "disease" when no class
-- list matched (see `p_categories`).
local disease_properties = {
    'P1995', -- health specialty
    'P780',  -- symptoms
    'P923',  -- medical examinations
    'P924',  -- possible treatment
    'P2176', -- drug used for treatment
    -- Disabled entries kept from the original list:
    -- 'P8656', -- Symptom Ontology ID
    -- 'P5082', -- Store medisinske leksikon ID
    -- 'P1692', -- ICD-9-CM
    -- 'P1748', -- NCI Thesaurus ID
}
-- Wikidata classes reported as the "website" category (see `categories`).
local website_ids = {
    'Q35127',    -- website
    'Q171',      -- wiki
    'Q15633582', -- MediaWiki website
    'Q327349',   -- web directory
    'Q4182287',  -- web search engine
    'Q10876391', -- Wikipedia language edition
    'Q19967801', -- online service
    'Q1273203',  -- email service provider
    'Q1668024',  -- service on internet
    'Q1343205',  -- file hosting service
    'Q1210425',  -- internet hosting service
    'Q17232649', -- news website
    'Q62694393', -- URL shortener
    'Q193424',   -- web service
}
-- Wikidata classes reported as the "organization" category (see `categories`).
local organization_ids = {
    'Q4830453',  -- business
    'Q6881511',  -- enterprise
    'Q17990971', -- public enterprise
    'Q18388277', -- technology company
    'Q1589009',  -- privately held company
    'Q1110794',  -- daily newspaper
    'Q1153191',  -- online newspaper
    'Q11032',    -- newspaper
    'Q43229',    -- organization
    'Q484652',   -- international organization
    'Q245065',   -- intergovernmental organization
    'Q15265344', -- broadcaster
    'Q2001305',  -- television channel
    'Q14350',    -- radio station
    'Q7188',     -- government
    'Q7210356',  -- political organisation
    'Q7278',     -- political party
    'Q46970',    -- airline
    'Q157031',   -- foundation
    'Q708676',   -- charitable organization
    'Q29918292', -- cultural organization
    'Q48204',    -- voluntary association
    'Q15911314', -- association
    'Q5193377',  -- cultural institution
    'Q163740',   -- nonprofit organization
    'Q17127659', -- terrorist organization
    'Q1788992',  -- criminal organization
    'Q772547',   -- armed forces
    'Q17149090', -- armed organization
    'Q207320',   -- paramilitary
    'Q1673189',  -- irregular military
    'Q20857065', -- United States federal agency
    'Q327333',   -- government agency
    'Q22687',    -- bank
    'Q66344',    -- central bank
    'Q895526',   -- organ
    'Q476028',   -- association football club
    'Q17270000', -- football club
    'Q847017',   -- sports club
    'Q4438121',  -- sports organization
    'Q6979593',  -- national association football team
    'Q1194951',  -- national sports team
    'Q15944511', -- association football team
    'Q988108',   -- club
    'Q15899789', -- principal organ of the United Nations
    'Q15285626', -- organization established by the United Nations
}
-- Properties whose presence on an entity marks it as "organization" when no
-- class list matched (see `p_categories`).
local organization_properties = {
    'P749',  -- parent organization
    'P355',  -- subsidiary
    'P159',  -- headquarters location
    'P1454', -- legal form
}
-- Wikidata classes reported as the "building" category (see `categories`).
local building_ids = {
    'Q1802963',  -- mansion
    'Q3947',     -- house
    'Q52177058', -- civic building
    'Q16831714', -- government building
    'Q41176',    -- building
    'Q294422',   -- public building
    'Q1021645',  -- office building
    'Q79146420', -- multistorey building
    'Q11755959', -- multi-storey urban building
    'Q18142',    -- high-rise building
    'Q4989906',  -- monument
    'Q811979',   -- architectural structure
    'Q11303',    -- skyscraper
    'Q162875',   -- mausoleum
    'Q6023295',  -- funerary structure
    'Q44539',    -- temple
    'Q867143',   -- Roman temple
    'Q5393308',  -- Buddhist temple
    'Q842402',   -- Hindu temple
    'Q96352513', -- religious building ruin
    'Q24398318', -- religious building
    'Q1370598',  -- place of worship
    'Q120560',   -- minor basilica
    'Q163687',   -- basilica
    'Q16970',    -- church building
    'Q1088552',  -- Catholic church building
    'Q108325',   -- chapel
    'Q56750657', -- hermitage
    'Q56242063', -- Protestant church building
    'Q56242250', -- anglican or episcopal cathedral
    'Q56242045', -- Anglican church
    'Q58079064', -- protestant cathedral
    'Q32815',    -- mosque
    'Q1154710',  -- association football stadium
    'Q12819564', -- station
    'Q1248784',  -- airport
    'Q644371',   -- international airport
    'Q94993988', -- commercial traffic aerodrome
    'Q62447',    -- aerodrome
    'Q9259',     -- UNESCO World Heritage Site
    'Q88291',    -- citadel
    'Q57821',    -- fortification
    'Q1784293',  -- cordon
    'Q1440300',  -- observation tower
    'Q12518',    -- tower
    'Q33506',    -- museum
    'Q200334',   -- bell tower
    'Q72926449', -- church tower
    'Q797765',   -- inclined tower
    'Q16560',    -- palace
    'Q15911738', -- hydroelectric power station
    'Q159719',   -- power station
    'Q3497167',  -- gravity dam
    'Q12323',    -- dam
    'Q483110',   -- stadium
    'Q641226',   -- arena
}
-- Wikidata classes reported as the "chemical" category (see `categories`).
local chemical_ids = {
    'Q11173', -- chemical compound
}
-- Wikidata classes reported as the "food" category. The list is registered in
-- `categories` and is the last entry of `queryindex`, so it is consulted only
-- after every other class list failed to match.
-- Original author's note: most Wikidata classification uses these ids through
-- 'subclass of' rather than 'instance of', so direct matches are uncommon.
local food_ids = {
    'Q2095',     -- food
    'Q6460735',  -- meals
    'Q13276',    -- cake
    'Q5159627',  -- confection
    'Q182940',   -- dessert
    'Q951964',   -- food product
    'Q26902770', -- Food products
    'Q4498085',  -- bakery product
    'Q25403900', -- food ingredient
    'Q746549',   -- dish
    'Q41415',    -- soup
    'Q1401891',  -- viennoiserie
    'Q1470834',  -- fruit preparation
    'Q772630',   -- main course
    'Q13270',    -- biscuit
    'Q13266',    -- cookie
    'Q477248',   -- pastry
}
-- Properties whose presence on an entity marks it as "food" when no class
-- list matched (see `p_categories`).
local food_properties = {
    'P5456', -- TasteAtlas ID
    'P2012', -- cuisine
    'P1821', -- Open Food Facts food category ID
}
-- Wikidata classes reported as the "event" category (see `categories`).
-- The list is broad: besides occurrences it also covers treaties, holidays,
-- elections, prizes and decorations.
local event_ids = {
    'Q1190554',  -- occurrence
    'Q1656682',  -- event
    'Q27968055', -- recurrent event edition
    'Q12184',    -- pandemic
    'Q44512',    -- epidemic
    'Q3241045',  -- disease outbreak
    'Q838718',   -- city fire
    'Q168983',   -- conflagration
    'Q3839081',  -- disaster
    'Q381072',   -- crisis
    'Q290178',   -- economic crisis
    'Q13418847', -- historical event
    'Q625298',   -- peace treaty
    'Q131569',   -- treaty
    'Q321839',   -- accord
    'Q2006324',  -- agreement
    'Q7157512',  -- peace conference
    'Q1644573',  -- pilgrimage
    'Q375011',   -- religious festival
    'Q132241',   -- festival
    'Q4801521',  -- arts festival
    'Q1751626',  -- theatre festival
    'Q868557',   -- music festival
    'Q23902005', -- literary festival
    'Q1197685',  -- public holiday
    'Q60075825', -- Christian holy day
    'Q94920',    -- Jewish holiday
    'Q11483816', -- annual event
    'Q2558684',  -- world day
    'Q2673813',  -- rest day
    'Q1445650',  -- holiday
    'Q1062856',  -- anniversary
    'Q200538',   -- party
    'Q59544',    -- intangible cultural heritage
    'Q210272',   -- cultural heritage
    'Q1914636',  -- activity
    'Q273120',   -- protest
    'Q10931',    -- revolution
    'Q124734',   -- rebellion
    'Q3109572',  -- civil resistance
    'Q754479',   -- nonviolent resistance
    'Q1673271',  -- regime change
    'Q1510761',  -- social change
    'Q8465',     -- civil war
    'Q198',      -- war
    'Q350604',   -- armed conflict
    'Q180684',   -- conflict
    'Q750215',   -- mass murder
    'Q41397',    -- genocide
    'Q4817637',  -- atrocity crime
    'Q2223653',  -- terrorist attack
    'Q1139665',  -- political murder
    'Q16738832', -- criminal case
    'Q188055',   -- siege
    'Q273976',   -- blockade
    'Q645883',   -- military operation
    'Q28972820', -- operation
    'Q53706',    -- robbery
    'Q1371150',  -- hostage crisis
    'Q18608583', -- recurring sporting event
    'Q15275719', -- recurring event
    'Q2495862',  -- congress
    'Q2761147',  -- meeting
    'Q2618461',  -- legislative election
    'Q1076105',  -- general election
    'Q40231',    -- election
    'Q5257307',  -- prize
    'Q7191',     -- Nobel Prize
    'Q618779',   -- award
    'Q378427',   -- literary award
    'Q17701409', -- economics award
    'Q11448906', -- science award
    'Q15229207', -- sports award
    'Q19020',    -- Academy Awards
    'Q96474685', -- award for best original music
    'Q11796413', -- decoration
    'Q193622',   -- order
    'Q56291528', -- state order
    'Q3302125',  -- state decoration
    'Q1788716',  -- military decoration
    'Q973011',   -- campaign medal
    'Q131647',   -- medal
}
-- Wikidata classes reported as the "astronomical object" category (see `categories`).
local astr_object_ids = {
    'Q634',      -- planet
    'Q13205267', -- planet of the Solar System
    'Q844911',   -- superior planet
    'Q3504248',  -- inner planet
    'Q3901935',  -- inferior planet
    'Q2199',     -- dwarf planet
    'Q16873378', -- planetary body
    'Q3132741',  -- substellar object
    'Q400144',   -- planemo
    'Q2537',     -- natural satellite
    'Q1297322',  -- satellite
    'Q121750',   -- gas giant
    'Q21857994', -- giant planet
    'Q30014',    -- outer planet
    'Q30250610', -- object in the outer Solar System
    'Q6999',     -- astronomical object
}
-- Maps each category name to the list of Wikidata class ids that select it.
-- The lists are looked up by name, in the order given by `queryindex`.
local categories = {
    number = number_ids,
    grapheme = grapheme_ids,
    year = year_ids,
    human = human_ids,
    language = language_ids,
    ["language family"] = lanfamily_ids,
    ["ethnic group"] = ethnic_ids,
    taxon = taxon_ids,
    city = city_ids,
    country = country_ids,
    ["musical band"] = musical_band_ids,
    album = album_ids,
    tv = tv_ids,
    book = book_ids,
    ["intellectual work"] = int_work_ids,
    religion = religion_ids,
    script = script_ids,
    school = school_ids,
    hydrography = hydrography_ids,
    orography = orography_ids,
    software = software_ids,
    disease = disease_ids,
    website = website_ids,
    organization = organization_ids,
    building = building_ids,
    chemical = chemical_ids,
    event = event_ids,
    ["astronomical object"] = astr_object_ids,
    food = food_ids,
}
-- Maps a category name to a list of property ids. An entity that has a claim
-- for any listed property is given that category when no class list matched.
local p_categories = {
    organization = organization_properties,
    disease = disease_properties,
    food = food_properties,
    city = city_properties,
}
-- Category names in lookup order. Both the class-based and the property-based
-- search walk this list from the top and stop at the first match, so an
-- earlier entry wins when an entity fits several categories.
-- Every name here must also be a key of `categories`.
local queryindex = {
    "number",
    "grapheme",
    "year",
    "human",
    "language",
    "language family",
    "ethnic group",
    "taxon",
    "city",
    "country",
    "musical band",
    "album",
    "tv",
    "book",
    "intellectual work",
    "religion",
    "script",
    "school",
    "hydrography",
    "orography",
    "software",
    "disease",
    "website",
    "organization",
    "building",
    "chemical",
    "event",
    "astronomical object",
    "food",
}
local p = {}
local lualinq = require "Module:LuaLinq"
--- Report whether two lists of ids share at least one value.
-- Declared local so the helper no longer leaks into the global environment.
-- @param item_ids     array of ids found on the entity (nil is treated as empty)
-- @param category_ids array of ids that define a category (nil is treated as empty)
-- @return true when some id occurs in both arrays, false otherwise
local function belongsTo(item_ids, category_ids)
    -- Index the entity's ids once so every category id costs a single lookup,
    -- instead of rescanning item_ids for each category id.
    local present = {}
    for _, id in ipairs(item_ids or {}) do
        present[id] = true
    end
    for _, id in ipairs(category_ids or {}) do
        if present[id] then
            return true
        end
    end
    return false
end
--- Find the first category whose class list shares an id with item_ids.
-- Categories are tried in the order given by `queryindex`, so the earliest
-- matching entry wins.
-- Declared local so the helper no longer leaks into the global environment.
-- @param item_ids array of Wikidata item ids
-- @return the category name (a key of `categories`), or nil when nothing matches
local function findCategoryName(item_ids)
    for _, name in ipairs(queryindex) do
        if belongsTo(item_ids, categories[name]) then
            return name
        end
    end
    return nil
end
--- Tell whether the entity is flagged as a taxon common name.
-- True when some P31 statement has the item value Q55983715 and carries a
-- P642 qualifier whose first snak points at an item.
-- Statements and qualifiers without a datavalue are skipped instead of raising
-- an error, and so is a qualified statement that has no P642 qualifier.
-- Declared local so the helper no longer leaks into the global environment.
-- @param item entity table exposing a `claims` table keyed by property id
-- @return boolean
local function isTaxonCommonName(item)
    local statements = item and item.claims and item.claims["P31"]
    if type(statements) ~= 'table' then
        return false
    end
    for _, statement in ipairs(statements) do
        local snak = statement.mainsnak
        local value = snak and snak.datavalue and snak.datavalue.value
        if type(value) == 'table' and value['id'] == "Q55983715" then
            local qualifier_snaks = statement.qualifiers and statement.qualifiers["P642"]
            local first = qualifier_snaks and qualifier_snaks[1]
            local target = first and first.datavalue and first.datavalue.value
            if type(target) == 'table' and target['id'] ~= nil then
                return true
            end
        end
    end
    return false
end
--- Guess a category from the properties an entity uses.
-- Walks `queryindex` in order and, for each category that has a property list
-- in `p_categories`, returns that category as soon as the entity has a claim
-- for one of the listed properties.
-- Declared local so the helper no longer leaks into the global environment.
-- @param item entity table; `item.claims` must be a table keyed by property id
-- @return the category name, or nil when no listed property is present
local function findCategoryNameFromProperties(item)
    for _, name in ipairs(queryindex) do
        local property_ids = p_categories[name]
        if property_ids then
            for _, property_id in ipairs(property_ids) do
                if item.claims[property_id] then
                    return name
                end
            end
        end
    end
    return nil
end
--- Check that a value carries usable content.
-- nil counts as unset, and so does a string that is empty once trimmed;
-- every other value counts as set.
-- @param var any value
-- @return boolean
local function isSet(var)
    if var == nil then
        return false
    end
    if type(var) == 'string' then
        return mw.text.trim(var) ~= ''
    end
    return true
end
--- Work out which entity id the caller asked for.
-- Looks at `item`, then `from`, then (only when `unnamed` is truthy) the first
-- positional value, first in `args` and then in `pargs`. The first value that
-- is actually set wins, so an empty `item` no longer hides a usable `from`.
-- A prefixed id such as "Property:P31" is reduced to the part after the last
-- colon. When nothing is supplied the id of the current page's entity is used.
-- @param args    argument table of the call (nil is treated as empty)
-- @param pargs   argument table of the parent frame (nil is treated as empty)
-- @param unnamed when truthy, also accept the first positional argument
-- @return the entity id, or whatever mw.wikibase.getEntityIdForCurrentPage()
--         returns when no id was supplied
local function getEntityId(args, pargs, unnamed)
    args = args or {}
    pargs = pargs or {}

    -- First usable id in one argument table, or nil.
    local function pick(source)
        if isSet(source.item) then
            return source.item
        end
        if isSet(source.from) then
            return source.from
        end
        if unnamed then
            local positional = mw.text.trim(source[1] or '')
            if positional ~= '' then
                return positional
            end
        end
        return nil
    end

    local id = pick(args) or pick(pargs)
    if id == nil then
        return mw.wikibase.getEntityIdForCurrentPage()
    end
    if string.find(id, ":", 1, true) then
        -- Remove prefixes as in "Property:Pid". Entity ids never contain a
        -- colon, so the id is whatever follows the last one.
        id = string.match(id, "([^:]*)$")
    end
    return id
end
--- Collect the item ids referenced by a list of statements.
-- Statements whose main snak carries no item value (for instance "no value" or
-- "unknown value" snaks, which have no datavalue) are skipped.
-- @param statements array of statements, or nil
-- @return array of ids, possibly empty
local function statementItemIds(statements)
    local ids = {}
    for _, statement in ipairs(statements or {}) do
        local snak = statement.mainsnak
        local value = snak and snak.datavalue and snak.datavalue.value
        local id = type(value) == 'table' and value['id'] or nil
        if id ~= nil then
            ids[#ids + 1] = id
        end
    end
    return ids
end

--- Entry point: name the category of a Wikidata entity.
-- Resolution order:
--   1. the entity's P31 values, matched against the class lists;
--   2. the P279 values of the entity's first P31 class;
--   3. the "taxon common name" special case;
--   4. the properties present on the entity.
-- @param frame the frame object of an #invoke call, or a plain argument table
--              when called from another module
-- @return the category name, nil when nothing matched, or one of the messages
--         'entity not found' / 'entity not found bis' when the entity cannot
--         be resolved or has no claims
function p.main(frame)
    local args = frame.args or frame -- via invoke or require
    local pargs = {}
    if frame.args and type(frame.getParent) == 'function' then
        -- getParent() may yield nothing; fall back to an empty table then.
        local parent = frame:getParent()
        pargs = parent and parent.args or {}
    end

    local id = getEntityId(args, pargs)
    if id == nil then
        return 'entity not found'
    end
    local item = mw.wikibase.getEntity(id)
    if item == nil or item.claims == nil then
        return 'entity not found bis'
    end

    local class_ids = statementItemIds(item.claims["P31"])
    local name = nil
    if #class_ids > 0 then
        name = findCategoryName(class_ids)
        if name == nil then
            -- Try once more with the parents of the first class. The class
            -- entity may be missing or have no claims at all.
            local class = mw.wikibase.getEntity(class_ids[1])
            local parent_ids = statementItemIds(class and class.claims and class.claims["P279"])
            name = findCategoryName(parent_ids)
        end
        if name == nil and isTaxonCommonName(item) then
            name = "taxon common name"
        end
    end
    if name == nil then
        name = findCategoryNameFromProperties(item)
    end
    return name
end
-- Aliases: alternative entry-point names that forward to p.main at call time.
p.find = function(frame)
    return p.main(frame)
end

p.findCategory = function(frame)
    return p.main(frame)
end
return p