U
    e%F                  u  @   s  d Z ddlZddlmZ ddlmZ edZdDddZdEd
dZ	dd Z
dd ZdFddZdd Zdd Ze  eejZddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddΜZdddddddddddddddddddddddddddddddddddddddddddddddddd ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdАdѐdҐdӐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސdߐddddddddddddddddddddddddddddddddd ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCtZeeZeeZdS (G  a  
Translate between language names and language codes.

The iso639-3 language codes were downloaded from the registration authority at
https://iso639-3.sil.org/

The iso639-3 codeset is evolving, so retired language codes are kept in the
"iso639retired" dictionary, which is used as fallback by the wrapper functions
"langname" and "langcode", in order to support the lookup of retired codes.

The "langcode" function returns the current iso639-3 code if there is one,
and falls back to the retired code otherwise. As specified by BCP-47,
it returns the shortest (2-letter) code by default, but 3-letter codes
are also available:

    >>> import nltk.langnames as lgn
    >>> lgn.langname('fri')          #'fri' is a retired code
    'Western Frisian'

    The current code is different from the retired one:
    >>> lgn.langcode('Western Frisian')
    'fy'

    >>> lgn.langcode('Western Frisian', typ = 3)
    'fry'

    N)warn)bcp47z[a-z][a-z][a-z]?fullc                 C   s   |  d}|d  }t|r|tkr0t| S |tkrpt| }td|d|dd d|g|dd  } t	| }|d	kr|S |r| d
d S ntd|dd dS )z
    Convert a composite BCP-47 tag to a language name

    >>> from nltk.langnames import langname
    >>> langname('ca-Latn-ES-valencia')
    'Catalan: Latin: Spain: Valencian'

    >>> langname('ca-Latn-ES-valencia', typ="short")
    'Catalan'
    -r   zShortening z to    
stacklevel   Nr   :zCould not find code in )
splitlowercodepattern	fullmatchiso639retirediso639shortr   joinr   name)tagtyptagscodecode2r    r   O/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/langnames.pylangname.   s    


r   r   c                 C   sV   | t jkr0t j|  }|dkr,|tkr,t| }|S | tkr@t|  S td| dd dS )ai  
    Convert language name to iso639-3 language code. Returns the short 2-letter
    code by default, if one is available, and the 3-letter code otherwise:

    >>> from nltk.langnames import langcode
    >>> langcode('Modern Greek (1453-)')
    'el'

    Specify 'typ=3' to get the 3-letter code:

    >>> langcode('Modern Greek (1453-)', typ=3)
    'ell'
       zCould not find language in r   r   N)r   langcode
iso639longiso639code_retiredr   )r   r   r   r   r   r   r   K   s    

r   c                 C   s
   t j|  S )z^
    Convert BCP-47 tag to Wikidata Q-code

    >>> tag2q('nds-u-sd-demv')
    'Q4289225'
    )r   wiki_q)r   r   r   r   tag2qi   s    r    c                 C   s   t |  S )z^
    Convert Wikidata Q-code to BCP-47 tag

    >>> q2tag('Q4289225')
    'nds-u-sd-demv'
    )
wiki_bcp47)qcoder   r   r   q2tags   s    r#   c                 C   s   t t| |S )z
    Convert Wikidata Q-code to BCP-47 (full or short) language name

    >>> q2name('Q4289225')
    'Low German: Mecklenburg-Vorpommern'

    >>> q2name('Q4289225', "short")
    'Low German'
    )r   r#   )r"   r   r   r   r   q2name}   s    
r$   c                 C   s   t t| S )zd
    Convert simple language name to Wikidata Q-code

    >>> lang2q('Low German')
    'Q25433'
    )r    r   )r   r   r   r   lang2q   s    r%   c                 C   s:   t |  t t|  kr.dd |  D S td dS )z3Return inverse mapping, but only if it is bijectivec                 S   s   i | ]\}}||qS r   r   ).0keyvalr   r   r   
<dictcomp>   s      z inverse_dict.<locals>.<dictcomp>z1This dictionary has no bijective inverse mapping.N)lenkeyssetvaluesitemsr   )Zdicr   r   r   inverse_dict   s    r/   ZaaabafZakamarZanasavZaeZayazbaZbmbeZbnZbiZbobsbrbgcacschZcecuZcvkwcocrcydadeZdvZdzeleneoeteueefofafjfifrfyffgdgaglgvZgnguZhtZhashhehzhiZhohrhuhyZigioiiiuieiaidikisitZjvjaklknkskakrkkkmZkirwkykvZkgkoZkjZkuloZlalvZlilnltZlbZluZlgmhmlmrmkZmgmtZmnmimsZmynanvnrndngnenlnnnbnonyocZojoromospapiplptZpsZquZrmroZrnruZsgsasiskslsesmZsnsdsostessqscsrssZsuswsvtytatttetgtlthtitotntsZtktrZtwZugukuruzveviZvowaZwoxhyiZyoZzazhzu)ZaarZabkZafrZakaZamhZaraargasmZavaZaveZaymZazeZbakZbamZbelZbenZbisZbodZbosZbreZbulcatZcesZchaZcheZchuZchvZcorcosZcreZcymZdanZdeudivZdzoZellZengZepoZestZeuseweZfaoZfasZfijZfinZfraZfryZfulZglaZgleZglgZglvZgrnZgujZhatZhauZhbsZhebZherZhinZhmoZhrvZhunZhyeZiboZidoZiiiZikuZileZinaindZipkZislZitaZjavZjpnkalZkanZkasZkatZkauZkazZkhmZkikZkinZkirZkomZkonZkorZkuaZkurZlaoZlatZlavZlimZlinZlitZltzZlubZlugZmahZmalmarZmkdZmlgZmltmonZmriZmsaZmyaZnauZnavZnblZndeZndoZnepZnldZnnoZnobZnorZnyaZociZojiZoriZormZosspanZpliZpolZporZpusZqueZrohZronrunZrusZsagsansinZslkZslvZsmeZsmoZsnaZsndZsomZsotZspaZsqiZsrdZsrpZsswsunZswaZsweZtahZtamZtattelZtgkZtglZthaZtirZtonZtsnZtsoZtukZturZtwiZuigZukrZurdZuzbZvenZvieZvolZwlnZwolZxhoZyidZyorZzhaZzhoZzulzWestern FrisianZ	AuvergnatZGasconZLimousinZLanguedocienu
   Provençalu   Amapá CreoleZBoganu   BanawázBelgian Sign LanguagezSouthern ZhuangZChittagonianz
Falam Chinu   JaruáraZKohoroxitariZMoinbaZAikuzTlalitzlipa NahuatlzSoutheastern Puebla NahuatlZ
OccidentalZTomyangzPatla-Chicontla Totonacu   MiarrãZYinglishzLachirioag ZapotecZAtuencezNavarro-Labourdin BasquezSouletin BasqueZAmeraxZAheZAariZAmikoanaZArafundiZAdzeraZPamonaZBisuZBengkuluz
Hmong NjuazBakung KenyahzSarawak BisayazBahau River KenyahZBuxinhuaZBuyangzNorthern Zhuangu	   CarútanazDarang Dengz
Land DayakZEnimZIzerez
Geman DengzGarreh-AjuranZItutangzLahu ShizKayan River KenyahzNorth KorowaiZKruiZKatinganZLematangZLintangZBerawanu   Northern NambikuárazSouthwest GbayaZ	ArakaneseZMimiZMadangzKhiamniungan NagaZNyaduZOganZOrokaivazIpeka-TapuiazSouthern PesisirZPenesakZ	PalembangzLower PokomoZPubianZRanauZ	RajbanshiZRawasZSemendozSindang KelingiZSelakozKahumamahon SaluanZSerawaiZTarpiaZSubaZSungkaiZSizakizSouthern MarakwetZTanjongzTutong 1zUpper Baram Kenyahz
Kayu Agungz	Muko-MukoZWareZKahayanzMahakam KenyahZKunfalzDayao YizMuji YizPula YizPuwa Yiz	Wumeng YizYuanjiang-Mojiang YizMalay (individual language)ZMundarizSilt'ezOld PersianZSerbianZCroatianZSakanZ	MoldavianZAariyau   Cubulco Achíz Yepocapa Southwestern CakchiquelZChumashzNorthern CakchiquelzSouth Central CakchiquelzEastern CakchiquelzSouthern Cakchiquelu!   Santa María De Jesús Cakchiquelz Santo Domingo Xenacoj Cakchiquelz"Acatenango Southwestern CakchiquelzWestern Cakchiquelu   Ixtatán Chujz	Tila Cholu   Cunén QuichézEmiliano-RomagnoloZ	EuropantozGamo-Gofa-DawrozSoutheastern Huastecu   San Luís Potosí Huastecz
Nebaj IxilzChajul IxilzWestern JacaltecozSouthern MamzTajumulco MamZTacaneczCentral Mamu   Todos Santos Cuchumatán MamzEastern Pokomamu   Western PokomchízSouthern Pokomamu   Papavôu   Joyabaj Quichéu   West Central Quichéu   Eastern Quichéu   San Andrés QuichéZ	Malinguatz
Santa CruzzToala'u   Bachajón TzeltalzChamula Tzotzilu   Chenalhó Tzotzilu   San Andrés Larrainzar TzotzilzWestern Tzutujilu   Huixtán Tzotzilu   Zinacantán TzotzilZVatratazChan Santa Cruz MayaZNyengZShakaraZParananzAlbay BicolanoZFinalligzBeti (Cameroon)Z	ChorotegaZChimakumZDarkhatZDarwaziZ	GabutamonzMohegan-Montauk-NarragansettzCataelano MandayazSangab Mandayau   CalózSanglechi-IshkashimiZ
SurigaononzSumo-MayangnaZ	TangshewiZWagawagazAyi (China)zSouthern Betsimisaraka MalagasyzDhanwar (India)zKolum So DogonZMaheiZNaxiZNootkaZTingalZTakpaZTunenZ
BandjigaliZ	ChaungthazKhumi Awa ChinzNisi (India)zWalo Kumbe DogonZ	ElpaputihZGarawaZGelaoZHoruruZIbilozJarawa (Nigeria)ZKadozUpper Tanudan Kalingaz
Paku KarenzKenuzi-DongolaZLuhuZOmatizDurango NahuatlZPaluZPongyongZSansuzSouth WemaleZWororaZWiranguZYangbyeZYendangZYampheZDanZDjiwarlizAghu TharnggaluZTalurzIzi-Ezaa-Ikwo-MgboZMeaZMalakhelZMaykulanZMudburazForest ManinkaZNguraZNgarlaZPanangZPiruZTanggaZWintuZXiandaoz
Yir YorontZYosZEmokz	Gugu MiniZLenguaZLamamzMaskoy Pidginz
Purum Nagau	   SanapanáZYughZAramanikZAdapu   ǂKxʼauǁʼeinz$Bemba (Democratic Republic of Congo)z$Borna (Democratic Republic of Congo)ZBuyaZDazau   Mangetti Dune ǃXungzGbati-riZ	ImeraguenZKakauhuaz
Sara DunjoZKwakZKakihumZLingkhimZMaligoZNgongu   ǃOǃungzMirpur PanjabiZSongaZThezSouthwestern TamangZUokhaZSubizYiddish Sign LanguagezMator-Taygi-KaragasZYanghozBaga MboteniZBhatolaZ	ChipiajesZCaguaZCoyaimazChilean QuechuaZCumeralZDhuwalzEastern GurungzSouthern GondiZGeyZIapamaZIranunZKassengZCoximazKota Marudu TinagasZKunggarazCoastal KadazanzTambunan DusunZNijadaliZ
NatagaimasZOmejesZPalumataZPonaresZPaozPray 3ZRunaZSavarazTempasuk DusunzTai Hang TongZTidongu	   Tai MènezTinoc KallahanZTomedeszKamba (Brazil)u   Kabixíu	   XipináwaZKarahawyanau   YaríZJengZKataangZKrimzLua'zPu KoZRienzRennellese Sign LanguageZSokZShinabozLyons Sign LanguageZMediakZMosiroZNdaktupzNataoran AmisZAsasZDirariz	Hun-SaareZLuiZKhlorZMarambazMina (India)ZNingyeZArmazTayabas AytazBabalia Creole ArabicZ	BarbacoasZCaucaZChamariZDegaruZDororozEastern KarnicZGuliguliZKhalajzNepali KuruxzKui (India)ZLumbeeZNarauZPalpaZ	SemandangZTapebaZKarrangaZ	TasmanianZZiriyaZThudamZBikaruzVaghat-Ya-Bijim-LegeriZBalauZGejiZMuyaZNgoniZ	PapitalaizIja-ZubaZWarapuzJudeo-Tunisian ArabicZ
ChungmbokozLaka (Nigeria)zLango (South Sudan)ZPiniZSamaZSebuyauzKulon-PazehZWardujiZWyandot(t  friZauvZgscZlmsZlncZprvZamdZbghZbnhZbvsZccyZcitZflmZjapZkobZmobZmzfZnhjZnhsZoccZtmxZtotZxmiZyibZztcZatfZbqeZbszZaexZaheZaizZaknZarfZazrZbcxZbiiZbkeZbluZbocZbsdZbwvZbxtZbyuZccxZcruZdatZdykZeniZfizgenZgghZituZkdsZknhZkrgZkrqZkxgZlmtZlntZlodZmbgZmdoZmhvZmivZmqdZnkyZnxjZognZorkZpajZpecpenZplmZpojZpunZraeZrjbZrwsZsddZsdiZsklZslbZsrjsufZsuhZsuuZszkZtleZtnjZttxZubmZvkyZvmoZwreZxahZxkmZxufZyioZymjZyplZypwZywmZyymZmlyZmuwZxstZopeZsccZscrZxskZmolZaayaccZcbmZchsZckcZckdZckeZckfZckiZckjZckkZckwZcnmZctiZcunemlZeurZgmoZhsfZhvaZixiZixjZjaimmsZmpfZmtzZmvcZmvjZpoaZpobZpouZppvZqujZqutZquuZqxiZsicZstcZtlzZtzbZtzcZtzeZtzsZtztZtzuZtzzZvlrZyusZnfgZnfkZagpZbhkZbkbZbtbZcjrZcmkZdrhZdrwZgavZmofZmstZmytZrmrZsglZsulsumZtnfZwgwZayxZbjqZdhaZdklZmjaZnbfZnooZtieZtkkZbazZbjdZccqZckaZdapZdwlZelpZgbcZgioZhrrZibijarZkdvZkghZkppZkzhZlcqZmgxZnlnZpbzZpgyZscaZtlwZunpZwiwZybdyenZymaZdafZdjlZggrZilwZiziZmegZmldZmntZmwdZmyqZnbxZnlrZpcrZpprZtggZwitZxiaZyiyZyosZemoZggmlegZlmmZmhhZpuzZsapZyuuZaamZadpZaueZbmyZbxxZbyyZdzdZgfxZgtiZimeZkbfZkojZkwqZkxeZliiZmwjZnnxZounZpmuZsgoZthxZtsfZuokZxsjZydsZymtZynhZbgmZbtlZcbeZcbhZcoyZcquZcumZdujZggnZggoZguvZiapZillZkgcZkoxZktrZkvsZkzjZkztZnadntsZomeZpmcZpodZppaZpryZrnaZsvrZtduZthctidtmpZtneZtoeZxbaZxbxZxipZxkhZyriZjegZkgdZkrmZprbZpukZrieZrsiZskkZsnhZlsgZmwxZmwyZncpZaisZasdZditZdudZlbaZlloZmydZmyiZnnsZaohZayyZbbzZbpbZccaZcdgZdguZdrrZekcZgliZkjfZkxlZkxuZlmzZnxuZplpZsdmZtbbZxrqZxtzZzirZthwZbicZbijZblgZgjiZmvmZngopatZvkiZwraZajtZcugZlaklnoZpiiZsmdZsnbZuunZwrdZwya)r   )r   )r   )__doc__rewarningsr   Znltk.corpusr   compiler   r   r   r    r#   r$   r%   r/   Zload_wiki_qr   r!   r   r   r   r   r   r   r   r   <module>
   s  



  >  
  z