Skip to content

Commit

Permalink
Add support for Hindi language
Browse files Browse the repository at this point in the history
Copied from stale PR here: #442
  • Loading branch information
bryananderson committed Dec 17, 2024
1 parent 2b4d0bb commit b66644c
Show file tree
Hide file tree
Showing 5 changed files with 511 additions and 6 deletions.
Binary file added .coverage
Binary file not shown.
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a
* ``fr_CH`` (French - Switzerland)
* ``fr_DZ`` (French - Algeria)
* ``he`` (Hebrew)
* ``hi`` (Hindi)
* ``hu`` (Hungarian)
* ``id`` (Indonesian)
* ``is`` (Icelandic)
Expand Down Expand Up @@ -163,4 +164,4 @@ added Lithuanian support, but didn't take over maintenance of the project.
I am thus basing myself on Marius Grigaitis' improvements and re-publishing
``pynum2word`` as ``num2words``.

Virgil Dupras, Savoir-faire Linux
Virgil Dupras, Savoir-faire Linux
12 changes: 7 additions & 5 deletions num2words/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@
lang_CS, lang_CY, lang_DA, lang_DE, lang_EN, lang_EN_IN,
lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, lang_ES_CR,
lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR,
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID,
lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT,
lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, lang_PT_BR,
lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, lang_SV, lang_TE,
lang_TET, lang_TG, lang_TH, lang_TR, lang_UK, lang_VI)
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HI, lang_HU,
lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ,
lang_LT, lang_LV, lang_NL, lang_NO, lang_PL, lang_PT,
lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, lang_SR,
lang_SV, lang_TE, lang_TET, lang_TG, lang_TH, lang_TR, lang_UK,
lang_VI)

CONVERTER_CLASSES = {
'am': lang_AM.Num2Word_AM(),
Expand Down Expand Up @@ -84,6 +85,7 @@
'tet': lang_TET.Num2Word_TET(),
'hu': lang_HU.Num2Word_HU(),
'is': lang_IS.Num2Word_IS(),
'hi': lang_HI.Num2Word_HI(),
}

CONVERTES_TYPES = ['cardinal', 'ordinal', 'ordinal_num', 'year', 'currency']
Expand Down
201 changes: 201 additions & 0 deletions num2words/lang_HI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# -*- encoding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA

from __future__ import unicode_literals

import string

from num2words.base import Num2Word_Base


class Num2Word_HI(Num2Word_Base):
"""
Hindi (HI) Num2Word class
"""

_irregular_ordinals = {
0: "शून्य",
1: "पहला",
2: "दूसरा",
3: "तीसरा",
4: "चौथा",
6: "छठा",
}
_irregular_ordinals_nums = {
0: "०",
1: "१ला",
2: "२रा",
3: "३रा",
4: "४था",
6: "६ठा",
}
_hindi_digits = "०१२३४५६७८९" # 0-9
_digits_to_hindi_digits = dict(zip(string.digits, _hindi_digits))
_regular_ordinal_suffix = "वाँ"

def setup(self):
# Note: alternative forms are informal
self.low_numwords = [
"निन्यानवे",
"अट्ठानवे",
"सत्तानवे", # alternative "सतानवे"
"छियानवे",
"पचानवे",
"चौरानवे",
"तिरानवे",
"बानवे",
"इक्यानवे",
"नब्बे",
"नवासी",
"अट्ठासी",
"सतासी",
"छियासी",
"पचासी",
"चौरासी",
"तिरासी",
"बयासी",
"इक्यासी",
"अस्सी",
"उनासी", # alternative "उन्नासी"
"अठहत्तर", # alternative "अठहतर"
"सतहत्तर", # alternative "सतहतर"
"छिहत्तर", # alternative "छिहतर"
"पचहत्तर", # alternative "पचहतर"
"चौहत्तर", # alternative "चौहतर"
"तिहत्तर", # alternative "तिहतर"
"बहत्तर", # alternative "बहतर"
"इकहत्तर", # alternative "इकहतर"
"सत्तर",
"उनहत्तर", # alternative "उनहतर"
"अड़सठ", # alternative "अड़सठ"
"सड़सठ", # alternative "सड़सठ"
"छियासठ",
"पैंसठ",
"चौंसठ",
"तिरसठ",
"बासठ",
"इकसठ",
"साठ",
"उनसठ",
"अट्ठावन", # alternative "अठावन"
"सत्तावन", # alternative "सतावन"
"छप्पन",
"पचपन",
"चौवन",
"तिरेपन", # alternative "तिरपन"
"बावन",
"इक्यावन",
"पचास",
"उनचास",
"अड़तालीस", # alternative "अड़तालीस"
"सैंतालीस",
"छियालीस", # alternative "छयालिस"
"पैंतालीस",
"चौवालीस", # alternative "चवालीस"
"तैंतालीस", # alternative "तैतालीस"
"बयालीस",
"इकतालीस",
"चालीस",
"उनतालीस",
"अड़तीस", # alternative "अड़तीस"
"सैंतीस",
"छत्तीस", # alternative "छतीस"
"पैंतीस",
"चौंतीस",
"तैंतीस",
"बत्तीस", # alternative "बतीस"
"इकत्तीस", # alternative "इकतीस"
"तीस",
"उनतीस",
"अट्ठाईस", # alternative "अट्ठाइस"
"सत्ताईस", # alternative "सताइस"
"छब्बीस",
"पच्चीस",
"चौबीस",
"तेईस", # alternative "तेइस"
"बाईस",
"इक्कीस", # alternative "इकीस"
"बीस",
"उन्नीस",
"अट्ठारह", # alternative "अठारह"
"सत्रह",
"सोलह",
"पंद्रह",
"चौदह",
"तेरह",
"बारह",
"ग्यारह",
"दस",
"नौ",
"आठ",
"सात",
"छः", # alternative "छह"
"पाँच", # alternative "पांच"
"चार",
"तीन",
"दो",
"एक",
"शून्य",
]

self.mid_numwords = [(100, "सौ")]
self.high_numwords = [
(11, "ख़रब"),
(9, "अरब"),
(7, "करोड़"), # alternative "करोड़"
(5, "लाख"),
(3, "हज़ार"), # alternative "हज़ार"
]
self.pointword = "दशमलव"
self.negword = "माइनस "

def set_high_numwords(self, high):
for n, word in self.high_numwords:
self.cards[10**n] = word

def merge(self, lpair, rpair):
ltext, lnum = lpair
rtext, rnum = rpair
if lnum == 1 and rnum < 100:
return rtext, rnum
elif 100 > lnum > rnum:
return "%s-%s" % (ltext, rtext), lnum + rnum
elif lnum >= 100 > rnum:
return "%s %s" % (ltext, rtext), lnum + rnum
elif rnum > lnum:
return "%s %s" % (ltext, rtext), lnum * rnum
return "%s %s" % (ltext, rtext), lnum + rnum

def to_ordinal(self, value):
if value in self._irregular_ordinals:
return self._irregular_ordinals[value]

# regular Hindi ordinals are derived from cardinals
# by modifying the last member of the expression.
cardinal = self.to_cardinal(value)
return cardinal + self._regular_ordinal_suffix

def _convert_to_hindi_numerals(self, value):
return "".join(map(self._digits_to_hindi_digits.__getitem__,
str(value)))

def to_ordinal_num(self, value):
if value in self._irregular_ordinals_nums:
return self._irregular_ordinals_nums[value]

return self._convert_to_hindi_numerals(value) \
+ self._regular_ordinal_suffix
Loading

0 comments on commit b66644c

Please sign in to comment.