You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
It looks like the transliterateBrahmic function was written before some scripts (e.g., Grantha) were added to Unicode. Characters in those scripts lie outside the Basic Multilingual Plane, so JavaScript stores each one as a surrogate pair (two UTF-16 code units) rather than as a single character. This is probably why you're having trouble transliterating from "superscripted" Tamil, where one letter is likewise written with more than one character. To be able to transliterate from such multi-unit or multi-character "Brahmic" scripts, you'll need to change the transliterateBrahmic function to mirror transliterateRoman. I'm using an older version of Sanscript.js, but it should look something like this:
/**
 * Transliterate `data` out of a Brahmic (abugida) source scheme using
 * longest-match tokenization.
 *
 * Up to `map.maxTokenLength` UTF-16 code units are buffered and the longest
 * prefix of that buffer found in `map.marks` or `map.letters` is consumed, so
 * source letters that span several code units (surrogate pairs, or a base
 * letter followed by a modifier) are matched as one token. A code unit that
 * starts no known token is copied to the output unchanged.
 *
 * When `map.toRoman` is truthy, the inherent vowel 'a' is emitted after a
 * consonant unless the next matched token is a mark. The "##" toggle for
 * skipping transliteration is not handled here.
 *
 * @param {string} data - Text to transliterate.
 * @param {Object} map - Lookup tables read by this function: `letters`,
 *     `marks`, `consonants`, `maxTokenLength` and `toRoman`.
 * @param {Object} [options] - Accepted for interface compatibility; not read.
 * @returns {string} The transliterated text.
 */
var transliterateBrahmic = function(data, map, options) {
    var hasOwn = Object.prototype.hasOwnProperty,
        buf = [],
        consonants = map.consonants,
        hadRomanConsonant = false,
        letters = map.letters,
        marks = map.marks,
        dataLength = data.length,
        maxTokenLength = map.maxTokenLength,
        tokenBuffer = '',
        toRoman = map.toRoman;

    // A missing, non-finite or non-positive token length would either drop
    // every character (the matching loop never runs) or never terminate
    // (Infinity). Fall back to matching one code unit at a time.
    if (typeof maxTokenLength !== 'number' || !isFinite(maxTokenLength) || maxTokenLength < 1) {
        maxTokenLength = 1;
    }

    // Keep looping after the input is exhausted until the buffer is drained.
    for (var i = 0, L; (L = data.charAt(i)) || tokenBuffer; i++) {
        // Fill the token buffer, if possible.
        var difference = maxTokenLength - tokenBuffer.length;
        if (difference > 0 && i < dataLength) {
            tokenBuffer += L;
            if (difference > 1) {
                continue;
            }
        }

        // Match all token substrings to our map, longest first.
        for (var j = 0; j < maxTokenLength; j++) {
            var tokenLength = maxTokenLength - j,
                token = tokenBuffer.slice(0, tokenLength);

            // Own-property checks only: a plain `marks[token]` or
            // `token in consonants` would also match names inherited from
            // Object.prototype (e.g. "constructor").
            if (hasOwn.call(marks, token) && marks[token] !== undefined) {
                // A mark replaces the inherent vowel of the preceding consonant.
                buf.push(marks[token]);
                hadRomanConsonant = false;
                tokenBuffer = tokenBuffer.slice(tokenLength);
                break;
            } else if (hasOwn.call(letters, token) && letters[token]) {
                if (hadRomanConsonant) {
                    buf.push('a');
                    hadRomanConsonant = false;
                }
                buf.push(letters[token]);
                hadRomanConsonant = toRoman && hasOwn.call(consonants, token);
                tokenBuffer = tokenBuffer.slice(tokenLength);
                break;
            } else if (j === maxTokenLength - 1) {
                // Nothing matched, even at the shortest length: pass the
                // leading code unit through untouched and retry from the next.
                if (hadRomanConsonant) {
                    buf.push('a');
                    hadRomanConsonant = false;
                }
                buf.push(token);
                tokenBuffer = tokenBuffer.slice(1);
            }
        }
    }

    // A trailing consonant still carries its inherent vowel.
    if (hadRomanConsonant) {
        buf.push('a');
    }
    return buf.join('');
};
To make this work with your version of Sanscript.js, you'll need to change buf.push('a') to buf.push(map.toSchemeA). You'll probably also want to add in a check for the "#" character to skip transliteration.
It might also be nice to rename the "transliterateBrahmic" function to something like "transliterateAbugida", since the real distinction here is the inherent vowel rather than the script family, and "transliterateRoman" to something like "transliterateAlphabet" (no inherent vowel).
The text was updated successfully, but these errors were encountered:
Hi,
It looks like the transliterateBrahmic function was written before some scripts (e.g., Grantha) were added to Unicode. Characters in those scripts lie outside the Basic Multilingual Plane, so JavaScript stores each one as a surrogate pair (two UTF-16 code units) rather than as a single character. This is probably why you're having trouble transliterating from "superscripted" Tamil, where one letter is likewise written with more than one character. To be able to transliterate from such multi-unit or multi-character "Brahmic" scripts, you'll need to change the transliterateBrahmic function to mirror transliterateRoman. I'm using an older version of Sanscript.js, but it should look something like this:
To make this work with your version of Sanscript.js, you'll need to change buf.push('a') to buf.push(map.toSchemeA). You'll probably also want to add in a check for the "#" character to skip transliteration.
It might also be nice to re-name the "transliterateBrahmic" function to something like "transliterateAbugida", since that's the issue here (it's about the inherent vowel) and "transliterateRoman" to something like "transliterateAlphabet" (no inherent vowel).
The text was updated successfully, but these errors were encountered: