-
Notifications
You must be signed in to change notification settings - Fork 87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add machine_translate_pofile management command #263
base: main
Are you sure you want to change the base?
Conversation
Allows using Wagtail Localize's configured machine translator to translate a PO file. Useful for testing translation.
I think this one needs a rebase then some tests and docs. Let me know if you'd like me to do this. |
@kaedroho if you have the time, then yes please! |
c7c2117
to
1fc56b0
Compare
I have implemented this in my own project with machine translations. It allows translating all of your project's PO files. You might want to add the fuzzy flag, and you might want to take pluralization into account. Just my thoughts. You are changing
from django.core.management.base import BaseCommand, CommandParser
from django.conf import settings
from django.utils.html import strip_tags
from django.utils import timezone
from wagtail.models import Locale
from wagtail_localize.machine_translators import get_machine_translator
from wagtail_localize.models import Translation
import polib, os, glob, re
from wagtail_ai.translations.base import BaseAITranslator
def get_translation_locale(
    po: "polib.POFile | None",
    options: "dict | None" = None,
) -> "tuple[Translation | None, Locale, Locale]":
    """Work out which locales a PO file should be translated between.

    The first source that yields both locales wins, in this order:

    1. ``X-WagtailLocalize-TranslationID`` metadata: the locales are taken
       from the matching ``Translation`` object, which is also returned.
    2. ``X-Src-Lang`` / ``X-Dst-Lang`` metadata.
    3. ``X-Machine-Translator-Source`` / ``X-Machine-Translator-Target``
       metadata (written by a previous run of this command).
    4. The ``source`` / ``target`` entries of ``options``. When only
       ``target`` is given, ``settings.LANGUAGE_CODE`` is the source.

    Args:
        po: The parsed PO file, or ``None`` when only ``options`` should be
            consulted (project-wide translation).
        options: Parsed command-line options; may be ``None``.

    Returns:
        ``(translation, source_locale, target_locale)`` where ``translation``
        is ``None`` unless the PO file referenced a ``Translation``.

    Raises:
        RuntimeError: If no source/target pair can be determined.
        Translation.DoesNotExist: If the referenced translation is missing.
        Locale.DoesNotExist: If a resolved language code has no ``Locale``.
    """
    # Test identity rather than truthiness: a PO file object with no entries
    # is an empty list-like object and would otherwise have its metadata
    # ignored.
    metadata = po.metadata if po is not None else {}
    options = options or {}
    translation = None

    if "X-WagtailLocalize-TranslationID" in metadata:
        translation = Translation.objects.get(
            uuid=metadata["X-WagtailLocalize-TranslationID"],
        )
        source_code = translation.source.locale.language_code
        target_code = translation.target_locale.language_code
    elif "X-Src-Lang" in metadata and "X-Dst-Lang" in metadata:
        source_code = metadata["X-Src-Lang"]
        target_code = metadata["X-Dst-Lang"]
    elif (
        "X-Machine-Translator-Source" in metadata
        and "X-Machine-Translator-Target" in metadata
    ):
        source_code = metadata["X-Machine-Translator-Source"]
        target_code = metadata["X-Machine-Translator-Target"]
    elif options.get("target"):
        # --source is optional; fall back to the project's default language.
        source_code = options.get("source") or settings.LANGUAGE_CODE
        target_code = options["target"]
    else:
        # ``po`` may be None here, so never dereference it unconditionally.
        subject = getattr(po, "fpath", None) or "the requested translation"
        raise RuntimeError(f"Could not determine source and target locale for {subject}, please specify --source and --target")

    source_locale = Locale.objects.get(language_code=source_code)
    target_locale = Locale.objects.get(language_code=target_code)
    return translation, source_locale, target_locale
# Heuristic detector for HTML markup: matches either something shaped like an
# opening/closing tag or a character reference (named, decimal or hex).
# IGNORECASE is required so that uppercase tags (``<BR/>``) and uppercase hex
# references (``&#xAB;``) are recognised as well.
is_html_re = re.compile(
    r"</?\s*[a-z-][^>]*\s*>|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);)",
    re.IGNORECASE,
)


def is_html(text: str) -> bool:
    """Return True when *text* appears to contain an HTML tag or entity.

    This is a regex heuristic, not a parser: it only reports whether a
    tag-like or entity-like substring occurs anywhere in *text*.
    """
    return bool(is_html_re.search(text))
class Command(BaseCommand):
    """Machine-translate PO files with the configured wagtail_localize translator.

    Two modes are supported:

    * ``--data FILE [FILE ...]``: translate each file and write the result
      next to it as ``<name>_machine_translated.po``. When the file carries a
      wagtail_localize translation ID, the result is also imported into that
      translation.
    * ``--project``: find every PO file of the target locale below
      ``settings.BASE_DIR`` and translate it in place.

    NOTE(review): the translator is used through ``can_translate``,
    ``display_name``, ``supports_html`` and ``translate_text``. The last two
    are not defined anywhere in this file; presumably they come from the
    ``BaseAITranslator`` interface. Confirm before using another translator.
    """

    help = "Run a wagtail_localize machine translator on a PO file based on settings defined in settings.py"

    @staticmethod
    def _parse_bool(value: str) -> bool:
        """Parse an explicit command-line boolean such as ``true`` or ``0``.

        Raises ``ValueError`` for unknown spellings, which argparse reports
        as an invalid argument value.
        """
        normalized = value.strip().lower()
        if normalized in ("1", "true", "t", "yes", "y", "on"):
            return True
        if normalized in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise ValueError(f"Invalid boolean value: {value!r}")

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the command-line options of this command."""
        parser.add_argument("-d", "--data", type=str, default=None, nargs="+", help="PO file(s) to translate")
        parser.add_argument("-s", "--source", type=str, default=None, help="Source locale")
        parser.add_argument("-t", "--target", type=str, default=None, help="Target locale")
        # ``type=bool`` would turn every non-empty value (including "False")
        # into True. Accept a bare ``--project`` as well as an explicit
        # ``--project true|false`` so existing invocations keep working.
        parser.add_argument(
            "-p",
            "--project",
            type=self._parse_bool,
            nargs="?",
            const=True,
            default=False,
            help="Translate all PO files in project directory",
        )
        return super().add_arguments(parser)

    def handle(self, *args, **options):
        """Entry point: translate the whole project or the listed files."""
        self.data = options["data"]
        self.translator: BaseAITranslator = get_machine_translator()
        if self.translator is None:
            # Nothing is configured, so every later call would fail.
            self.stderr.write("No machine translator is configured")
            return

        if options["project"]:
            return self.translate_project(options=options)

        if not self.data:
            self.stderr.write("No files specified")
            return

        for file in self.data:
            # The handlers only use ``file`` and the exception: names bound
            # while processing may be unset (or stale from the previous
            # file) at the point the error is raised.
            try:
                self._translate_file(file, options)
            except OSError as e:
                self.stderr.write(f"Could not open: {file} ({e})")
            except Locale.DoesNotExist as e:
                self.stderr.write(f"Could not find locale: {file} ({e})")
            except Translation.DoesNotExist as e:
                self.stderr.write(f"Could not find translation: {file} ({e})")

    def _translate_file(self, file: str, options: dict) -> None:
        """Translate one PO file, save a copy and import it when possible."""
        po = polib.pofile(file)
        try:
            translation, source_locale, target_locale = get_translation_locale(
                po,
                options=options,
            )
        except RuntimeError as e:
            # Locales could not be determined for this file; report it and
            # let the caller carry on with the remaining files.
            self.stderr.write(str(e))
            return

        self.stdout.write(f"Translating {file} from {source_locale} to {target_locale}")

        # Check if translator supports source and target locale
        if not self.translator.can_translate(source_locale, target_locale):
            self.stderr.write(f"Translator {self.translator.display_name} does not support {source_locale} to {target_locale}: {file}")
            return

        any_translated = self.translate_po(
            source_locale=source_locale,
            target_locale=target_locale,
            po=po,
        )
        if not any_translated:
            self.stderr.write(f"No entries translated in {file}")
            return

        # Record how the file was produced; the source/target keys are read
        # back by get_translation_locale on a later run.
        po.metadata["X-Machine-Translator"] = self.translator.display_name
        po.metadata["X-Machine-Translator-Creation"] = timezone.now().isoformat()
        po.metadata["X-Machine-Translator-Source"] = source_locale.language_code
        po.metadata["X-Machine-Translator-Target"] = target_locale.language_code

        # Save next to the input instead of overwriting it.
        stem = os.path.splitext(os.path.basename(file))[0]
        output_path = os.path.join(
            os.path.dirname(file),
            f"{stem}_machine_translated.po",
        )
        self.stdout.write(f"Saving {output_path}")
        po.save(output_path)

        if not translation:
            self.stdout.write(f"No wagtail_localize translation found for {file}, skipping PO import")
            return

        # wagtail_localize translation exists, import PO file
        self.stdout.write(f"Importing {file} into {translation}")
        warnings = translation.import_po(
            po,
            delete=False,
            user=None,
            translation_type="machine",
            tool_name=self.translator.display_name,
        )
        for warning in warnings or ():
            self.stderr.write(f"Warning received importing {file}: {warning}")

    def translate_project(self, options: dict):
        """Translate, in place, every PO file of the target locale in the project.

        Files are searched recursively below ``settings.BASE_DIR`` in
        ``locale/<name>/LC_MESSAGES/``, where ``<name>`` is either the target
        language code or its Django locale name (e.g. ``pt-br`` / ``pt_BR``).
        """
        # Local import: only this method needs it.
        from django.utils.translation import to_locale

        # Validate up front: without a target, locale resolution cannot
        # succeed when there is no PO file to read metadata from.
        if not options.get("target"):
            self.stderr.write("Please specify --source and --target")
            return

        try:
            _, source_locale, target_locale = get_translation_locale(
                po=None,
                options=options,
            )
        except Locale.DoesNotExist as e:
            self.stderr.write(f"Could not find source or target locale ({e})")
            return

        if not self.translator.can_translate(source_locale, target_locale):
            self.stderr.write(f"Translator {self.translator.display_name} does not support {source_locale} to {target_locale}")
            return

        # dict.fromkeys de-duplicates while keeping order (both names are
        # identical for simple codes such as "fr").
        language_code = target_locale.language_code
        locale_dirs = dict.fromkeys((language_code, to_locale(language_code)))
        patterns = [
            os.path.join(settings.BASE_DIR, "**", "locale", name, "LC_MESSAGES", "*.po")
            for name in locale_dirs
        ]
        searched = ", ".join(patterns)
        self.stdout.write(f"Translating all PO files in project from {source_locale} to {target_locale}, looking in {searched}")

        # ``**`` only spans directory levels when recursive=True.
        found = sorted(
            {
                path
                for pattern in patterns
                for path in glob.glob(pattern, recursive=True)
            }
        )
        if not found:
            self.stderr.write(f"No PO files found in {searched}")
            return

        self.stdout.write(f"Found {len(found)} PO files in {searched}")
        for file in found:
            self.stdout.write(f"\tTranslating {file}")

        for file in found:
            try:
                po = polib.pofile(file)
            except OSError as e:
                self.stderr.write(f"Could not open: {file} ({e})")
                continue

            translated = self.translate_po(
                source_locale=source_locale,
                target_locale=target_locale,
                po=po,
            )
            if translated:
                self.stdout.write(f"Saving translations for {file}")
                po.save(file)
            else:
                self.stderr.write(f"No entries translated in {file}")

    def translate_po(self, source_locale: Locale, target_locale: Locale, po: polib.POFile) -> bool:
        """Translate every pending entry of *po* in place.

        Entries that are already translated are left untouched. Obsolete
        entries are skipped as well, so no translator calls are spent on
        messages that are no longer in use.

        Returns:
            True when at least one entry was sent to the translator.
        """
        any_translated = False
        for index, entry in enumerate(po):
            if entry.obsolete or entry.translated():
                continue
            po[index] = self.translate_entry(
                source_locale=source_locale,
                target_locale=target_locale,
                entry=entry,
            )
            any_translated = True
        return any_translated

    def translate_entry(self, source_locale: Locale, target_locale: Locale, entry: polib.POEntry) -> polib.POEntry:
        """Fill in the translation of a single entry and return that entry.

        For pluralized entries every plural form already present on the
        entry is filled, so the number of forms declared by the file is kept.
        Form 0 receives the translated singular, all other forms the
        translated plural. This is an approximation for languages with more
        than two forms. When the entry has no forms yet, forms 0 and 1 are
        created.
        """
        if not entry.msgid_plural:
            entry.msgstr = self._machine_translate(source_locale, target_locale, entry.msgid)
            return entry

        singular = self._machine_translate(source_locale, target_locale, entry.msgid)
        plural = self._machine_translate(source_locale, target_locale, entry.msgid_plural)

        forms = list(entry.msgstr_plural or {}) or [0, 1]
        if len(forms) == 1:
            # A single form is used for every count, so it must be the
            # plural wording.
            entry.msgstr_plural = {forms[0]: plural}
        else:
            entry.msgstr_plural = {
                form: singular if str(form) == "0" else plural
                for form in forms
            }
        return entry

    def _machine_translate(self, source_locale: Locale, target_locale: Locale, text: str) -> str:
        """Translate one string, stripping tags first if the translator needs plain text."""
        # Checked per string, so markup that appears only in the plural
        # form is handled too.
        if not self.translator.supports_html and is_html(text):
            text = strip_tags(text)
        return self.translator.translate_text(
            source_locale=source_locale,
            target_locale=target_locale,
            text=text,
        )
|
Allows using Wagtail Localize's configured machine translator to translate a PO file. Useful for testing translation.