diff --git a/languages/eng-cat/corpus.yml b/languages/eng-cat/corpus.yml index 7d7c8d0..50a43ba 100644 --- a/languages/eng-cat/corpus.yml +++ b/languages/eng-cat/corpus.yml @@ -1,20 +1,16 @@ source_files: - - corpus-raw/aina-eng-cat-1.en - - corpus-raw/aina-eng-cat-2.en - - corpus-raw/aina-eng-cat-3.en - - corpus-raw/aina-eng-cat-4.en - - corpus-raw/aina-eng-cat-5.en - - corpus-raw/aina-eng-cat-6.en - - corpus-raw/aina-eng-cat-7.en - - corpus-raw/aina-eng-cat-8.en + - corpus-raw/nllb-eng-cat-1.en + - corpus-raw/nllb-eng-cat-2.en + - corpus-raw/nllb-eng-cat-3.en + - corpus-raw/nllb-eng-cat-4.en + - corpus-raw/nllb-eng-cat-5.en + - corpus-raw/nllb-eng-cat-6.en target_files: - - corpus-raw/aina-eng-cat-1.ca - - corpus-raw/aina-eng-cat-2.ca - - corpus-raw/aina-eng-cat-3.ca - - corpus-raw/aina-eng-cat-4.ca - - corpus-raw/aina-eng-cat-5.ca - - corpus-raw/aina-eng-cat-6.ca - - corpus-raw/aina-eng-cat-7.ca - - corpus-raw/aina-eng-cat-8.ca + - corpus-raw/nllb-eng-cat-1.ca + - corpus-raw/nllb-eng-cat-2.ca + - corpus-raw/nllb-eng-cat-3.ca + - corpus-raw/nllb-eng-cat-4.ca + - corpus-raw/nllb-eng-cat-5.ca + - corpus-raw/nllb-eng-cat-6.ca diff --git a/languages/get-corpuses.sh b/languages/get-corpuses.sh index c425ea5..9188680 100755 --- a/languages/get-corpuses.sh +++ b/languages/get-corpuses.sh @@ -1,7 +1,7 @@ #!/bin/bash #git clone https://github.com/Softcatala/parallel-catalan-corpus corpus-raw -git clone --single-branch --branch eng-cat-aina --depth 1 https://github.com/Softcatala/parallel-catalan-corpus corpus-raw +git clone --single-branch --branch eng-cat-nllb --depth 1 https://github.com/Softcatala/parallel-catalan-corpus corpus-raw cd corpus-raw declare -a arr=("eng-cat" "deu-cat" "ita-cat" "fra-cat" "spa-cat" "nld-cat" "por-cat" "jpn-cat" "glg-cat" "oci-cat" "eus-cat") diff --git a/languages/train-all-to-cat.sh b/languages/train-all-to-cat.sh index b37b53a..5b4f5c8 100755 --- a/languages/train-all-to-cat.sh +++ b/languages/train-all-to-cat.sh @@ -1,13 +1,13 @@ #!/bin/bash declare -a arr=("eng-cat" "deu-cat" "fra-cat" "ita-cat" "spa-cat" "por-cat" "nld-cat" "jpn-cat" "glg-cat" "oci-cat" "eus-cat") -#declare -a arr=("eng-cat") +declare -a arr=("eng-cat") for dirname in "${arr[@]}"; do echo Processing $dirname pushd $dirname # Done at pre-process-all.sh - #./preprocess.sh + ./preprocess.sh rm -r -f run/ ./voc.sh ./train.sh