Skip to content

Commit

Permalink
nllb
Browse files Browse the repository at this point in the history
  • Loading branch information
jordimas committed Aug 15, 2024
1 parent 81ea7fe commit f414a4b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 19 deletions.
28 changes: 12 additions & 16 deletions languages/eng-cat/corpus.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
# Corpus file lists for the eng-cat pair: English (.en) files under
# source_files and their Catalan (.ca) counterparts under target_files, all
# located in corpus-raw/.
# NOTE(review): this listing carries both aina-* and nllb-* entries; the change
# summary (12 additions, 16 deletions) suggests the aina-* lines are the removed
# side of a diff. Confirm which set is live before relying on it.
# Presumably entries are paired by position (N-th source with N-th target), so
# both lists should stay in the same order. TODO confirm against the consumer.
source_files:
- corpus-raw/aina-eng-cat-1.en
- corpus-raw/aina-eng-cat-2.en
- corpus-raw/aina-eng-cat-3.en
- corpus-raw/aina-eng-cat-4.en
- corpus-raw/aina-eng-cat-5.en
- corpus-raw/aina-eng-cat-6.en
- corpus-raw/aina-eng-cat-7.en
- corpus-raw/aina-eng-cat-8.en
- corpus-raw/nllb-eng-cat-1.en
- corpus-raw/nllb-eng-cat-2.en
- corpus-raw/nllb-eng-cat-3.en
- corpus-raw/nllb-eng-cat-4.en
- corpus-raw/nllb-eng-cat-5.en
- corpus-raw/nllb-eng-cat-6.en

target_files:
- corpus-raw/aina-eng-cat-1.ca
- corpus-raw/aina-eng-cat-2.ca
- corpus-raw/aina-eng-cat-3.ca
- corpus-raw/aina-eng-cat-4.ca
- corpus-raw/aina-eng-cat-5.ca
- corpus-raw/aina-eng-cat-6.ca
- corpus-raw/aina-eng-cat-7.ca
- corpus-raw/aina-eng-cat-8.ca
- corpus-raw/nllb-eng-cat-1.ca
- corpus-raw/nllb-eng-cat-2.ca
- corpus-raw/nllb-eng-cat-3.ca
- corpus-raw/nllb-eng-cat-4.ca
- corpus-raw/nllb-eng-cat-5.ca
- corpus-raw/nllb-eng-cat-6.ca

2 changes: 1 addition & 1 deletion languages/get-corpuses.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Clones the parallel-catalan-corpus repository into ./corpus-raw and then
# changes into that directory for the steps that follow.

# Full clone of the default branch, kept for reference but disabled.
#git clone https://github.com/Softcatala/parallel-catalan-corpus corpus-raw
# Shallow (--depth 1), single-branch clones: only the latest snapshot of the
# named branch is fetched.
# NOTE(review): both clone commands below target the same corpus-raw directory.
# This looks like the before/after pair of a diff (eng-cat-aina replaced by
# eng-cat-nllb); confirm that only one of them is live.
git clone --single-branch --branch eng-cat-aina --depth 1 https://github.com/Softcatala/parallel-catalan-corpus corpus-raw
git clone --single-branch --branch eng-cat-nllb --depth 1 https://github.com/Softcatala/parallel-catalan-corpus corpus-raw
cd corpus-raw

# Language-pair names; presumably iterated by the remainder of the script,
# which is not shown in this excerpt.
declare -a arr=("eng-cat" "deu-cat" "ita-cat" "fra-cat" "spa-cat" "nld-cat" "por-cat" "jpn-cat" "glg-cat" "oci-cat" "eus-cat")
Expand Down
4 changes: 2 additions & 2 deletions languages/train-all-to-cat.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash

# Full list of language-pair directories that the loop below can process.
declare -a arr=("eng-cat" "deu-cat" "fra-cat" "ita-cat" "spa-cat" "por-cat" "nld-cat" "jpn-cat" "glg-cat" "oci-cat" "eus-cat")
#declare -a arr=("eng-cat")
# This second assignment replaces the full list above, so the loop below only
# visits eng-cat.
# NOTE(review): looks like a temporary single-pair override; confirm it is
# intended to stay enabled.
declare -a arr=("eng-cat")

for dirname in "${arr[@]}"; do
echo Processing $dirname
pushd $dirname
# Done at pre-process-all.sh
#./preprocess.sh
./preprocess.sh
rm -r -f run/
./voc.sh
./train.sh
Expand Down

0 comments on commit f414a4b

Please sign in to comment.