diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index 8874cac..267db71 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -3,75 +3,75 @@ PHONEMES="phonemes_nullspace" SPEAKERS_NULLSPACE="speakers_nullspace" DATASET_PATH=false -TRAIN_SET=false -VALIDATION_SET=false -CHECKPOINT_PATH=false -OUTPUT_DIR=false +TRAIN_SPLIT_FILE_PATH=false +VALIDATION_SPLIT_FILE_PATH=false +BASELINE_NO_CLUSTERING_CHECKPOINT_PATH=false +SAVE_DIR=false DIM_INBETWEEN=false FROM_STEP=$SPEAKERS -PHONES_PATH=false +PHONEME_ALIGNMENTS_FILE=false print_usage() { echo -e "Usage: ./finetune_nullspace.sh" - echo -e "\t-d DATASET_PATH" - echo -e "\t-t TRAIN_SET" - echo -e "\t-v VALIDATION_SET" - echo -e "\t-c CHECKPOINT_PATH" - echo -e "\t-o OUTPUT_DIR" + echo -e "\t-d DATASET_PATH (E.g. LIBRISPEECH_DATASET_PATH/train-clean-100)" + echo -e "\t-t TRAIN_SPLIT_FILE_PATH (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TRAIN_SPLIT_FILE_PATH)" + echo -e "\t-v VALIDATION_SPLIT_FILE_PATH (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TEST_SPLIT_FILE_PATH)" + echo -e "\t-c BASELINE_NO_CLUSTERING_CHECKPOINT_PATH" + echo -e "\t-o SAVE_DIR" echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)" - echo -e "OPTIONAL FLAGS:" - echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)" - echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)" + echo -e "\t-p PHONEME_ALIGNMENTS_FILE (Path to the file containing phonemes for the entire dataset)" + echo -e "OPTIONAL ARGS:" + echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)" } while getopts 'd:t:v:c:o:n:f:p:' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; - t) TRAIN_SET="${OPTARG}" ;; - v) VALIDATION_SET="${OPTARG}" ;; - c) CHECKPOINT_PATH="${OPTARG}" ;; - o) OUTPUT_DIR="${OPTARG}" ;; + t) TRAIN_SPLIT_FILE_PATH="${OPTARG}" ;; + v) VALIDATION_SPLIT_FILE_PATH="${OPTARG}" ;; + c) BASELINE_NO_CLUSTERING_CHECKPOINT_PATH="${OPTARG}" ;; + o) SAVE_DIR="${OPTARG}" ;; n) DIM_INBETWEEN="${OPTARG}" ;; f) FROM_STEP="${OPTARG}" ;; - p) PHONES_PATH="${OPTARG}" ;; + p) PHONEME_ALIGNMENTS_FILE="${OPTARG}" ;; *) print_usage exit 1 ;; esac done -echo $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH $OUTPUT_DIR $DIM_INBETWEEN $FROM_STEP $PHONES_PATH +echo $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH $SAVE_DIR $DIM_INBETWEEN $FROM_STEP $PHONEME_ALIGNMENTS_FILE -if [[ $DATASET_PATH == false || $TRAIN_SET == false || $VALIDATION_SET == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false || $DIM_INBETWEEN == false || ( $PHONES_PATH == false && $FROM_STEP != $SPEAKERS ) ]] +if [[ $DATASET_PATH == false || $TRAIN_SPLIT_FILE_PATH == false || $VALIDATION_SPLIT_FILE_PATH == false || $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH == false || $SAVE_DIR == false || $DIM_INBETWEEN == false || $PHONEME_ALIGNMENTS_FILE == false ]] then - echo "Either DATASET_PATH, TRAIN_SET, VALIDATION_SET, CHECKPOINT_PATH, OUTPUT_DIR or DIM_INBETWEEN is not set or there are invalid PHONES_PATH and FROM_STEP." + echo "Either DATASET_PATH, TRAIN_SPLIT_FILE_PATH, VALIDATION_SPLIT_FILE_PATH, BASELINE_NO_CLUSTERING_CHECKPOINT_PATH, SAVE_DIR, DIM_INBETWEEN or PHONEME_ALIGNMENTS_FILE is not set." print_usage exit 1 fi -mkdir -p $OUTPUT_DIR +mkdir -p $SAVE_DIR case $FROM_STEP in $SPEAKERS) echo $SPEAKERS - mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 + mkdir -p ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 ;& $PHONEMES) echo $PHONEMES - mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 + mkdir -p ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONEME_ALIGNMENTS_FILE --path_speakers_factorized ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 ;& $SPEAKERS_NULLSPACE) echo $SPEAKERS_NULLSPACE - mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 + mkdir -p ${SAVE_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 ;; *) echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}" ;; esac -echo "Checkpoint with nullspace is located in ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt" -echo "The results of all the experiments are located in ${OUTPUT_DIR}/DIRECTORY/checkpoint_logs.json" +echo "Checkpoint with nullspace is located in ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt" +echo "The results of all the experiments are located in ${SAVE_DIR}/DIRECTORY/checkpoint_logs.json" exit 0 \ No newline at end of file diff --git a/scripts/create_ls_dataset_for_abx_eval.py b/scripts/create_ls_dataset_for_abx_eval.py new file mode 100755 index 0000000..13babd4 --- /dev/null +++ b/scripts/create_ls_dataset_for_abx_eval.py @@ -0,0 +1,56 @@ +import os +import sys +import shutil +import argparse +from pathlib import Path +import numpy as np +import soundfile as sf + +def parse_args(): + # Run parameters + parser = argparse.ArgumentParser() + parser.add_argument("librispeech_path", type=str, + help="Path to the root directory of LibriSpeech.") + parser.add_argument("zerospeech_dataset_path", type=str, + help="Path to the ZeroSpeech dataset.") + parser.add_argument("target_path", type=str, + help="Path to the output directory.") + parser.add_argument("--file_extension", type=str, default="flac", + help="Extension of the audio files in the dataset (default: flac).") + return parser.parse_args() + +def main(): + # Parse and print args + args = parse_args() + logger.info(args) + + phonetic = "phonetic" + datasets = ["dev-clean", "dev-other", "test-clean", "test-other"] + + for dataset in datasets: + print("> {}".format(dataset)) + target_dirname = os.path.join(args.target_path, phonetic, dataset) + Path(target_dirname).mkdir(parents=True, exist_ok=True) + + librispeech_dirname = os.path.join(args.librispeech_path, dataset) + files = [(filename, dirname) for dirname, _, files in os.walk(librispeech_dirname, followlinks=True) for filename in files if filename.endswith(args.file_extension)] + for i, (filename, dirname) in enumerate(files): + print("Progress {:2.1%}".format(i / len(files)), end="\r") + input_path = os.path.join(dirname, filename) + output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav") + data, sample_rate = sf.read(input_path) + sf.write(output_path, data, sample_rate) + + if dataset.startswith("dev"): + source_item_path = os.path.join(args.zerospeech_dataset_path, phonetic, dataset, dataset + ".item") + target_item_path = os.path.join(target_dirname, dataset + ".item") + shutil.copy(source_item_path, target_item_path) + + +if __name__ == "__main__": + #import ptvsd + #ptvsd.enable_attach(('0.0.0.0', 7310)) + #print("Attach debugger now") + #ptvsd.wait_for_attach() + main() + diff --git a/scripts/embeddings_abx.py b/scripts/embeddings_abx.py index 8e68a8c..4b6ab62 100644 --- a/scripts/embeddings_abx.py +++ b/scripts/embeddings_abx.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -u -# !/usr/bin/env python3 -u -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. import logging import os diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index 5475b39..7bdd9ce 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -1,66 +1,66 @@ ########## CHANGE THIS ################## -ZEROSPEECH_EVAL_ENV=zerospeech2021 # Where the zerospeech2021-evaluate is installed -CPC_ENV=202010-fairseq-c11 +ZEROSPEECH_EVAL_ENVIRONMENT=zerospeech2021 # Where the zerospeech2021-evaluate is installed +CPC_ENVIRONMENT=202010-fairseq-c11 CONDA_PATH=/pio/scratch/2/i273233/miniconda3 ######################################### DATASET_PATH=false -ORIGINAL_DATASET_PATH=false +ZEROSPEECH_DATASET_PATH=false CHECKPOINT_PATH=false -OUTPUT_DIR=false +SAVE_DIR=false NULLSPACE=false NO_TEST=false print_usage() { echo -e "Usage: ./eval_abx.sh" - echo -e "\t-d DATASET_PATH" - echo -e "\t-r ORIGINAL_DATASET_PATH" + echo -e "\t-d DATASET_PATH (Either ZEROSPEECH_DATASET_PATH or LIBRISPEECH_FLATTENED_DATASET_PATH [Or anything that has directory structure of these two with dev-*.item files from ZEROSPEECH_DATASET_PATH])" + echo -e "\t-r ZEROSPEECH_DATASET_PATH" echo -e "\t-c CHECKPOINT_PATH" - echo -e "\t-o OUTPUT_DIR" - echo -e "OPTIONAL FLAGS:" - echo -e "\t-n (Load a model with nullspace)" + echo -e "\t-o SAVE_DIR" + echo -e "OPTIONAL ARGS:" + echo -e "\t-n (Provide this flag if you want to load a model with nullspace)" echo -e "\t-a CONDA_PATH" - echo -e "\t-e CPC_ENV" - echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" + echo -e "\t-e CPC_ENVIRONMENT" + echo -e "\t-z ZEROSPEECH_EVAL_ENVIRONMENT (The conda environment where the zerospeech2021-evaluate is installed)" echo -e "\t-t (Do not compute embeddings for test set)" } while getopts 'd:r:c:o:na:e:z:t' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; - r) ORIGINAL_DATASET_PATH="${OPTARG}" ;; + r) ZEROSPEECH_DATASET_PATH="${OPTARG}" ;; c) CHECKPOINT_PATH="${OPTARG}" ;; - o) OUTPUT_DIR="${OPTARG}" ;; + o) SAVE_DIR="${OPTARG}" ;; n) NULLSPACE=true ;; a) CONDA_PATH="${OPTARG}" ;; - e) CPC_ENV="${OPTARG}" ;; - z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;; + e) CPC_ENVIRONMENT="${OPTARG}" ;; + z) ZEROSPEECH_EVAL_ENVIRONMENT="${OPTARG}" ;; t) NO_TEST=true ;; *) print_usage exit 1 ;; esac done -echo $DATASET_PATH $ORIGINAL_DATASET_PATH $CHECKPOINT_PATH $OUTPUT_DIR $NULLSPACE $CONDA_PATH $CPC_ENV $ZEROSPEECH_EVAL_ENV $NO_TEST +echo $DATASET_PATH $ZEROSPEECH_DATASET_PATH $CHECKPOINT_PATH $SAVE_DIR $NULLSPACE $CONDA_PATH $CPC_ENVIRONMENT $ZEROSPEECH_EVAL_ENVIRONMENT $NO_TEST -if [[ $DATASET_PATH == false || $ORIGINAL_DATASET_PATH == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false ]] +if [[ $DATASET_PATH == false || $ZEROSPEECH_DATASET_PATH == false || $CHECKPOINT_PATH == false || $SAVE_DIR == false ]] then - echo "Either DATASET_PATH or ORIGINAL_DATASET_PATH or CHECKPOINT_PATH or OUTPUT_DIR is not set." + echo "Either DATASET_PATH or ZEROSPEECH_DATASET_PATH or CHECKPOINT_PATH or SAVE_DIR is not set." print_usage exit 1 fi SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -results=$OUTPUT_DIR/results -embeddings=$OUTPUT_DIR/embeddings +results=$SAVE_DIR/results +embeddings=$SAVE_DIR/embeddings mkdir -p embeddings source $CONDA_PATH/etc/profile.d/conda.sh -SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p') -echo SAVED_ENV: $SAVED_ENV +SAVED_ENVIRONMENT=$(conda info | sed -n 's/\( \)*active environment : //p') +echo SAVED_ENVIRONMENT: $SAVED_ENVIRONMENT -ENV_TO_ACTIVATE=$CPC_ENV +ENV_TO_ACTIVATE=$CPC_ENVIRONMENT conda activate $ENV_TO_ACTIVATE params="" @@ -92,7 +92,7 @@ do for file in `ls $embeddings/$i/phonetic/$directory` do filename_no_ext="${file%.*}" - if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] + if [[ ! -f "$ZEROSPEECH_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] then rm $embeddings/$i/phonetic/$directory/$file fi @@ -100,7 +100,7 @@ do done done -conda activate $ZEROSPEECH_EVAL_ENV +conda activate $ZEROSPEECH_EVAL_ENVIRONMENT frame_shift="0.01" echo "Frame shift is ${frame_shift}s" @@ -127,9 +127,9 @@ EOF do cp $embeddings/$metric.yaml $embeddings/$i/meta.yaml #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i - #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i - #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i - zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i + #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i + #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i + zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i done done @@ -141,6 +141,6 @@ do cat $results/$metric/$i/score_phonetic.csv echo done -done > $OUTPUT_DIR/combined_results.txt +done > $SAVE_DIR/combined_results.txt -conda activate $SAVED_ENV \ No newline at end of file +conda activate $SAVED_ENVIRONMENT \ No newline at end of file