diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index e250170..8874cac 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -1,30 +1,91 @@
-SAVE_DIR="/pio/scratch/1/i273233/linear_separability/cpc/gru_level2/cpc_official"
+#!/usr/bin/env bash
+#
+# Nullspace fine-tuning pipeline: calls cpc/eval/linear_separability.py once per
+# step, in the order speakers_factorized -> phonemes_nullspace ->
+# speakers_nullspace, starting at FROM_STEP and falling through to the end.
+# Each step writes to OUTPUT_DIR/<step>_<DIM_INBETWEEN>; a failing step stops
+# the run. The Python script path is relative, so run this from the directory
+# that contains cpc/.
+
 SPEAKERS="speakers_factorized"
 PHONEMES="phonemes_nullspace"
 SPEAKERS_NULLSPACE="speakers_nullspace"
 
-DIM_INTER=$1
+# "false" marks an option that was not supplied on the command line.
+DATASET_PATH=false
+TRAIN_SET=false
+VALIDATION_SET=false
+CHECKPOINT_PATH=false
+OUTPUT_DIR=false
+DIM_INBETWEEN=false
 FROM_STEP=$SPEAKERS
-if [[ $# -ge 2 ]]; then
-    FROM_STEP=$2
+PHONES_PATH=false
+
+# Prints the command-line help.
+print_usage() {
+    echo -e "Usage: ./finetune_nullspace.sh"
+    echo -e "\t-d DATASET_PATH"
+    echo -e "\t-t TRAIN_SET"
+    echo -e "\t-v VALIDATION_SET"
+    echo -e "\t-c CHECKPOINT_PATH"
+    echo -e "\t-o OUTPUT_DIR"
+    echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)"
+    echo -e "OPTIONAL FLAGS:"
+    echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
+    echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)"
+}
+
+while getopts 'd:t:v:c:o:n:f:p:' flag; do
+    case "${flag}" in
+        d) DATASET_PATH="${OPTARG}" ;;
+        t) TRAIN_SET="${OPTARG}" ;;
+        v) VALIDATION_SET="${OPTARG}" ;;
+        c) CHECKPOINT_PATH="${OPTARG}" ;;
+        o) OUTPUT_DIR="${OPTARG}" ;;
+        n) DIM_INBETWEEN="${OPTARG}" ;;
+        f) FROM_STEP="${OPTARG}" ;;
+        p) PHONES_PATH="${OPTARG}" ;;
+        *) print_usage
+           exit 1 ;;
+    esac
+done
+
+echo "$DATASET_PATH" "$TRAIN_SET" "$VALIDATION_SET" "$CHECKPOINT_PATH" "$OUTPUT_DIR" "$DIM_INBETWEEN" "$FROM_STEP" "$PHONES_PATH"
+
+# PHONES_PATH is passed as --pathPhone in the $PHONEMES step, which runs unless
+# FROM_STEP is $SPEAKERS_NULLSPACE (the ;& arms below fall through into it).
+if [[ "$DATASET_PATH" == false || "$TRAIN_SET" == false || "$VALIDATION_SET" == false || "$CHECKPOINT_PATH" == false || "$OUTPUT_DIR" == false || "$DIM_INBETWEEN" == false || ( "$PHONES_PATH" == false && "$FROM_STEP" != "$SPEAKERS_NULLSPACE" ) ]]
+then
+    echo "Either DATASET_PATH, TRAIN_SET, VALIDATION_SET, CHECKPOINT_PATH, OUTPUT_DIR or DIM_INBETWEEN is not set or there are invalid PHONES_PATH and FROM_STEP." >&2
+    print_usage >&2
+    exit 1
 fi
 
+mkdir -p "$OUTPUT_DIR" || exit 1
+
 case $FROM_STEP in
 $SPEAKERS)
     echo $SPEAKERS
-    mkdir -p ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/log.txt
+    mkdir -p "${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}" || exit 1
+    python cpc/eval/linear_separability.py "$DATASET_PATH" "$TRAIN_SET" "$VALIDATION_SET" "$CHECKPOINT_PATH" --pathCheckpoint "${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}" --mode "$SPEAKERS" --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter "$DIM_INBETWEEN" --gru_level 2 || exit 1
 ;&
 $PHONEMES)
     echo $PHONEMES
-    mkdir -p ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${PHONEMES}_${DIM_INTER}/log.txt
+    mkdir -p "${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}" || exit 1
+    python cpc/eval/linear_separability.py "$DATASET_PATH" "$TRAIN_SET" "$VALIDATION_SET" "$CHECKPOINT_PATH" --pathCheckpoint "${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}" --mode "$PHONEMES" --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone "$PHONES_PATH" --path_speakers_factorized "${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt" --dim_inter "$DIM_INBETWEEN" --gru_level 2 || exit 1
 ;&
 $SPEAKERS_NULLSPACE)
     echo $SPEAKERS_NULLSPACE
-    mkdir -p ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER}/log.txt
+    mkdir -p "${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}" || exit 1
+    python cpc/eval/linear_separability.py "$DATASET_PATH" "$TRAIN_SET" "$VALIDATION_SET" "$CHECKPOINT_PATH" --pathCheckpoint "${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}" --mode "$SPEAKERS_NULLSPACE" --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized "${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt" --dim_inter "$DIM_INBETWEEN" --gru_level 2 || exit 1
 ;;
 *)
     echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}"
+    exit 1
 ;;
 esac
 
+echo "Checkpoint with nullspace is located in ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt"
+echo "The results of all the experiments are located in ${OUTPUT_DIR}/DIRECTORY/checkpoint_logs.json"
+
 exit 0
\ No newline at end of file