Skip to content

Commit

Permalink
Added a script for creating flattened LibriSpeech dev/test dataset. Modified other scripts to match readme
Browse files Browse the repository at this point in the history
  • Loading branch information
jdzikowski committed Apr 4, 2021
1 parent 6769f92 commit 856cb25
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 65 deletions.
60 changes: 30 additions & 30 deletions finetune_nullspace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,75 +3,75 @@ PHONEMES="phonemes_nullspace"
SPEAKERS_NULLSPACE="speakers_nullspace"

DATASET_PATH=false
TRAIN_SET=false
VALIDATION_SET=false
CHECKPOINT_PATH=false
OUTPUT_DIR=false
TRAIN_SPLIT_FILE_PATH=false
VALIDATION_SPLIT_FILE_PATH=false
BASELINE_NO_CLUSTERING_CHECKPOINT_PATH=false
SAVE_DIR=false
DIM_INBETWEEN=false
FROM_STEP=$SPEAKERS
PHONES_PATH=false
PHONEME_ALIGNMENTS_FILE=false

print_usage() {
echo -e "Usage: ./finetune_nullspace.sh"
echo -e "\t-d DATASET_PATH"
echo -e "\t-t TRAIN_SET"
echo -e "\t-v VALIDATION_SET"
echo -e "\t-c CHECKPOINT_PATH"
echo -e "\t-o OUTPUT_DIR"
echo -e "\t-d DATASET_PATH (E.g. LIBRISPEECH_DATASET_PATH/train-clean-100)"
echo -e "\t-t TRAIN_SPLIT_FILE_PATH (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TRAIN_SPLIT_FILE_PATH)"
echo -e "\t-v VALIDATION_SPLIT_FILE_PATH (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TEST_SPLIT_FILE_PATH)"
echo -e "\t-c BASELINE_NO_CLUSTERING_CHECKPOINT_PATH"
echo -e "\t-o SAVE_DIR"
echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)"
echo -e "OPTIONAL FLAGS:"
echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)"
echo -e "\t-p PHONEME_ALIGNMENTS_FILE (Path to the file containing phonemes for the entire dataset)"
echo -e "OPTIONAL ARGS:"
echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
}

while getopts 'd:t:v:c:o:n:f:p:' flag; do
case "${flag}" in
d) DATASET_PATH="${OPTARG}" ;;
t) TRAIN_SET="${OPTARG}" ;;
v) VALIDATION_SET="${OPTARG}" ;;
c) CHECKPOINT_PATH="${OPTARG}" ;;
o) OUTPUT_DIR="${OPTARG}" ;;
t) TRAIN_SPLIT_FILE_PATH="${OPTARG}" ;;
v) VALIDATION_SPLIT_FILE_PATH="${OPTARG}" ;;
c) BASELINE_NO_CLUSTERING_CHECKPOINT_PATH="${OPTARG}" ;;
o) SAVE_DIR="${OPTARG}" ;;
n) DIM_INBETWEEN="${OPTARG}" ;;
f) FROM_STEP="${OPTARG}" ;;
p) PHONES_PATH="${OPTARG}" ;;
p) PHONEME_ALIGNMENTS_FILE="${OPTARG}" ;;
*) print_usage
exit 1 ;;
esac
done

echo $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH $OUTPUT_DIR $DIM_INBETWEEN $FROM_STEP $PHONES_PATH
echo $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH $SAVE_DIR $DIM_INBETWEEN $FROM_STEP $PHONEME_ALIGNMENTS_FILE

if [[ $DATASET_PATH == false || $TRAIN_SET == false || $VALIDATION_SET == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false || $DIM_INBETWEEN == false || ( $PHONES_PATH == false && $FROM_STEP != $SPEAKERS ) ]]
if [[ $DATASET_PATH == false || $TRAIN_SPLIT_FILE_PATH == false || $VALIDATION_SPLIT_FILE_PATH == false || $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH == false || $SAVE_DIR == false || $DIM_INBETWEEN == false || $PHONEME_ALIGNMENTS_FILE == false ]]
then
echo "Either DATASET_PATH, TRAIN_SET, VALIDATION_SET, CHECKPOINT_PATH, OUTPUT_DIR or DIM_INBETWEEN is not set or there are invalid PHONES_PATH and FROM_STEP."
echo "Either DATASET_PATH, TRAIN_SPLIT_FILE_PATH, VALIDATION_SPLIT_FILE_PATH, BASELINE_NO_CLUSTERING_CHECKPOINT_PATH, SAVE_DIR, DIM_INBETWEEN or PHONEME_ALIGNMENTS_FILE is not set."
print_usage
exit 1
fi

mkdir -p $OUTPUT_DIR
mkdir -p $SAVE_DIR

case $FROM_STEP in
$SPEAKERS)
echo $SPEAKERS
mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2
mkdir -p ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2
;&
$PHONEMES)
echo $PHONEMES
mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
mkdir -p ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONEME_ALIGNMENTS_FILE --path_speakers_factorized ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
;&
$SPEAKERS_NULLSPACE)
echo $SPEAKERS_NULLSPACE
mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
mkdir -p ${SAVE_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}
python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SPLIT_FILE_PATH $VALIDATION_SPLIT_FILE_PATH $BASELINE_NO_CLUSTERING_CHECKPOINT_PATH --pathCheckpoint ${SAVE_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${SAVE_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
;;
*)
echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}"
;;
esac

echo "Checkpoint with nullspace is located in ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt"
echo "The results of all the experiments are located in ${OUTPUT_DIR}/DIRECTORY/checkpoint_logs.json"
echo "Checkpoint with nullspace is located in ${SAVE_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt"
echo "The results of all the experiments are located in ${SAVE_DIR}/DIRECTORY/checkpoint_logs.json"

exit 0
56 changes: 56 additions & 0 deletions scripts/create_ls_dataset_for_abx_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
import sys
import shutil
import argparse
from pathlib import Path
import numpy as np
import soundfile as sf

def parse_args():
    """Parse the command-line arguments of this script.

    Positional arguments (all strings, in this order):
        librispeech_path, zerospeech_dataset_path, target_path.
    Optional argument:
        --file_extension (string, defaults to "flac").

    Returns:
        The argparse.Namespace built from the process command line.
    """
    # (name, help text) for every required positional argument, in CLI order.
    positional_args = (
        ("librispeech_path", "Path to the root directory of LibriSpeech."),
        ("zerospeech_dataset_path", "Path to the ZeroSpeech dataset."),
        ("target_path", "Path to the output directory."),
    )

    cli = argparse.ArgumentParser()
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    cli.add_argument(
        "--file_extension",
        type=str,
        default="flac",
        help="Extension of the audio files in the dataset (default: flac).",
    )
    return cli.parse_args()

def main():
    """Create a flattened copy of the LibriSpeech dev/test sets.

    For each of "dev-clean", "dev-other", "test-clean" and "test-other":
      * walk <librispeech_path>/<dataset> (following symlinks) and collect
        every file whose name ends with the requested extension;
      * read each file with soundfile and write it, under the same base name
        with a ".wav" extension, directly into
        <target_path>/phonetic/<dataset>/ (no sub-directories);
      * for the "dev-*" sets, also copy <dataset>.item from
        <zerospeech_dataset_path>/phonetic/<dataset>/ into the target
        directory.
    """
    # Parse and print args.
    args = parse_args()
    # The script never defines or imports a `logger`, so the former
    # `logger.info(args)` raised NameError before any work was done.
    # print() matches the rest of this script's output.
    print(args)

    phonetic = "phonetic"
    datasets = ["dev-clean", "dev-other", "test-clean", "test-other"]

    for dataset in datasets:
        print("> {}".format(dataset))
        target_dirname = os.path.join(args.target_path, phonetic, dataset)
        Path(target_dirname).mkdir(parents=True, exist_ok=True)

        librispeech_dirname = os.path.join(args.librispeech_path, dataset)
        # (filename, containing directory) for every matching audio file.
        files = [
            (filename, dirname)
            for dirname, _, filenames in os.walk(librispeech_dirname, followlinks=True)
            for filename in filenames
            if filename.endswith(args.file_extension)
        ]
        for i, (filename, dirname) in enumerate(files):
            # "\r" keeps the progress indicator on a single terminal line.
            print("Progress {:2.1%}".format(i / len(files)), end="\r")
            input_path = os.path.join(dirname, filename)
            output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav")
            data, sample_rate = sf.read(input_path)
            sf.write(output_path, data, sample_rate)

        # Only the dev sets get an .item file copied next to the audio.
        if dataset.startswith("dev"):
            source_item_path = os.path.join(args.zerospeech_dataset_path, phonetic, dataset, dataset + ".item")
            target_item_path = os.path.join(target_dirname, dataset + ".item")
            shutil.copy(source_item_path, target_item_path)


if __name__ == "__main__":
    # Disabled remote-debugging hook: uncomment the four lines below to make
    # the script wait for a ptvsd debugger to attach before running.
    # import ptvsd
    # ptvsd.enable_attach(('0.0.0.0', 7310))
    # print("Attach debugger now")
    # ptvsd.wait_for_attach()
    main()

5 changes: 0 additions & 5 deletions scripts/embeddings_abx.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
#!/usr/bin/env python3 -u
# !/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
Expand Down
60 changes: 30 additions & 30 deletions scripts/eval_abx.sh
Original file line number Diff line number Diff line change
@@ -1,66 +1,66 @@
########## CHANGE THIS ##################
ZEROSPEECH_EVAL_ENV=zerospeech2021 # Where the zerospeech2021-evaluate is installed
CPC_ENV=202010-fairseq-c11
ZEROSPEECH_EVAL_ENVIRONMENT=zerospeech2021 # Where the zerospeech2021-evaluate is installed
CPC_ENVIRONMENT=202010-fairseq-c11
CONDA_PATH=/pio/scratch/2/i273233/miniconda3
#########################################

DATASET_PATH=false
ORIGINAL_DATASET_PATH=false
ZEROSPEECH_DATASET_PATH=false
CHECKPOINT_PATH=false
OUTPUT_DIR=false
SAVE_DIR=false
NULLSPACE=false
NO_TEST=false

print_usage() {
echo -e "Usage: ./eval_abx.sh"
echo -e "\t-d DATASET_PATH"
echo -e "\t-r ORIGINAL_DATASET_PATH"
echo -e "\t-d DATASET_PATH (Either ZEROSPEECH_DATASET_PATH or LIBRISPEECH_FLATTENED_DATASET_PATH [Or anything that has directory structure of these two with dev-*.item files from ZEROSPEECH_DATASET_PATH])"
echo -e "\t-r ZEROSPEECH_DATASET_PATH"
echo -e "\t-c CHECKPOINT_PATH"
echo -e "\t-o OUTPUT_DIR"
echo -e "OPTIONAL FLAGS:"
echo -e "\t-n (Load a model with nullspace)"
echo -e "\t-o SAVE_DIR"
echo -e "OPTIONAL ARGS:"
echo -e "\t-n (Provide this flag if you want to load a model with nullspace)"
echo -e "\t-a CONDA_PATH"
echo -e "\t-e CPC_ENV"
echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)"
echo -e "\t-e CPC_ENVIRONMENT"
echo -e "\t-z ZEROSPEECH_EVAL_ENVIRONMENT (The conda environment where the zerospeech2021-evaluate is installed)"
echo -e "\t-t (Do not compute embeddings for test set)"
}

while getopts 'd:r:c:o:na:e:z:t' flag; do
case "${flag}" in
d) DATASET_PATH="${OPTARG}" ;;
r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;
r) ZEROSPEECH_DATASET_PATH="${OPTARG}" ;;
c) CHECKPOINT_PATH="${OPTARG}" ;;
o) OUTPUT_DIR="${OPTARG}" ;;
o) SAVE_DIR="${OPTARG}" ;;
n) NULLSPACE=true ;;
a) CONDA_PATH="${OPTARG}" ;;
e) CPC_ENV="${OPTARG}" ;;
z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;;
e) CPC_ENVIRONMENT="${OPTARG}" ;;
z) ZEROSPEECH_EVAL_ENVIRONMENT="${OPTARG}" ;;
t) NO_TEST=true ;;
*) print_usage
exit 1 ;;
esac
done

echo $DATASET_PATH $ORIGINAL_DATASET_PATH $CHECKPOINT_PATH $OUTPUT_DIR $NULLSPACE $CONDA_PATH $CPC_ENV $ZEROSPEECH_EVAL_ENV $NO_TEST
echo $DATASET_PATH $ZEROSPEECH_DATASET_PATH $CHECKPOINT_PATH $SAVE_DIR $NULLSPACE $CONDA_PATH $CPC_ENVIRONMENT $ZEROSPEECH_EVAL_ENVIRONMENT $NO_TEST

if [[ $DATASET_PATH == false || $ORIGINAL_DATASET_PATH == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false ]]
if [[ $DATASET_PATH == false || $ZEROSPEECH_DATASET_PATH == false || $CHECKPOINT_PATH == false || $SAVE_DIR == false ]]
then
echo "Either DATASET_PATH or ORIGINAL_DATASET_PATH or CHECKPOINT_PATH or OUTPUT_DIR is not set."
echo "Either DATASET_PATH or ZEROSPEECH_DATASET_PATH or CHECKPOINT_PATH or SAVE_DIR is not set."
print_usage
exit 1
fi

SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"

results=$OUTPUT_DIR/results
embeddings=$OUTPUT_DIR/embeddings
results=$SAVE_DIR/results
embeddings=$SAVE_DIR/embeddings
# Create the embeddings output directory held in $embeddings. The previous
# `mkdir -p embeddings` lacked the "$" and therefore created a literal
# "embeddings" directory in the current working directory instead.
mkdir -p -- "$embeddings"

source $CONDA_PATH/etc/profile.d/conda.sh
SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p')
echo SAVED_ENV: $SAVED_ENV
SAVED_ENVIRONMENT=$(conda info | sed -n 's/\( \)*active environment : //p')
echo SAVED_ENVIRONMENT: $SAVED_ENVIRONMENT

ENV_TO_ACTIVATE=$CPC_ENV
ENV_TO_ACTIVATE=$CPC_ENVIRONMENT
conda activate $ENV_TO_ACTIVATE

params=""
Expand Down Expand Up @@ -92,15 +92,15 @@ do
for file in `ls $embeddings/$i/phonetic/$directory`
do
filename_no_ext="${file%.*}"
if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]]
if [[ ! -f "$ZEROSPEECH_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]]
then
rm $embeddings/$i/phonetic/$directory/$file
fi
done
done
done

conda activate $ZEROSPEECH_EVAL_ENV
conda activate $ZEROSPEECH_EVAL_ENVIRONMENT

frame_shift="0.01"
echo "Frame shift is ${frame_shift}s"
Expand All @@ -127,9 +127,9 @@ EOF
do
cp $embeddings/$metric.yaml $embeddings/$i/meta.yaml
#zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i
#zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
#zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
#zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i
#zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i
zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ZEROSPEECH_DATASET_PATH $embeddings/$i
done
done

Expand All @@ -141,6 +141,6 @@ do
cat $results/$metric/$i/score_phonetic.csv
echo
done
done > $OUTPUT_DIR/combined_results.txt
done > $SAVE_DIR/combined_results.txt

conda activate $SAVED_ENV
conda activate $SAVED_ENVIRONMENT

0 comments on commit 856cb25

Please sign in to comment.