forked from facebookresearch/CPC_audio
Commit 1d636c6 (1 parent: 12c407b)
Showing 3 changed files with 284 additions and 1 deletion.
@@ -0,0 +1,138 @@
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import sys
import argparse
from itertools import chain
from pathlib import Path
import time
import copy
import numpy as np
import soundfile as sf

from cpc.feature_loader import loadModel, FeatureModule

import torch
import torch.nn as nn
import torch.nn.functional as F

logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("zerospeech2021 abx")


def parse_args():
    # Run parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("path_checkpoint", type=str,
                        help="Path to the trained CPC checkpoint.")
    parser.add_argument("path_data", type=str,
                        help="Path to the dataset that we want to compute ABX for.")
    parser.add_argument("path_output_dir", type=str,
                        help="Path to the output directory.")
    parser.add_argument("--debug", action="store_true",
                        help="Load only a very small amount of files for "
                             "debugging purposes.")
    parser.add_argument("--cpu", action="store_true",
                        help="Run on a cpu machine.")
    parser.add_argument("--file_extension", type=str, default="wav",
                        help="Extension of the audio files in the dataset (default: wav).")
    parser.add_argument("--no_test", action="store_true",
                        help="Don't compute embeddings for test-* parts of the dataset.")
    parser.add_argument('--gru_level', type=int, default=-1,
                        help='Hidden level of the LSTM autoregressive model to be taken '
                             '(default: -1, last layer).')
    parser.add_argument('--nullspace', action='store_true',
                        help="Additionally load the nullspace.")
    return parser.parse_args()


def main():
    # Parse and print args
    args = parse_args()
    logger.info(args)

    # Load the model
    print("")
    print(f"Loading model from {args.path_checkpoint}")

    if args.gru_level is not None and args.gru_level > 0:
        updateConfig = argparse.Namespace(nLevelsGRU=args.gru_level)
    else:
        updateConfig = None

    model = loadModel([args.path_checkpoint], load_nullspace=args.nullspace, updateConfig=updateConfig)[0]

    if args.gru_level is not None and args.gru_level > 0:
        # Keep hidden units at LSTM layers on sequential batches
        if args.nullspace:
            model.cpc.gAR.keepHidden = True
        else:
            model.gAR.keepHidden = True

    device = "cuda" if torch.cuda.is_available() and not args.cpu else "cpu"

    # Register the hooks
    layer_outputs = {}
    def get_layer_output(name):
        def hook(model, input, output):
            if type(output) is tuple:
                layer_outputs[name] = output[0].detach().squeeze(1).cpu().numpy()
            elif type(output) is dict:
                layer_outputs[name] = output["x"].detach().squeeze(0).cpu().numpy()
            else:
                layer_outputs[name] = output.detach().squeeze(0).cpu().numpy()
        return hook

    layer_names = []
    layer_name = os.path.basename(os.path.dirname(args.path_checkpoint))
    layer_names.append(layer_name)
    if not args.nullspace:
        model.gAR.register_forward_hook(get_layer_output(layer_name))
    else:
        model.nullspace.register_forward_hook(get_layer_output(layer_name))

    model = model.eval().to(device)
    print("Model loaded!")
    print(model)

    # Extract values from chosen layers and save them to files
    phonetic = "phonetic"
    datasets_path = os.path.join(args.path_data, phonetic)
    datasets = os.listdir(datasets_path)
    datasets = [dataset for dataset in datasets if not args.no_test or not dataset.startswith("test")]
    print(datasets)

    with torch.no_grad():
        for dataset in datasets:
            print("> {}".format(dataset))
            dataset_path = os.path.join(datasets_path, dataset)
            files = [f for f in os.listdir(dataset_path) if f.endswith(args.file_extension)]
            for i, f in enumerate(files):
                print("Progress {:2.1%}".format(i / len(files)), end="\r")
                input_f = os.path.join(dataset_path, f)
                x, sample_rate = sf.read(input_f)
                x = torch.tensor(x).float().reshape(1, 1, -1).to(device)
                # The return value is not used directly; the forward pass
                # triggers the registered hook, which fills layer_outputs.
                output = model(x, None)[0]

                for layer_name, value in layer_outputs.items():
                    output_dir = os.path.join(args.path_output_dir, layer_name, phonetic, dataset)
                    Path(output_dir).mkdir(parents=True, exist_ok=True)
                    out_f = os.path.join(output_dir, os.path.splitext(f)[0] + ".txt")
                    np.savetxt(out_f, value)


if __name__ == "__main__":
    #import ptvsd
    #ptvsd.enable_attach(('0.0.0.0', 7310))
    #print("Attach debugger now")
    #ptvsd.wait_for_attach()
    main()
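# Example invocation (a sketch; the paths below are placeholders, not paths
# from this repository). This mirrors how eval_abx.sh calls the script:
#   python embeddings_abx.py /path/to/cpc_checkpoint.pt /path/to/zerospeech2021 \
#       /path/to/output_dir --gru_level 2 --nullspace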
@@ -0,0 +1,146 @@
#!/bin/bash
########## CHANGE THIS ##################
ZEROSPEECH_EVAL_ENV=zerospeech2021 # Conda environment where zerospeech2021-evaluate is installed
CPC_ENV=202010-fairseq-c11
CONDA_PATH=/pio/scratch/2/i273233/miniconda3
#########################################

DATASET_PATH=false
ORIGINAL_DATASET_PATH=false
CHECKPOINT_PATH=false
OUTPUT_DIR=false
NULLSPACE=false
NO_TEST=false

print_usage() {
    echo -e "Usage: ./eval_abx.sh"
    echo -e "\t-d DATASET_PATH"
    echo -e "\t-r ORIGINAL_DATASET_PATH"
    echo -e "\t-c CHECKPOINT_PATH"
    echo -e "\t-o OUTPUT_DIR"
    echo -e "OPTIONAL FLAGS:"
    echo -e "\t-n (Load a model with nullspace)"
    echo -e "\t-a CONDA_PATH"
    echo -e "\t-e CPC_ENV"
    echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where zerospeech2021-evaluate is installed)"
    echo -e "\t-t (Do not compute embeddings for the test set)"
}
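# Example invocation (a sketch; all paths are placeholders):
#   ./eval_abx.sh -d /path/to/zerospeech2021 -r /path/to/original/zerospeech2021 \
#                 -c /path/to/checkpoint.pt -o /path/to/output_dir -n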
while getopts 'd:r:c:o:na:e:z:t' flag; do
    case "${flag}" in
        d) DATASET_PATH="${OPTARG}" ;;
        r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;
        c) CHECKPOINT_PATH="${OPTARG}" ;;
        o) OUTPUT_DIR="${OPTARG}" ;;
        n) NULLSPACE=true ;;
        a) CONDA_PATH="${OPTARG}" ;;
        e) CPC_ENV="${OPTARG}" ;;
        z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;;
        t) NO_TEST=true ;;
        *) print_usage
           exit 1 ;;
    esac
done

echo $DATASET_PATH $ORIGINAL_DATASET_PATH $CHECKPOINT_PATH $OUTPUT_DIR $NULLSPACE $CONDA_PATH $CPC_ENV $ZEROSPEECH_EVAL_ENV $NO_TEST

if [[ $DATASET_PATH == false || $ORIGINAL_DATASET_PATH == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false ]]
then
    echo "At least one of DATASET_PATH, ORIGINAL_DATASET_PATH, CHECKPOINT_PATH and OUTPUT_DIR is not set."
    print_usage
    exit 1
fi

SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
results=$OUTPUT_DIR/results
embeddings=$OUTPUT_DIR/embeddings
mkdir -p $embeddings

source $CONDA_PATH/etc/profile.d/conda.sh
SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p')
echo SAVED_ENV: $SAVED_ENV

ENV_TO_ACTIVATE=$CPC_ENV
conda activate $ENV_TO_ACTIVATE
params=""
if [[ $NULLSPACE == true ]]
then
    params="${params} --nullspace"
fi

if [[ $NO_TEST == true ]]
then
    params="${params} --no_test"
fi
echo "Params: $params"

echo "$SCRIPT_PATH/embeddings_abx.py"
python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 $params

directories=("dev-clean" "dev-other")
if [[ $NO_TEST == false ]]
then
    directories+=("test-clean" "test-other")
fi
echo "Directories: ${directories[@]}"
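# Keep only embeddings whose source .wav exists in the original dataset:
# any file present in DATASET_PATH but missing from ORIGINAL_DATASET_PATH
# is removed before evaluation.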
for i in `basename -a $(ls -d $embeddings/*/)`
do
    for directory in ${directories[@]}
    do
        for file in `ls $embeddings/$i/phonetic/$directory`
        do
            filename_no_ext="${file%.*}"
            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]]
            then
                rm $embeddings/$i/phonetic/$directory/$file
            fi
        done
    done
done
conda activate $ZEROSPEECH_EVAL_ENV
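# Frame shift of the extracted embeddings. Assumption: the CPC encoder
# downsamples 16 kHz audio by a factor of 160, i.e. one frame every 10 ms,
# which is why 0.01 s is used here.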
frame_shift="0.01"
echo "Frame shift is ${frame_shift}s"
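# For each distance metric, write a zerospeech2021 meta.yaml, copy it into
# every embedding directory, and run only the phonetic (ABX) evaluation.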
metrics=("cosine" "euclidean")
for metric in ${metrics[@]}
do
    cat > $embeddings/$metric.yaml << EOF
author: LSTM Baseline
affiliation: EHESS, ENS, PSL Research University, CNRS and Inria
description: >
  CPC-big (trained on librispeech 960), kmeans (trained on librispeech 100),
  LSTM. See https://zerospeech.com/2021 for more details.
open_source: true
train_set: librispeech 100 and 960
gpu_budget: 60
parameters:
  phonetic:
    metric: ${metric}
    frame_shift: ${frame_shift}
EOF

    for i in `basename -a $(ls -d $embeddings/*/)`
    do
        cp $embeddings/$metric.yaml $embeddings/$i/meta.yaml
        #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i
        #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
        #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
        zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
    done
done
for metric in ${metrics[@]}
do
    for i in `basename -a $(ls -d $embeddings/*/)`
    do
        echo $i $metric
        cat $results/$metric/$i/score_phonetic.csv
        echo
    done
done > $OUTPUT_DIR/combined_results.txt

conda activate $SAVED_ENV