Skip to content

Functions file

Julian Pömp edited this page Sep 25, 2017 · 1 revision

OCTRA needs a URL to a functions.js file, which is required to enable validation. If you want your transcription guidelines to be validated, you need to create a functions.js file. This file should be hosted on a web server and referenced in each guidelines.json.

To create a new functions.js, you can duplicate the functions.js in ./config/localmode/ and adapt it as needed.

The functions.js has the following structure:

/**
 * Template for the validation hook: collect the guideline violations found in
 * the annotation text and return them ordered by position.
 * @param annotation raw annotation text
 * @param guidelines guidelines object of the currently loaded language
 * @returns {Array<{start: number, length: number, code: string}>}
 */
function validateAnnotation(annotation, guidelines) {
    var errors = [];

    // ...

    // the collected errors always have to be sorted right before they are returned
    return sortValidationResult(errors);
}

/**
 * Template for the clean-up hook that runs before an annotation is saved.
 * @param annotation annotation text to clean up
 * @param guidelines guidelines object of the currently loaded language
 * @returns string the cleaned-up annotation text
 */
function tidyUpAnnotation(annotation, guidelines) {
    var cleaned = annotation;

    // ...

    return cleaned;
}


/*
###### Default methods.
 */
/**
 * Escapes every regex metacharacter in a string so that the string can be
 * embedded literally into a RegExp source.
 * @param regex_str text to escape
 * @returns string the escaped text
 */
function escapeRegex(regex_str) {
    // characters that carry a special meaning inside a regular expression
    var specialChars = /[-\/\\^$*+?.()|[\]{}]/g;
    // "$&" stands for the matched character, so each one gets a leading backslash
    var escaped = regex_str.replace(specialChars, '\\$&');
    return escaped;
}

/**
 * Orders validation results by ascending start position.
 * The passed array is sorted in place and returned.
 * @param result array of objects carrying a `start` property
 * @returns the same array, sorted
 */
function sortValidationResult(result) {
    var byStart = function (left, right) {
        if (left.start < right.start) {
            return -1;
        }
        if (left.start > right.start) {
            return 1;
        }
        // neither smaller nor greater: treat both entries as equal
        return 0;
    };

    return result.sort(byStart);
}

Methods

validateAnnotation(annotation, guidelines)

If you implement this method you need to go through the annotation text and parse it using regular expressions.

  • annotation: the raw annotation text. Noise markers have already been replaced by their codes. Whenever an error is found, you need to add it to an array. After all errors have been collected, you must sort this array and return it. Each element has the following structure:

    {
        start: <number>,    // text position
        length: <number>,   // length of the invalid text part
        code: <string>      // affected guideline
    }
    
  • guidelines: The whole JSON object of the currently loaded guidelines (e.g. en or de)

tidyUpAnnotation(annotation, guidelines)

Before a transcript is saved, it often needs to be cleaned up (e.g. by removing superfluous whitespace). You can clean up the annotation text using regular expressions and return the result.

Example

/**
 * Example validation method: reports punctuation marks (guideline R06) and
 * directly repeated markers (guideline M01) found in the annotation text.
 * @param annotation raw annotation text
 * @param guidelines guidelines object; the code of each entry in guidelines.markers is read
 * @returns {Array<{start: number, length: number, code: string}>} errors, passed through sortValidationResult
 */
function validateAnnotation(annotation, guidelines) {
    var result = [];
    // Declared locally: assigning to an undeclared `match` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    var match;

    //R06 punctuation marks
    var re = /[\(\.,\!\?;\)]/g;
    while ((match = re.exec(annotation)) !== null) {
        result.push({
            start: match.index,
            length: match[0].length,
            code: "R06"
        });
    }

    //M01
    for (var i = 0; i < guidelines.markers.length; i++) {
        var marker = guidelines.markers[i].code;

        // An empty code would build a pattern that matches zero characters;
        // exec() would then never advance lastIndex and the loop would not end.
        if (marker === "") {
            continue;
        }

        // the same marker repeated at least twice, optionally separated by spaces
        re = new RegExp("(" + escapeRegex(marker) + ")( *(" + escapeRegex(marker) + "))+", "g");
        while ((match = re.exec(annotation)) !== null) {
            result.push({
                start: match.index,
                length: match[0].length,
                code: "M01"
            });
        }
    }

    //the next line has to be before returning the result
    result = sortValidationResult(result);
    return result;
}

/**
 * Example clean-up method that is called before the annotation is saved.
 * It pads angle-bracket tags and asterisk markers with whitespace, collapses
 * runs of whitespace and trims the result.
 * @param annotation annotation text to clean up
 * @param guidelines guidelines object (not used by this example)
 * @returns string the cleaned-up annotation text
 */
function tidyUpAnnotation(annotation, guidelines) {
    var result = annotation;

    //surround tags of the form <...> with whitespace
    result = result.replace(/<[~^a-z0-9]+>/g, function (x) {
        return " " + x + " ";
    });
    //set whitespaces before *
    //the character after the * is only looked at (lookahead), not consumed, so it can
    //still act as the left-hand character of the next match: "a*b*c" -> "a *b *c"
    result = result.replace(/(\w|ä|ü|ö|ß|Ü|Ö|Ä)\*(?=\w|ä|ü|ö|ß|Ü|Ö|Ä)/g, "$1 *");
    //set whitespaces before and after **
    result = result.replace(/(\*\*)|(\s\*\*)|(\*\*\s)/g, " ** ");

    //collapse every run of whitespace into a single space
    result = result.replace(/\s+/g, " ");
    //remove whitespace at start and end
    result = result.replace(/^\s+/g, "");
    result = result.replace(/\s+$/g, "");
    return result;
}


/*
###### Default methods.
 */
/**
 * Escapes every regex metacharacter in a string so that the string can be
 * embedded literally into a RegExp source.
 * @param regex_str text to escape
 * @returns string the escaped text
 */
function escapeRegex(regex_str) {
    //escape special chars in regex
    //"$&" inserts the matched character itself; the former replacement '\\$\\&'
    //produced the literal text "\$\&" instead of the escaped character
    return regex_str.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}

/**
 * Sorts validation results in place by ascending start position.
 * @param result array of objects carrying a `start` property
 * @returns the same array, sorted
 */
function sortValidationResult(result) {
    return result.sort(function (first, second) {
        var lhs = first.start;
        var rhs = second.start;
        // -1 / 1 for smaller / greater, 0 in every other case
        return lhs < rhs ? -1 : (lhs > rhs ? 1 : 0);
    });
}