-
Notifications
You must be signed in to change notification settings - Fork 8
Functions file
OCTRA needs a URL to a functions.js file. This file is required to enable validation: if you want your transcription guidelines to be validated, you need to create a functions.js file, host it on a web server and reference it in each guidelines.json.
To create a new functions.js, you can duplicate the functions.js in ./config/localmode/
and adapt it to your needs.
The functions.js has the following structure:
/**
 * Skeleton of the validation hook: findings are collected in an array and
 * handed back after being passed through sortValidationResult.
 * @param annotation raw annotation text to check
 * @param guidelines JSON object of the currently loaded guidelines
 * @returns {Array<{start: number, length: number, code: string}>} the collected findings
 */
function validateAnnotation(annotation, guidelines) {
  var findings = [];
  // ...
  // sorting has to be the last step before the findings are returned
  return sortValidationResult(findings);
}
/**
 * Skeleton of the clean-up hook that runs before an annotation is saved.
 * As written it hands the text back unchanged; clean-up steps go where the
 * placeholder comment is.
 * @param annotation annotation text to clean up
 * @param guidelines JSON object of the currently loaded guidelines
 * @returns string the cleaned-up annotation text
 */
function tidyUpAnnotation(annotation, guidelines) {
  var cleaned = annotation;
  // ...
  return cleaned;
}
/*
###### Default methods.
*/
/**
 * Puts a backslash in front of every regular-expression metacharacter in the
 * given string, so the result can be embedded in a RegExp pattern and match
 * the original text literally.
 * @param regex_str text to escape
 * @returns string the escaped text
 */
function escapeRegex(regex_str) {
  var metaChars = /[-\/\\^$*+?.()|[\]{}]/g;
  // "$&" re-inserts the matched character right after the added backslash
  var escaped = regex_str.replace(metaChars, '\\$&');
  return escaped;
}
/**
 * Orders validation findings by ascending start position.
 * The array is sorted in place and the same array is returned.
 * @param result array of findings, each with a start property
 * @returns the sorted array
 */
function sortValidationResult(result) {
  function byStart(left, right) {
    if (left.start < right.start) {
      return -1;
    }
    if (left.start > right.start) {
      return 1;
    }
    if (left.start === right.start) {
      return 0;
    }
  }

  return result.sort(byStart);
}
To implement validateAnnotation, go through the annotation text and parse it using regular expressions.
-
annotation: the raw annotation text, in which noise markers have already been replaced by their codes. Whenever an error is found, add it to an array. After all errors have been collected, sort the array and return it. Each element has the following structure:
{
  start: <number>,  // text position
  length: <number>, // length of the invalid text part
  code: <string>    // affected guideline
}
-
guidelines: the whole JSON object of the currently loaded guidelines (e.g. en or de)
Before a transcript is saved, it often has to be cleaned up (e.g. by removing superfluous white space). In tidyUpAnnotation you can clean up the annotation text using regular expressions and return the result.
/**
 * Example validation method showing how guideline checks can be implemented.
 *
 * Two checks are performed:
 *  - R06: every occurrence of one of the characters ( . , ! ? ; ) is reported.
 *  - M01: for each entry of guidelines.markers, a run in which the same marker
 *    code appears two or more times in a row (optionally separated by spaces)
 *    is reported.
 *
 * @param annotation annotation text to validate
 * @param guidelines object with a markers array whose entries carry a code string
 * @returns {Array<{start: number, length: number, code: string}>} the findings,
 *          passed through sortValidationResult
 */
function validateAnnotation(annotation, guidelines) {
  var result = [];
  // Declared locally: assigning to an undeclared name would create a global
  // variable and throws a ReferenceError in strict mode / ES modules.
  var match;

  //R06 punctuation marks
  var re = /[\(\.,\!\?;\)]/g;
  while ((match = re.exec(annotation)) !== null) {
    result.push({
      start: match.index,
      length: match[0].length,
      code: "R06"
    });
  }

  //M01
  for (var i = 0; i < guidelines.markers.length; i++) {
    var marker = guidelines.markers[i].code;
    if (marker === "") {
      // An empty code would build a pattern that matches the empty string;
      // exec() would then never advance and the loop below would not end.
      continue;
    }
    var escapedMarker = escapeRegex(marker);
    re = new RegExp("(" + escapedMarker + ")( *(" + escapedMarker + "))+", "g");
    while ((match = re.exec(annotation)) !== null) {
      result.push({
        start: match.index,
        length: match[0].length,
        code: "M01"
      });
    }
  }

  //the next line has to be before returning the result
  result = sortValidationResult(result);
  return result;
}
/**
 * Example clean-up method that is called before an annotation is saved.
 *
 * Steps, in order: pad <...> tags with spaces, put a space before a * that
 * sits between two word characters, pad ** with spaces, collapse runs of
 * whitespace into a single space and strip whitespace at both ends.
 *
 * @param annotation annotation text to clean up
 * @param guidelines guidelines object (not used by this example)
 * @returns string the cleaned-up annotation text
 */
function tidyUpAnnotation(annotation, guidelines) {
  var result = annotation;

  //surround tags such as <nib> with whitespaces
  result = result.replace(/<[~^a-z0-9]+>/g, function (x) {
    return " " + x + " ";
  });

  //set whitespaces before *
  //The character after the star is only looked at, not consumed, so that it
  //can also serve as the character before the next star ("a*b*c" becomes
  //"a *b *c" in a single pass and a second pass changes nothing).
  result = result.replace(/([\wäüößÜÖÄ])\*(?=[\wäüößÜÖÄ])/g, "$1 *");

  //set whitespaces before and after **
  result = result.replace(/(\*\*)|(\s\*\*)|(\*\*\s)/g, " ** ");

  //replace all numbers of whitespaces to one
  result = result.replace(/\s+/g, " ");

  //remove whitespaces at start and end
  result = result.replace(/^\s+/g, "");
  result = result.replace(/\s$/g, "");

  return result;
}
/*
###### Default methods.
*/
/**
 * Escapes all regular-expression metacharacters in the given string so that
 * it can be embedded in a RegExp pattern and match the original text literally.
 * @param regex_str text to escape
 * @returns string the text with a backslash in front of every metacharacter
 */
function escapeRegex(regex_str) {
  //escape special chars in regex
  //"$&" stands for the matched character; it must not be backslash-escaped
  //itself, otherwise the literal text "\$\&" is inserted instead.
  return regex_str.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}
/**
 * Orders validation findings by ascending start position.
 * The array is sorted in place and the same array is returned.
 * @param result array of findings, each with a start property
 * @returns the sorted array
 */
function sortValidationResult(result) {
  function byStart(left, right) {
    if (left.start < right.start) {
      return -1;
    }
    if (left.start > right.start) {
      return 1;
    }
    if (left.start === right.start) {
      return 0;
    }
  }

  return result.sort(byStart);
}