Skip to content

Commit

Permalink
Fix for highlighting of first and last incorrect words refs #26, and …
Browse files Browse the repository at this point in the history
…fix for processing markup without whitespace between nodes by comparing node display style refs #29
  • Loading branch information
badsyntax committed Mar 10, 2013
1 parent 2f274fd commit ef46167
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 29 deletions.
67 changes: 41 additions & 26 deletions src/js/jquery.spellchecker.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,26 @@
return $('<div />').html(text).html();
};

/**
 * Convert an HTML string to plain text while keeping words from adjacent
 * non-inline elements separated.
 *
 * The markup is placed in a hidden scratch <div> appended to <body> so that
 * each descendant's `display` style can be read. A single space is inserted
 * after every descendant whose display is not 'inline', and after every <br>,
 * before the text content is extracted.
 *
 * @param {string} text Markup to strip (passed straight to jQuery's .html()).
 * @returns {string} The text content of the markup, with the added separators.
 */
var stripHTML = function(text) {

  // NOTE(review): .html() parses `text` into the live document. If callers can
  // pass untrusted markup, inline handlers/scripts may run - confirm the
  // input is trusted or sanitise it upstream.
  var elem = $('<div />').hide().appendTo('body').html(text);

  try {
    // Add some whitespace after block and inline-block elements
    elem.find('*').each(function() {

      var e = $(this);
      var display = e.css('display');
      // Block elements (anything that is not plain inline)
      var isBlock = (display && (display !== 'inline'));
      // Elements that are inline but behave like block
      var isWordBreaking = $.inArray(this.nodeName, ['BR']) !== -1;

      if (isBlock || isWordBreaking) {
        e.after(' ');
      }
    });

    return elem.text();
  } finally {
    // Always detach the scratch element; otherwise every call leaves another
    // hidden <div> behind in <body>.
    elem.remove();
  }
};

/**
 * Backslash-escape the characters of `text` matched by the pattern below
 * (regex metacharacters, plus `,`, `#`, `-` and whitespace) so the result can
 * be embedded in a RegExp source string and matched literally.
 *
 * @param {string} text Raw text to escape.
 * @returns {string} The text with each matched character prefixed by a backslash.
 */
RegExp.escape = function(text) {
  var specialChars = /[\-\[\]{}()*+?.,\^$|#\s]/g;
  var prefixWithBackslash = function(matched) {
    return '\\' + matched;
  };
  return text.replace(specialChars, prefixWithBackslash);
};
Expand Down Expand Up @@ -447,18 +467,10 @@

Parser.prototype.clean = function(text) {

text = '' + text; // Typecast to string
text = stripHTML(text || ''); // Strip any markup
text = decode(text); // Decode HTML characters
text = text.replace(/\xA0|\s+|(&nbsp;)/mg, ' '); // Convert whitespace

var wordBreakingTags = '(p|li)';
text = text.replace(new RegExp('<' + wordBreakingTags + '[^>]*>', 'gi'), ' '); // Strip word breaking tags
text = text.replace(new RegExp('<' + wordBreakingTags + '>', 'gi'), ' '); // Strip word breaking tags
text = text.replace(new RegExp('</' + wordBreakingTags + ' [^>]*>', 'gi'), ' '); // Strip word breaking tags
text = text.replace(new RegExp('</' + wordBreakingTags + '>', 'gi'), ' '); // Strip word breaking tags

text = text.replace(new RegExp('<[^>]+>', 'g'), ''); // Strip other HTML tags

var puncExpr = [
'(^|\\s+)[' + punctuationChars + ']+', // punctuation(s) with leading whitespace(s)
'[' + punctuationChars + ']+\\s+[' + punctuationChars + ']+', // punctuation(s) with leading and trailing whitespace(s)
Expand Down Expand Up @@ -510,8 +522,8 @@
inherits(HtmlParser, Parser);

HtmlParser.prototype.getText = function(text, textGetter) {
if (text && (text = $(text)).length) {
return this.clean(text.text());
if (text) {
return this.clean(text);
}
return $.map(this.elements, function(element) {

Expand All @@ -522,8 +534,7 @@
.clone()
.find('[class^="spellchecker-"]')
.remove()
.end()
.text();
.end();
}

return this.clean(text);
Expand Down Expand Up @@ -583,21 +594,18 @@
};

// Highlight the given incorrect words inside `element`: each word is escaped
// with RegExp.escape, the escaped words are joined into one alternation, and
// the resulting global regex is handed to this.replaceText together with the
// handler returned by this.highlightWordsHandler.
// NOTE(review): this listing appears to interleave pre-change and post-change
// lines of the commit (the diff markers were lost), so several statements
// below show up twice in slightly different forms and the span is not valid
// JavaScript as written - confirm against the actual file before editing.
HtmlParser.prototype.highlightWords = function(incorrectWords, element) {

// Nothing to highlight for an empty word list.
if (!incorrectWords.length) {
return;
}

this.incorrectWords = incorrectWords;
incorrectWords = $.map(incorrectWords, function(word) {
this.incorrectWords = $.map(incorrectWords, function(word) {
// Escape each word so it is matched literally inside the regex.
return RegExp.escape(word);
});

// Multi-line form: a non-letter character, then one of the words, then a
// lookahead for a non-letter character.
var regExp = '';
regExp += '([^' + letterChars + '])';
regExp += '(' + incorrectWords.join('|') + ')';
regExp += '(?=[^' + letterChars + '])';
// Single-line form: same shape, but also accepts start of input before the
// word and end of input after it.
var regExp = '(^|[^' + letterChars + '])(' + this.incorrectWords.join('|') + ')(?=[^' + letterChars + ']|$)';

// The trailing 2 is presumably the capture-group index of the word - verify
// against replaceText.
this.replaceText(new RegExp(regExp, 'g'), element[0], this.highlightWordsHandler(incorrectWords), 2);
this.replaceText(new RegExp(regExp, 'g'), element[0], this.highlightWordsHandler(this.incorrectWords), 2);
};

HtmlParser.prototype.highlightWordsHandler = function(incorrectWords) {
Expand Down Expand Up @@ -888,17 +896,26 @@ window.findAndReplaceDOMText = (function() {
if (node.nodeType === 3) {
return node.data;
}
if (node.nodeName === 'BR') {
return ' ';
}

var tmpNode = node;
var txt = '';

if (!!(node = node.firstChild)) do {
var wordBreakingNode = (node.tagName === 'P' || node.tagName === 'LI');
txt += ((wordBreakingNode ? ' ' : '') + _getText(node));

txt += _getText(node);
} while (!!(node = node.nextSibling));

return txt;
var display = ((window.getComputedStyle)
? window.getComputedStyle(tmpNode, null)
: tmpNode.currentStyle).display;

if (display && (display !== 'inline')) {
tmpNode.parentNode.insertBefore(document.createTextNode(' '), tmpNode.nextSibling);
}

return txt;
}

/**
Expand Down Expand Up @@ -935,8 +952,6 @@ window.findAndReplaceDOMText = (function() {
startNodeIndex = matchLocation[0] - atIndex;
}
atIndex += curNode.length;
} else if (curNode.tagName === 'P' || curNode.tagName === 'LI') {
atIndex += 1;
}

if (startNode && endNode) {
Expand Down
96 changes: 93 additions & 3 deletions tests/javascript/spec/spellchecker.js
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ describe("SpellChecker", function() {
expect(replaced).toBe('Привет, ты в хорошо? Хотели бы Вы немного кокса? Нет, спасибо, я в хорошо!');
});
});
describe('Html parser', function() {

describe('Html parser', function() {

var spellchecker, a, parser;

Expand All @@ -306,7 +307,7 @@ describe('Html parser', function() {

it('Removes punctuation from text with tags', function() {

var text1 = '<p><b>Hello</b>, this "is" a-test.</p><P>How \'are\' you today?</P>';
var text1 = '<p><b>Hello</b>, this "is" a-test.</p><p>How \'are\' you today?</p>';
var text2 = '<ul><li>test!</li><li>test.</li></ul>';
var cleaned1 = parser.clean(text1);
var cleaned2 = parser.clean(text2);
Expand Down Expand Up @@ -349,7 +350,96 @@ describe('Html parser', function() {

expect(replaced).toBe('Привет, ты в хорошо? Хотели бы Вы немного кокса? Нет, спасибо, я в хорошо!');
});
});

describe('Highlight words', function() {

  /**
   * Shared body for the highlight specs: builds a jQuery element from
   * `markup`, runs an 'html' spellcheck on it, waits (up to 750ms) for the
   * check callback, then compares the element's inner HTML with
   * `expectedHtml`.
   *
   * Must be called from inside an `it` block, because it queues Jasmine
   * runs/waitsFor steps. `newSpellChecker` is not defined in this block; it
   * is presumably a helper declared elsewhere in the spec file.
   *
   * @param {string} markup HTML used to build the element under test.
   * @param {string} expectedHtml Expected inner HTML after highlighting.
   */
  var expectHighlightedHtml = function(markup, expectedHtml) {

    var text = $(markup);
    var checked;

    runs(function() {
      newSpellChecker('html', text).check(text, function() {
        checked = true;
      });
    });

    waitsFor(function() {
      return checked;
    }, "Failed", 750);

    runs(function() {
      expect(text.html()).toBe(expectedHtml);
    });
  };

  // See: https://github.com/badsyntax/jquery-spellchecker/issues/26
  it('Can highlight words correctly', function() {
    expectHighlightedHtml(
      '<p>tesst tesst tesst</p>',
      '<span class="spellchecker-word-highlight">tesst</span> <span class="spellchecker-word-highlight">tesst</span> <span class="spellchecker-word-highlight">tesst</span>'
    );
  });

  describe('With whitespace between nodes', function() {

    it('Can highlight words correctly', function() {
      expectHighlightedHtml(
        '<p><span>This is the first sentensce</span> <span>This is the second sentence.</span></p>',
        '<span>This is the first <span class="spellchecker-word-highlight">sentensce</span></span> <span>This is the second sentence.</span>'
      );
    });
  });

  describe('With no whitespace between nodes', function() {

    it('Highlights words correctly with <br> separators', function() {
      // The expected markup has a space after <br>.
      expectHighlightedHtml(
        '<p><span>This is the first sentensce</span><br /><span>This is the second sentence.</span></p>',
        '<span>This is the first <span class="spellchecker-word-highlight">sentensce</span></span><br> <span>This is the second sentence.</span>'
      );
    });

    it('Highlights words correctly with block-level separators', function() {
      // The expected markup has a space after each <p>, including the last.
      expectHighlightedHtml(
        '<div><p>This is the first sentensce</p><p>This is the second sentence.</p></div>',
        '<p>This is the first <span class="spellchecker-word-highlight">sentensce</span></p> <p>This is the second sentence.</p> '
      );
    });
  });
});
});

describe('Public methods', function() {

Expand Down

0 comments on commit ef46167

Please sign in to comment.