Fix for highlighting of first and last incorrect words (refs #26), and fix for processing markup without whitespace between nodes by comparing node display style (refs #29)
badsyntax committed on Mar 10, 2013
commit ef46167 · 1 parent 2f274fd
Show file tree

Hide file tree

Showing 2 changed files with 134 additions and 29 deletions.
diff --git a/src/js/jquery.spellchecker.js b/src/js/jquery.spellchecker.js
@@ -57,6 +57,26 @@
     return $('<div />').html(text).html();
   };
 
+  var stripHTML = function(text) {
+
+    var elem = $('<div />').hide().appendTo('body').html(text);
+
+    // Add some whitespace after block and inline-block elements
+    elem.find('*').each(function() {
+
+      var e = $(this);
+      var display = e.css('display');
+      // Block elements
+      var isBlock = (display && (display !== 'inline'));
+      // Elements that are inline but behave like block
+      var isWordBreaking = $.inArray(this.nodeName, ['BR']) !== -1;
+
+      (isBlock || isWordBreaking) && e.after(" ");
+    });
+
+    return elem.text();
+  };
+
   RegExp.escape = function(text) {
     return text.replace(/[\-\[\]{}()*+?.,\^$|#\s]/g, "\\$&");
   };
@@ -447,18 +467,10 @@
 
   Parser.prototype.clean = function(text) {
 
-    text = '' + text; // Typecast to string
+    text = stripHTML(text || ''); // Strip any markup
     text = decode(text); // Decode HTML characters
     text = text.replace(/\xA0|\s+|(&nbsp;)/mg, ' '); // Convert whitespace
 
-    var wordBreakingTags = '(p|li)';
-    text = text.replace(new RegExp('<' + wordBreakingTags + '[^>]*>', 'gi'), ' '); // Strip word breaking tags
-    text = text.replace(new RegExp('<' + wordBreakingTags + '>', 'gi'), ' '); // Strip word breaking tags   
-    text = text.replace(new RegExp('</' + wordBreakingTags + ' [^>]*>', 'gi'), ' '); // Strip word breaking tags
-    text = text.replace(new RegExp('</' + wordBreakingTags + '>', 'gi'), ' '); // Strip word breaking tags
-
-    text = text.replace(new RegExp('<[^>]+>', 'g'), ''); // Strip other HTML tags
-
     var puncExpr = [
       '(^|\\s+)[' + punctuationChars + ']+',                        // punctuation(s) with leading whitespace(s)
       '[' + punctuationChars + ']+\\s+[' + punctuationChars + ']+', // punctuation(s) with leading and trailing whitespace(s)
@@ -510,8 +522,8 @@
   inherits(HtmlParser, Parser);
 
   HtmlParser.prototype.getText = function(text, textGetter) {
-    if (text && (text = $(text)).length) {
-      return this.clean(text.text());
+    if (text) {
+      return this.clean(text);
     }
     return $.map(this.elements, function(element) {
 
@@ -522,8 +534,7 @@
         .clone()
         .find('[class^="spellchecker-"]')
         .remove()
-        .end()
-        .text();
+        .end();
       }
 
       return this.clean(text);
@@ -583,21 +594,18 @@
   };
 
   HtmlParser.prototype.highlightWords = function(incorrectWords, element) {
+
     if (!incorrectWords.length) {
       return;
     }
 
-    this.incorrectWords = incorrectWords;
-    incorrectWords = $.map(incorrectWords, function(word) {
+    this.incorrectWords = $.map(incorrectWords, function(word) {
       return RegExp.escape(word);
     });
 
-    var regExp = '';
-    regExp += '([^' + letterChars + '])';
-    regExp += '(' + incorrectWords.join('|') + ')';
-    regExp += '(?=[^' + letterChars + '])';
+    var regExp = '(^|[^' + letterChars + '])(' + this.incorrectWords.join('|') + ')(?=[^' + letterChars + ']|$)';
 
-    this.replaceText(new RegExp(regExp, 'g'), element[0], this.highlightWordsHandler(incorrectWords), 2);
+    this.replaceText(new RegExp(regExp, 'g'), element[0], this.highlightWordsHandler(this.incorrectWords), 2);
   };
 
   HtmlParser.prototype.highlightWordsHandler = function(incorrectWords) {
@@ -888,17 +896,26 @@ window.findAndReplaceDOMText = (function() {
     if (node.nodeType === 3) {
       return node.data;
     }
+    if (node.nodeName === 'BR') {
+      return ' ';
+    }
 
+    var tmpNode = node;
     var txt = '';
 
     if (!!(node = node.firstChild)) do {
-            var wordBreakingNode = (node.tagName === 'P' || node.tagName === 'LI');
-            txt += ((wordBreakingNode ? ' ' : '') + _getText(node));
-
+      txt += _getText(node);
     } while (!!(node = node.nextSibling));
 
-    return txt;
+    var display = ((window.getComputedStyle)
+      ? window.getComputedStyle(tmpNode, null)
+      : tmpNode.currentStyle).display;
+
+    if (display && (display !== 'inline')) {
+      tmpNode.parentNode.insertBefore(document.createTextNode(' '), tmpNode.nextSibling);
+    }
 
+    return txt;
   }
 
   /** 
@@ -935,8 +952,6 @@ window.findAndReplaceDOMText = (function() {
           startNodeIndex = matchLocation[0] - atIndex;
         }
         atIndex += curNode.length;
-      } else if (curNode.tagName === 'P' || curNode.tagName === 'LI') {
-          atIndex += 1;
       }
 
       if (startNode && endNode) {

diff --git a/tests/javascript/spec/spellchecker.js b/tests/javascript/spec/spellchecker.js
@@ -289,7 +289,8 @@ describe("SpellChecker", function() {
       expect(replaced).toBe('Привет, ты в хорошо? Хотели бы Вы немного кокса? Нет, спасибо, я в хорошо!');
     });
   });  
-describe('Html parser', function() {
+
+  describe('Html parser', function() {
 
     var spellchecker, a, parser;
 
@@ -306,7 +307,7 @@ describe('Html parser', function() {
 
     it('Removes punctuation from text with tags', function() {
 
-      var text1 = '<p><b>Hello</b>, this "is" a-test.</p><P>How \'are\' you today?</P>';
+      var text1 = '<p><b>Hello</b>, this "is" a-test.</p><p>How \'are\' you today?</p>';
       var text2 = '<ul><li>test!</li><li>test.</li></ul>';
       var cleaned1 = parser.clean(text1);
       var cleaned2 = parser.clean(text2);
@@ -349,7 +350,96 @@ describe('Html parser', function() {
 
       expect(replaced).toBe('Привет, ты в хорошо? Хотели бы Вы немного кокса? Нет, спасибо, я в хорошо!');
     });
-  });  
+
+    describe('Highlight words', function() {
+
+      // See: https://github.com/badsyntax/jquery-spellchecker/issues/26
+      it('Can highlight words correctly', function() {
+
+        var text = $('<p>tesst tesst tesst</p>');
+        var checked;
+
+        runs(function() {
+          newSpellChecker('html', text).check(text, function() {
+            checked = true;
+          });
+        });
+
+        waitsFor(function() {
+          return checked;
+        }, "Failed", 750);
+
+        runs(function() {
+          expect(text.html()).toBe('<span class="spellchecker-word-highlight">tesst</span> <span class="spellchecker-word-highlight">tesst</span> <span class="spellchecker-word-highlight">tesst</span>');
+        });
+      });
+
+      describe('With whitespace between nodes', function() {
+         it('Can highlight words correctly', function() {
+
+          var text = $('<p><span>This is the first sentensce</span> <span>This is the second sentence.</span></p>');
+          var checked;
+
+          runs(function() {
+            newSpellChecker('html', text).check(text, function() {
+              checked = true;
+            });
+          });
+
+          waitsFor(function() {
+            return checked;
+          }, "Failed", 750);
+
+          runs(function() {
+            expect(text.html()).toBe('<span>This is the first <span class="spellchecker-word-highlight">sentensce</span></span> <span>This is the second sentence.</span>');
+          });
+        });
+      })
+
+      describe('With no whitespace between nodes', function() {
+
+        it('Highlights words correctly with <br> separators', function() {
+
+          var text = $('<p><span>This is the first sentensce</span><br /><span>This is the second sentence.</span></p>');
+          var checked;
+
+          runs(function() {
+            newSpellChecker('html', text).check(text, function() {
+              checked = true;
+            });
+          });
+
+          waitsFor(function() {
+            return checked;
+          }, "Failed", 750);
+
+          runs(function() {
+            expect(text.html()).toBe('<span>This is the first <span class="spellchecker-word-highlight">sentensce</span></span><br> <span>This is the second sentence.</span>');
+          });
+        });
+
+        it('Highlights words correctly with block-level separators', function() {
+
+          var text = $('<div><p>This is the first sentensce</p><p>This is the second sentence.</p></div>');
+          var checked;
+
+          runs(function() {
+            newSpellChecker('html', text).check(text, function() {
+              checked = true;
+            });
+          });
+
+          waitsFor(function() {
+            return checked;
+          }, "Failed", 750);
+
+          runs(function() {
+            expect(text.html()).toBe('<p>This is the first <span class="spellchecker-word-highlight">sentensce</span></p>  <p>This is the second sentence.</p>  ');
+          });
+        });
+      });
+    });  
+  });
 
   describe('Public methods', function() {