Five years ago I posted Mimicking Lookbehind in JavaScript on this blog, wherein I detailed several ways to emulate positive and negative lookbehind in JavaScript. My approaches back then were all fairly rough, and it was complicated to properly customize any of them to work with a given pattern. Plus, they were only designed to simulate lookbehind in a regex-based replacement.
To make it much easier to use lookbehind, I recently posted a collection of short functions on GitHub. They use XRegExp v2, so you should check that out, too.
Here's the code:
// Emulates leading lookbehind of unlimited length in JavaScript, built on XRegExp.
// Capturing groups inside the lookbehind are not part of the match results, and
// lazy repetition inside the lookbehind may lead to unexpected results.
(function (XRegExp) {

    // Turns a lookbehind pattern string into {lb: compiled regex, type: boolean},
    // where `type` is true for positive lookbehind and false for negative.
    function prepareLb(lb) {
        // A mode modifier such as (?i) is allowed in front of the lookbehind group
        var pieces = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb),
            source = lb,
            isPositive = true;

        if (pieces) {
            // Anchor the lookbehind body to the end of the left context; the
            // trailing $(?!\s) is what allows (?m) to be used in the lookbehind
            source = pieces[1] + "(?:" + pieces[3] + ")$(?!\\s)";
            isPositive = pieces[2] === "=";
        }
        // Without a lookbehind group the pattern is compiled as given and
        // treated as positive

        return {
            lb: XRegExp(source),
            type: isPositive
        };
    }

    // Checks the prepared lookbehind against the text that precedes `index`.
    function passesLb(prepared, str, index) {
        var before = str.slice(0, index);
        return prepared.type === prepared.lb.test(before);
    }

    // Returns the first match of `regex` whose left context satisfies the
    // lookbehind, or null when there is none.
    XRegExp.execLb = function (str, lb, regex) {
        var prepared = prepareLb(lb),
            from = 0,
            found;

        for (;;) {
            found = XRegExp.exec(str, regex, from);
            if (!found) {
                return null;
            }
            if (passesLb(prepared, str, found.index)) {
                return found;
            }
            // Lookbehind rejected this position; retry one character further on
            from = found.index + 1;
        }
    };

    // Returns true when execLb finds a match, false otherwise.
    XRegExp.testLb = function (str, lb, regex) {
        return Boolean(XRegExp.execLb(str, lb, regex));
    };

    // Returns the index of the first accepted match, or -1 when there is none.
    XRegExp.searchLb = function (str, lb, regex) {
        var found = XRegExp.execLb(str, lb, regex);

        if (found) {
            return found.index;
        }
        return -1;
    };

    // Returns an array with the matched text of every accepted match.
    XRegExp.matchAllLb = function (str, lb, regex) {
        var prepared = prepareLb(lb),
            results = [],
            from = 0,
            found,
            text;

        while ((found = XRegExp.exec(str, regex, from))) {
            if (!passesLb(prepared, str, found.index)) {
                from = found.index + 1;
                continue;
            }
            text = found[0];
            results.push(text);
            // Step past the match, moving at least one character for empty matches
            from = found.index + (text.length || 1);
        }

        return results;
    };

    // Returns `str` with accepted matches replaced by `replacement`. Only the
    // first accepted match is replaced unless `regex` is global.
    XRegExp.replaceLb = function (str, lb, regex, replacement) {
        var prepared = prepareLb(lb),
            result = "",
            copiedTo = 0,
            from = 0,
            found,
            text;

        while ((found = XRegExp.exec(str, regex, from))) {
            if (!passesLb(prepared, str, found.index)) {
                from = found.index + 1;
                continue;
            }
            text = found[0];
            // The replacement is computed on the matched text alone, so this
            // doesn't work correctly if lookahead in regex looks outside of the match
            result += str.slice(copiedTo, found.index) + XRegExp.replace(text, regex, replacement);
            copiedTo = found.index + text.length;
            if (!regex.global) {
                break;
            }
            from = found.index + (text.length || 1);
        }

        // Append whatever follows the last replaced match
        return result + str.slice(copiedTo);
    };

}(XRegExp));
That's less than 0.5 KB after minification and gzipping. It provides a collection of functions that make it simple to emulate leading lookbehind:
XRegExp.execLb
XRegExp.testLb
XRegExp.searchLb
XRegExp.matchAllLb
XRegExp.replaceLb
Each of these functions takes three arguments: the string to search, the lookbehind pattern as a string (can use XRegExp syntax extensions), and the main regex. XRegExp.replaceLb takes a fourth argument for the replacement value, which can be a string or function.
Usage examples follow:
// execLb: first match preceded (or not preceded) by "fluffy" plus non-word characters
XRegExp.execLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", XRegExp("(?i)(?<first>c)at"));
// -> ["cat", "c"]
// Result has named backref: result.first -> "c"

XRegExp.execLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> null

// testLb: boolean form of execLb
XRegExp.testLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i);
// -> true

XRegExp.testLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> false

// searchLb: index of the first accepted match
XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i);
// -> 18

XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> 0

// matchAllLb: text of every accepted match
XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<=fluffy\\W+)", /cat\w*/i);
// -> ["cats"]

XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<!fluffy\\W+)", /cat\w*/i);
// -> ["Catwoman", "cats"]

// replaceLb: replace accepted matches with a string...
XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<=fluffy\\W+)", /cat/ig, "dog");
// -> "Catwoman's fluffy dog is a cat"

XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, "dog");
// -> "dogwoman's fluffy cat is a dog"

// ...or with a function; here the replacement keeps the capitalization of the match
XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, function ($0) {
    var initial = $0.charAt(0);
    if (initial === initial.toUpperCase()) {
        return "Dog";
    }
    return "dog";
});
// -> "Dogwoman's fluffy cat is a dog"
Easy peasy lemon squeezy. 🙂