JavaScript Regex Lookbehind Redux

Five years ago I posted Mimicking Lookbehind in JavaScript on this blog, wherein I detailed several ways to emulate positive and negative lookbehind in JavaScript. My approaches back then were all fairly rough, and it was complicated to properly customize any of them to work with a given pattern. Plus, they were only designed to simulate lookbehind in a regex-based replacement.

To make it much easier to use lookbehind, I recently posted a collection of short functions on GitHub. They use XRegExp v2, so you should check that out, too.

Here's the code:

// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
// Captures within lookbehind are not included in match results. Lazy
// repetition in lookbehind may lead to unexpected results.

(function (XRegExp) {

    // Turns a lookbehind pattern string into an object with two properties:
    //   lb   - a regex to be tested against the text that precedes a candidate match
    //   type - true for positive lookbehind, false for negative lookbehind
    function prepareLb(lb) {
        // An optional leading mode modifier, e.g. (?i), may come before the lookbehind group
        var pieces = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb);

        if (!pieces) {
            // Not wrapped in (?<=...) or (?<!...): compile the pattern exactly as given
            // and treat it as a positive lookbehind
            return {
                lb: XRegExp(lb),
                type: true
            };
        }

        var modeModifier = pieces[1];
        var isPositive = pieces[2] === "=";
        var innerPattern = pieces[3];

        return {
            // The inner pattern must match at the end of the preceding text.
            // Using $(?!\s) rather than a plain $ allows use of (?m) in lookbehind
            lb: XRegExp(modeModifier + "(?:" + innerPattern + ")$(?!\\s)"),
            type: isPositive
        };
    }

    // Returns the first match of `regex` in `str` whose preceding text satisfies the
    // lookbehind pattern `lb` (given as a string), or null when there is no such match.
    // The returned value is the match array produced by XRegExp.exec.
    XRegExp.execLb = function (str, lb, regex) {
        var lookbehind = prepareLb(lb),
            searchFrom = 0,
            candidate = XRegExp.exec(str, regex, searchFrom);

        while (candidate) {
            // Everything to the left of the candidate is what the lookbehind is checked against
            if (lookbehind.lb.test(str.slice(0, candidate.index)) === lookbehind.type) {
                return candidate;
            }
            // Rejected: resume the search one character past the start of this candidate
            searchFrom = candidate.index + 1;
            candidate = XRegExp.exec(str, regex, searchFrom);
        }

        return null;
    };

    // Returns true if `regex` matches somewhere in `str` at a position that satisfies
    // the lookbehind pattern `lb`; otherwise false.
    XRegExp.testLb = function (str, lb, regex) {
        var found = XRegExp.execLb(str, lb, regex);
        return Boolean(found);
    };

    // Returns the index of the first match of `regex` in `str` that satisfies the
    // lookbehind pattern `lb`, or -1 when there is none.
    XRegExp.searchLb = function (str, lb, regex) {
        var found = XRegExp.execLb(str, lb, regex);
        if (found) {
            return found.index;
        }
        return -1;
    };

    // Collects the matched text (match[0] only) of every match of `regex` in `str`
    // that satisfies the lookbehind pattern `lb`. Returns an array of strings, which
    // is empty when nothing qualifies.
    XRegExp.matchAllLb = function (str, lb, regex) {
        var lookbehind = prepareLb(lb),
            results = [],
            cursor = 0,
            hit,
            step;

        for (hit = XRegExp.exec(str, regex, cursor); hit; hit = XRegExp.exec(str, regex, cursor)) {
            // A rejected candidate advances the search by a single character
            step = 1;
            if (lookbehind.lb.test(str.slice(0, hit.index)) === lookbehind.type) {
                results.push(hit[0]);
                // An accepted match is skipped over entirely; a zero-length match
                // still moves forward by one character so the loop always progresses
                step = hit[0].length || 1;
            }
            cursor = hit.index + step;
        }

        return results;
    };

    // Returns a copy of `str` in which matches of `regex` that satisfy the lookbehind
    // pattern `lb` are replaced.
    //   str         - string to search
    //   lb          - lookbehind pattern as a string, e.g. "(?<=x)" or "(?i)(?<!x)"
    //   regex       - main regex; every qualifying match is replaced if it has flag g,
    //                 otherwise only the first qualifying match
    //   replacement - string or function, handed to XRegExp.replace
    XRegExp.replaceLb = function (str, lb, regex, replacement) {
        var output = "", pos = 0, lastEnd = 0, match, leftContext;
        lb = prepareLb(lb);
        while (match = XRegExp.exec(str, regex, pos)) {
            leftContext = str.slice(0, match.index);
            if (lb.type === lb.lb.test(leftContext)) {
                // The replacement is computed by re-running the regex on the matched text alone.
                // Doesn't work correctly if lookahead in regex looks outside of the match.
                // Scope "one" limits this to a single replacement. Without it, a regex with
                // flag g that can match the empty string (e.g. /a*/g) would also match the
                // empty string at the end of match[0] and emit the replacement a second time
                output += str.slice(lastEnd, match.index) +
                    XRegExp.replace(match[0], regex, replacement, "one");
                lastEnd = match.index + match[0].length;
                if (!regex.global) {
                    break;
                }
                // Continue after this match; a zero-length match still advances by one character
                pos = match.index + (match[0].length || 1);
            } else {
                // Lookbehind rejected this candidate: retry one character further along
                pos = match.index + 1;
            }
        }
        // Append whatever follows the last replaced match (all of str if nothing was replaced)
        return output + str.slice(lastEnd);
    };

}(XRegExp));

That's less than 0.5 KB after minification and gzipping. It provides a collection of functions that make it simple to emulate leading lookbehind:

  • XRegExp.execLb
  • XRegExp.testLb
  • XRegExp.searchLb
  • XRegExp.matchAllLb
  • XRegExp.replaceLb

Each of these functions takes three arguments: the string to search, the lookbehind pattern as a string (can use XRegExp syntax extensions), and the main regex. XRegExp.replaceLb takes a fourth argument for the replacement value, which can be a string or function.

Usage examples follow:

XRegExp.execLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", XRegExp("(?i)(?<first>c)at"));
// -> ["cat", "c"]
// Result has named backref: result.first -> "c"

XRegExp.execLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> null

XRegExp.testLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i);
// -> true

XRegExp.testLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> false

XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i);
// -> 18

XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i);
// -> 0

XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<=fluffy\\W+)", /cat\w*/i);
// -> ["cats"]

XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<!fluffy\\W+)", /cat\w*/i);
// -> ["Catwoman", "cats"]

XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<=fluffy\\W+)", /cat/ig, "dog");
// -> "Catwoman's fluffy dog is a cat"

XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, "dog");
// -> "dogwoman's fluffy cat is a dog"

XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, function ($0) {
    var first = $0.charAt(0);
    return first === first.toUpperCase() ? "Dog" : "dog";
});
// -> "Dogwoman's fluffy cat is a dog"

Easy peasy lemon squeezy. 🙂

10 thoughts on “JavaScript Regex Lookbehind Redux”

  1. @Peter Boughton, I hold XRegExp and its official addons to a high standard. I hope these lookbehind functions are useful to you and others, but their limitations (such as working only at the beginning of a pattern) prevent them from making the cut for the main script. Also, I’d like to keep the main XRegExp script as lean as possible, since file size can be a major concern in browserland.

    Still, if you want lookbehind emulation in JavaScript, this is easily the best and most robust solution out there.

  2. Hi Steven, I apologize if this isn’t a good place to ask this, but is there an ActiveX or COM version of XRegExp? I currently use VBScript.RegExp as a COM object from a proprietary scripting language for FrameMaker (FrameScript). It would be great if I could use XRegExp this way as well. Thank you very much. Rick

  3. This is a fix for replaceLb that allows lookaheads following lookbehind. This is useful when you want to add some text at certain positions between characters.

    XRegExp.replaceLb = function (str, lb, regex, replacement) {
    ..
    ..
    // Doesn’t work correctly if lookahead in regex looks outside of the match
    //output += str.slice(lastEnd, match.index) + XRegExp.replace(match[0], regex, replacement);

    // AMR: code to allow (lookbehind)(lookahead) constructs
    var re = XRegExp(regex.source, (regex.ignoreCase ? 'i' : '') + (regex.multiline ? 'm' : ''));
    var rContx1 = str.slice(match.index);
    var rContx2 = XRegExp.replace(rContx1, re, replacement);
    var replen = match[0].length + rContx2.length - rContx1.length;
    output += str.slice(lastEnd, match.index) + rContx2.slice(0, replen);
    //
    ..
    ..
    };

    //examples of (?<=)(?=)
    console.log(XRegExp.replaceLb("tx,tx", "(?<=t)", /(?=x)/g, "-")); // -> "t-x,t-x"

    console.log(XRegExp.replaceLb("12345678", "(?<=\\d)", /(?=(?:\d{3})+(?!\d))/g, ",")); // -> "12,345,678"

  4. // examples of (?<=)(?=)
    console.log(XRegExp.replaceLb("tx,tx", "(?<=t)", /(?=x)/g, "-"));
    // -> "t-x,t-x"

    console.log(XRegExp.replaceLb("12345678", "(?<=\\d)", /(?=(?:\d{3})+(?!\d))/g, ","));
    // -> "12,345,678"

  5. I wonder, will there ever be a solution for lookbehind in the middle of a regex? I’m facing this problem now, and I can’t think of a way around it.

  6. I don’t get it. These are executing *lookaheads*. If I execute XRegExp.execLb(“This a test.”, “\ a test.”], not [“This “].

    You say the second parameter of execLb is the lookbehind pattern, but it doesn’t do that.

Leave a Reply

Your email address will not be published. Required fields are marked *