Lodash核心优化浅析

2020-03-09 本文已影响0人有色眼镜的世界

从何而来

lodash是一个JS遵从函数式思维的工具库，作者John-David Dalton描述其动机是underscore在可读性（易理解）以及性能上是不能满足需求^[1], 并且融合函数式编程思维。
作为新兴的工具库，还提供了lodash/fp作为可单独工作的核心模块, 压缩体积约占总体积的1/6，此外还提供了UMD module和lodash-webpack-plugin^[2]等周边工具的支持。

Note: 本篇主要讲解分析lodash对JS的核心优化方式，如何使用请参考官方文档^[3]。

深入实现

1. Lazy Evaluation

1.1 Shortcut Fusion

非严格模式下，优化不需要的执行动作。
首先对于没有shortcut fusion^[4]优化的underscore我们定义一个定长度的数组，然后进行map、filter、take三个操作

// underscore.js
_.chain(_.range(4)).map(v => { console.log('map ---> ', v); return v;}).filter(v => { console.log('filter ---> ', v); return !!(v % 2)}).take(3).value();
// results:
// map --->  0
// map --->  1
// map --->  2
// map --->  3
// filter --->  0
// filter --->  1
// filter --->  2
// filter --->  3

得到的结果是：函数遍历的次数是4+4
接下来，我们看一下进行了short fusing优化的lodash是如何处理的：

// lodash.js
_.chain(_.range(4)).map(v => { console.log('map ---> ', v); return v;}).filter(v => { console.log('filter ---> ', v); return !!(v % 2)}).take(3).value();
// map --->  0
// filter --->  0
// map --->  1
// filter --->  1
// map --->  2
// filter --->  2
// map --->  3
// filter --->  3

同样的操作，函数遍历的次数是3+3，后者会优化掉不必要的函数调用（如map 4 和 filter 4），在数据量比较大并且take的数量比较小的情况效果非常明显。更多的性能对比，可以参考jsPerf上的例子^[5]

性能测试结果对比图片

如果，你觉得数据不直观的话，这里有两个动画很好的展示了什么是shortcut fusion(from Filip Zawada's blog^[6]和中文对照版^[7])

Before Shortcut Fusion

After Shortcut Fusion

实现逻辑：lodash的shortcut核心类是LazyWrapper, 然后通过判断是否使用了可shortcut的函数（Filter，Map），然后在.value()执行时进行优化。
Step 1: 初始化LazyWrapper

// Add `LazyWrapper` methods that accept an `iteratee` value.
    arrayEach(['filter', 'map', 'takeWhile'], function(methodName, index) {
      var type = index + 1,
        isFilter = type == LAZY_FILTER_FLAG || type == LAZY_WHILE_FLAG;

      LazyWrapper.prototype[methodName] = function(iteratee) {
        var result = this.clone();
        result.__iteratees__.push({
          'iteratee': getIteratee(iteratee, 3),
          'type': type
        });
        result.__filtered__ = result.__filtered__ || isFilter;
        return result;
      };
    });

Step 2: 当使用chain.filter时，标志当前LodashWrapper.filtered为true

// Add `LazyWrapper` methods that accept an `iteratee` value.
    arrayEach(['filter', 'map', 'takeWhile'], function(methodName, index) {
      var type = index + 1,
        isFilter = type == LAZY_FILTER_FLAG || type == LAZY_WHILE_FLAG;

      LazyWrapper.prototype[methodName] = function(iteratee) {
        var result = this.clone();
        result.__iteratees__.push({
          'iteratee': getIteratee(iteratee, 3),
          'type': type
        });
        result.__filtered__ = result.__filtered__ || isFilter;
        return result;
      };
    });

Step 3: 当使用.take()时，将LodashWrapper转换为LazyWrapper

arrayEach(['drop', 'take'], function(methodName, index) {
      LazyWrapper.prototype[methodName] = function(n) {
        n = n === undefined ? 1 : nativeMax(toInteger(n), 0);

        var result = (this.__filtered__ && !index)
          ? new LazyWrapper(this)
          : this.clone();

        if (result.__filtered__) {
          result.__takeCount__ = nativeMin(n, result.__takeCount__);
        } else {
          result.__views__.push({
            'size': nativeMin(n, MAX_ARRAY_LENGTH),
            'type': methodName + (result.__dir__ < 0 ? 'Right' : '')
          });
        }
        return result;
      };

      LazyWrapper.prototype[methodName + 'Right'] = function(n) {
        return this.reverse()[methodName](n).reverse();
      };
    });

Step 4: 当调用.value()时，对可以shortcut的函数进行优化，优化的方式主要是标签语句^[8]

 /**
     * Extracts the unwrapped value from its lazy wrapper.
     *
     * @private
     * @name value
     * @memberOf LazyWrapper
     * @returns {*} Returns the unwrapped value.
     */
    function lazyValue() {
      var array = this.__wrapped__.value(),
        dir = this.__dir__,
        isArr = isArray(array),
        isRight = dir < 0,
        arrLength = isArr ? array.length : 0,
        view = getView(0, arrLength, this.__views__),
        start = view.start,
        end = view.end,
        length = end - start,
        index = isRight ? end : (start - 1),
        iteratees = this.__iteratees__,
        iterLength = iteratees.length,
        resIndex = 0,
        takeCount = nativeMin(length, this.__takeCount__);

      if (!isArr || (!isRight && arrLength == length && takeCount == length)) {
        return baseWrapperValue(array, this.__actions__);
      }
      var result = [];

      outer:
        while (length-- && resIndex < takeCount) {
          index += dir;

          var iterIndex = -1,
            value = array[index];

          while (++iterIndex < iterLength) {
            var data = iteratees[iterIndex],
              iteratee = data.iteratee,
              type = data.type,
              computed = iteratee(value);

            if (type == LAZY_MAP_FLAG) {
              value = computed;
            } else if (!computed) {
              if (type == LAZY_FILTER_FLAG) {
                continue outer;
              } else {
                break outer;
              }
            }
          }
          result[resIndex++] = value;
        }
      return result;
    }

1.2 Shortcut Fusion Functions

以进行shortcut fusion的函数列表, 目前版本v4.17.15

 * The wrapper methods that support shortcut fusion are:
 * `at`, `compact`, `drop`, `dropRight`, `dropWhile`, `filter`, `find`,
 * `findLast`, `head`, `initial`, `last`, `map`, `reject`, `reverse`, `slice`,
 * `tail`, `take`, `takeRight`, `takeRightWhile`, `takeWhile`, and `toArray`

1.3 Pipelining & Deferred Execution

Pipeling好处是避免在链式方法执行中创建中间数组，减少内存压力。
Deferred Execution允许在真正执行操作(如.value()操作)之前并不进行真正执行，这里有利于在执行之前对代码进行优化，特别是哪些多次组合pipeline的数组。

下面我们来看两个例子：

Pipeline严格模式和非严格模式
Note: 严格模式与非严格模式的区别，请参考我的文章lazy-evaluation^[9]

// 定义操作
var fun1, fun2, fun3;
var result = _(source).map(func1).map(func2).map(func3).value();

// 严格模式下可能的代码运行方式
var result = [], temp1 = [], temp2 = [], temp3 = [];
for(var i = 0; i &lt; source.length; i++) {
   temp1[i] = func1(source[i]);
}
for(i = 0; i &lt; source.length; i++) {
   temp2[i] = func2(temp1[i]);
}
for(i = 0; i &lt; source.length; i++) {
   temp3[i] = func3(temp2[i]);
}
result = temp3;

// 非严格模式下可能的代码运行方式
var result = [];
for(var i = 0; i &lt; source.length; i++) {
   result[i] = func3(func2(func1(source[i])));
}

不难看出，对于非严格模式下的代码优化，并没有产生中间数组，这对性能提升会大有帮助，尤其是在数据量比较大的情况下。
NOTE: 这里提出另外一个假设，如果func1、func2、func3都为相同操作类型（如Map）时是否可以优化为一次，目前的结果是否定的^[10]。

Deferred Execution
以lodash为例，在执行.value()操作之前生成的都是chainable的中间变量，这一点与lazyJS的Sequence概念十分相似^[11]。

_.chain([1, 2, 3]).map(i => i * 2)
// 执行结果： On {__wrapped__: Un, __actions__: Array(0), __chain__: true, __index__: 0, __values__: undefined}
_.chain([1, 2, 3]).map(i => i * 2).value()
// 执行结果：[2, 4, 6]

两次执行的效率差异非常大，如果将过个结果集合并之后在求值，性能提升会非常明显。

var a = _.chain([1, 2, 3]).map(i => i * 2);
var b = (a) => _.chain(a).filter(i => i > 2)
b(a).value();
// 执行结果：[4, 6]

2. Cheap check vs Expansive check

通过先在某些条件下有限使用cheap check来进行优化性能

function baseMatchesProperty(path, srcValue) {
  if (isKey(path) && isStrictComparable(srcValue)) {
    return matchesStrictComparable(toKey(path), srcValue);
  }
  return function(object) {
    var objValue = get(object, path);
    return (objValue === undefined && objValue === srcValue)
      ? hasIn(object, path)
      : baseIsEqual(srcValue, objValue, COMPARE_PARTIAL_FLAG | COMPARE_UNORDERED_FLAG);
  };
}

3. Inline Caching(ICs)

inline-cahcing内联缓存^[12]，享受编译器提供的热加载性能提升, 对arrayEach进行缓存^[13], 关于V8 ICs的文章也描述了这一点^[14]。

// ROUDN I
function forEach(collection, iteratee) {
  var func = arrayEach;
  return func(collection, getIteratee(iteratee, 3));
}
function arrayEach(array, iteratee) {
    var index = -1,
      length = array == null ? 0 : array.length;

    while (++index < length) {
      if (iteratee(array[index], index, array) === false) {
        break;
      }
    }
    return array;
}

// ROUND II
function forEach(collection, iteratee) {
  var func = function(array, iteratee) {
       var index = -1,
            length = array == null ? 0 : array.length;
      
          while (++index < length) {
            if (iteratee(array[index], index, array) === false) {
              break;
            }
          }
          return array;
  };
  return func(collection, getIteratee(iteratee, 3));
}

这里比较了两种写法在执行效率上的差异，证明了inline-caching的正确性^[13]。但是目前本地使用Nodejs执行benchmark测试失败^[15]。

4. Do not use Build-Ins

这里举一个使用Array的例子

var arr = Array.apply(null, { length: 100000 }).fill({ id: 1 });

// 测试条件
// 循环次数：10000 次

// 测试结果
// JS#Array x 26.96 ops/sec ±1.46% (48 runs sampled)
// JS#BuildIn-Array x 26.12 ops/sec ±1.26% (47 runs sampled)
// Fastest is JS#Array

// 添加测试
suite
  .add('JS#Array', function() {
    let index = -1,
      length = arr.length,
      result = Array(length);
    while (++index < length) {
      result[index] = iteratee(arr[index]);
    }
    return result;
  })
  .add('JS#BuildIn-Array', function() {
    let index = -1,
      length = arr.length,
      result = [];
    while (++index < length) {
      result.push(iteratee(arr[index]));
    }
    return result;
  })
  .on('cycle', function(event) {
    console.log(String(event.target));
  })
  .on('complete', function() {
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  })
  .run({ async: true });

const iteratee = function(i) {
  i.id = +i.id.toString() / 2 + '1';
};