Implement groupBy with for() and measure performance

The new for()-based implementation is about 8 times faster than the previous groupBy.
This commit is contained in:
Alexander Zobnin
2017-06-25 22:51:27 +03:00
parent da6373bda7
commit 2f6da44311
11 changed files with 236 additions and 38 deletions

View File

@@ -1,12 +1,46 @@
import _ from 'lodash';
import ts from '../timeseries'; import ts from '../timeseries';
let datapoints = [[]]; let datapoints = [[10.7104, 1498409636085], [10.578, 1498409651011], [10.5985, 1498409666628], [10.6877, 1498409681525], [10.5495, 1498409696586], [10.5981, 1498409711009], [10.5076, 1498409726949], [11.4807, 1498409741853], [11.6165, 1498409756165], [11.8575, 1498409771018], [11.9936, 1498409786056], [10.7566, 1498409801942], [10.7484, 1498409816010], [10.6038, 1498409831018], [10.2932, 1498409846010], [10.4912, 1498409861946], [10.4151, 1498409876871], [10.2401, 1498409891710], [10.4921, 1498409906143], [10.4413, 1498409921477], [10.6318, 1498409936147], [10.5277, 1498409951915], [10.6333, 1498409966052], [10.6417, 1498409981944], [10.4505, 1498409996867], [10.5812, 1498410011770], [10.4934, 1498410026573], [10.5731, 1498410041317], [10.5, 1498410056213], [10.6505, 1498410071013], [9.4035, 1498410086387]];
var fibonacci = function (n) { let series_set = [
return n < 2 ? n : fibonacci(n - 1) + fibonacci(n - 2); [[1.0247, 1498409631773], [0.9988, 1498409646697], [0.9817, 1498409661239], [0.9569, 1498409676045], [1.0331, 1498409691922], [1.0755, 1498409706546], [1.1862, 1498409721525], [1.2984, 1498409736175], [1.2389, 1498409751817], [1.1452, 1498409766783], [1.102, 1498409781699], [0.9647, 1498409796664], [1.0063, 1498409811627], [1.0318, 1498409826887], [1.065, 1498409841645], [1.0907, 1498409856647], [1.0229, 1498409871521], [1.0654, 1498409886031], [1.0568, 1498409901544], [1.0818, 1498409916194], [1.1335, 1498409931672], [1.057, 1498409946673], [1.0243, 1498409961669], [1.0329, 1498409976637], [1.1428, 1498409991563], [1.2198, 1498410006441], [1.2192, 1498410021230], [1.2615, 1498410036027], [1.1765, 1498410051907], [1.2352, 1498410066109], [1.0557, 1498410081043]],
}; [[10.7104, 1498409636085], [10.578, 1498409651011], [10.5985, 1498409666628], [10.6877, 1498409681525], [10.5495, 1498409696586], [10.5981, 1498409711009], [10.5076, 1498409726949], [11.4807, 1498409741853], [11.6165, 1498409756165], [11.8575, 1498409771018], [11.9936, 1498409786056], [10.7566, 1498409801942], [10.7484, 1498409816010], [10.6038, 1498409831018], [10.2932, 1498409846010], [10.4912, 1498409861946], [10.4151, 1498409876871], [10.2401, 1498409891710], [10.4921, 1498409906143], [10.4413, 1498409921477], [10.6318, 1498409936147], [10.5277, 1498409951915], [10.6333, 1498409966052], [10.6417, 1498409981944], [10.4505, 1498409996867], [10.5812, 1498410011770], [10.4934, 1498410026573], [10.5731, 1498410041317], [10.5, 1498410056213], [10.6505, 1498410071013], [9.4035, 1498410086387]]
];
module.exports = function () { module.exports = [
fibonacci(10); {
fibonacci(8); name: 'groupBy',
}; tests: {
'groupBy(AVERAGE)': () => {
ts.groupBy(datapoints, '5m', ts.AVERAGE);
},
'groupBy(MAX)': () => {
ts.groupBy(datapoints, '5m', ts.COUNT);
}
}
},
{
name: 'sumSeries',
tests: {
'sumSeries()': () => {
ts.sumSeries(series_set);
},
'groupBy(MAX)->sumSeries()': () => {
let prepeared_series = _.map(series_set, datapoints => ts.groupBy(datapoints, '5m', ts.MAX));
ts.sumSeries(prepeared_series);
}
}
},
{
name: 'groupBy vs groupBy_perf',
tests: {
'groupBy()': () => {
ts.groupBy(datapoints, '5m', ts.AVERAGE);
},
'groupBy_perf()': () => {
ts.groupBy_perf(datapoints, '5m', ts.AVERAGE);
}
}
}
];

View File

@@ -92,7 +92,7 @@ System.register(['lodash', './utils', './timeseries'], function (_export, _conte
}], }],
execute: function () { execute: function () {
downsampleSeries = ts.downsample; downsampleSeries = ts.downsample;
groupBy = ts.groupBy; groupBy = ts.groupBy_perf;
sumSeries = ts.sumSeries; sumSeries = ts.sumSeries;
delta = ts.delta; delta = ts.delta;

File diff suppressed because one or more lines are too long

View File

@@ -3,21 +3,11 @@
System.register(['lodash', './utils'], function (_export, _context) { System.register(['lodash', './utils'], function (_export, _context) {
"use strict"; "use strict";
var _, utils, exportedFunctions; var _, utils, POINT_VALUE, POINT_TIMESTAMP, exportedFunctions;
/** /**
* Downsample time series by using given function (avg, min, max). * Downsample time series by using given function (avg, min, max).
*/ */
/**
* timeseries.js
*
* This module contains functions for working with time series.
*
* datapoints - array of points where point is [value, timestamp]. In almost all cases (if other wasn't
* explicitly said) we assume datapoints are sorted by timestamp.
*
*/
function downsample(datapoints, time_to, ms_interval, func) { function downsample(datapoints, time_to, ms_interval, func) {
var downsampledSeries = []; var downsampledSeries = [];
var timeWindow = { var timeWindow = {
@@ -92,6 +82,34 @@ System.register(['lodash', './utils'], function (_export, _context) {
})); }));
} }
/**
 * Group points into fixed-size time frames in a single pass and aggregate
 * the values of each frame with the given callback.
 *
 * Only consecutive points that fall into the same frame are merged, so the
 * input is expected to be sorted by timestamp.
 *
 * @param {Array} datapoints array of [value, timestamp] points
 * @param {string} interval interval passed to utils.parseInterval (e.g. '5m');
 *                          presumably resolved to milliseconds - confirm in utils
 * @param {Function} groupByCallback receives the array of values of one frame
 *                                   and returns the aggregated value
 * @return {Array} array of [aggregated value, frame start timestamp] points;
 *                 empty when datapoints is empty
 */
function groupBy_perf(datapoints, interval, groupByCallback) {
  // Without this guard the final flush below would emit a bogus
  // [groupByCallback([]), 0] point for an empty series.
  if (datapoints.length === 0) {
    return [];
  }

  var ms_interval = utils.parseInterval(interval);
  var grouped_series = [];
  var frame_values = [];
  var frame_ts = getPointTimeFrame(datapoints[0][POINT_TIMESTAMP], ms_interval);
  var point_frame_ts = frame_ts;
  var point = void 0;

  for (var i = 0; i < datapoints.length; i++) {
    point = datapoints[i];
    point_frame_ts = getPointTimeFrame(point[POINT_TIMESTAMP], ms_interval);
    if (point_frame_ts === frame_ts) {
      frame_values.push(point[POINT_VALUE]);
    } else {
      // Frame boundary crossed: flush the finished frame and start a new one
      // seeded with the current point.
      grouped_series.push([groupByCallback(frame_values), frame_ts]);
      frame_ts = point_frame_ts;
      frame_values = [point[POINT_VALUE]];
    }
  }

  // Flush the last (still open) frame.
  grouped_series.push([groupByCallback(frame_values), frame_ts]);
  return grouped_series;
}
/** /**
* Summarize set of time series into one. * Summarize set of time series into one.
* @param {datapoints[]} timeseries array of time series * @param {datapoints[]} timeseries array of time series
@@ -128,11 +146,15 @@ System.register(['lodash', './utils'], function (_export, _context) {
} }
return sortByTime(new_timeseries); return sortByTime(new_timeseries);
}function scale(datapoints, factor) { }
function scale(datapoints, factor) {
return _.map(datapoints, function (point) { return _.map(datapoints, function (point) {
return [point[0] * factor, point[1]]; return [point[0] * factor, point[1]];
}); });
}function delta(datapoints) { }
function delta(datapoints) {
var newSeries = []; var newSeries = [];
var deltaValue = void 0; var deltaValue = void 0;
for (var i = 1; i < datapoints.length; i++) { for (var i = 1; i < datapoints.length; i++) {
@@ -140,25 +162,37 @@ System.register(['lodash', './utils'], function (_export, _context) {
newSeries.push([deltaValue, datapoints[i][1]]); newSeries.push([deltaValue, datapoints[i][1]]);
} }
return newSeries; return newSeries;
}function SUM(values) { }
function SUM(values) {
var sum = 0; var sum = 0;
_.each(values, function (value) { _.each(values, function (value) {
sum += value; sum += value;
}); });
return sum; return sum;
}function COUNT(values) { }
function COUNT(values) {
return values.length; return values.length;
}function AVERAGE(values) { }
function AVERAGE(values) {
var sum = 0; var sum = 0;
_.each(values, function (value) { _.each(values, function (value) {
sum += value; sum += value;
}); });
return sum / values.length; return sum / values.length;
}function MIN(values) { }
function MIN(values) {
return _.min(values); return _.min(values);
}function MAX(values) { }
function MAX(values) {
return _.max(values); return _.max(values);
}function MEDIAN(values) { }
function MEDIAN(values) {
var sorted = _.sortBy(values); var sorted = _.sortBy(values);
return sorted[Math.floor(sorted.length / 2)]; return sorted[Math.floor(sorted.length / 2)];
} }
@@ -167,6 +201,18 @@ System.register(['lodash', './utils'], function (_export, _context) {
// Utility functions // // Utility functions //
/////////////////////// ///////////////////////
/**
 * Snap a timestamp down to the start of the interval-sized frame containing it.
 *
 * |__*_|_*__|___*| -> |*___|*___|*___|
 *
 * @param {number} timestamp point timestamp
 * @param {number} ms_interval frame size, in the same units as timestamp
 * @return {number} largest multiple of ms_interval not greater than timestamp
 */
function getPointTimeFrame(timestamp, ms_interval) {
  var frameIndex = Math.floor(timestamp / ms_interval);
  return frameIndex * ms_interval;
}
function sortByTime(series) { function sortByTime(series) {
return _.sortBy(series, function (point) { return _.sortBy(series, function (point) {
return point[1]; return point[1];
@@ -194,13 +240,17 @@ System.register(['lodash', './utils'], function (_export, _context) {
} }
} }
return series; return series;
}function linearInterpolation(timestamp, left, right) { }
function linearInterpolation(timestamp, left, right) {
if (left[1] === right[1]) { if (left[1] === right[1]) {
return (left[0] + right[0]) / 2; return (left[0] + right[0]) / 2;
} else { } else {
return left[0] + (right[0] - left[0]) / (right[1] - left[1]) * (timestamp - left[1]); return left[0] + (right[0] - left[0]) / (right[1] - left[1]) * (timestamp - left[1]);
} }
}function findNearestRight(series, point) { }
function findNearestRight(series, point) {
var point_index = _.indexOf(series, point); var point_index = _.indexOf(series, point);
var nearestRight; var nearestRight;
for (var i = point_index; i < series.length; i++) { for (var i = point_index; i < series.length; i++) {
@@ -209,7 +259,9 @@ System.register(['lodash', './utils'], function (_export, _context) {
} }
} }
return nearestRight; return nearestRight;
}function findNearestLeft(series, point) { }
function findNearestLeft(series, point) {
var point_index = _.indexOf(series, point); var point_index = _.indexOf(series, point);
var nearestLeft; var nearestLeft;
for (var i = point_index; i > 0; i--) { for (var i = point_index; i > 0; i--) {
@@ -231,9 +283,12 @@ System.register(['lodash', './utils'], function (_export, _context) {
utils = _utils; utils = _utils;
}], }],
execute: function () { execute: function () {
POINT_VALUE = 0;
POINT_TIMESTAMP = 1;
exportedFunctions = { exportedFunctions = {
downsample: downsample, downsample: downsample,
groupBy: groupBy, groupBy: groupBy,
groupBy_perf: groupBy_perf,
sumSeries: sumSeries, sumSeries: sumSeries,
scale: scale, scale: scale,
delta: delta, delta: delta,

File diff suppressed because one or more lines are too long

View File

@@ -37,4 +37,14 @@ module.exports = [{
_timeseries2.default.sumSeries(prepeared_series); _timeseries2.default.sumSeries(prepeared_series);
} }
} }
}, {
name: 'groupBy vs groupBy_perf',
tests: {
'groupBy()': function groupBy() {
_timeseries2.default.groupBy(datapoints, '5m', _timeseries2.default.AVERAGE);
},
'groupBy_perf()': function groupBy_perf() {
_timeseries2.default.groupBy_perf(datapoints, '5m', _timeseries2.default.AVERAGE);
}
}
}]; }];

View File

@@ -21,7 +21,7 @@ function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj;
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
var downsampleSeries = _timeseries2.default.downsample; var downsampleSeries = _timeseries2.default.downsample;
var groupBy = _timeseries2.default.groupBy; var groupBy = _timeseries2.default.groupBy_perf;
var sumSeries = _timeseries2.default.sumSeries; var sumSeries = _timeseries2.default.sumSeries;
var delta = _timeseries2.default.delta; var delta = _timeseries2.default.delta;
var scale = function scale(factor, datapoints) { var scale = function scale(factor, datapoints) {

View File

@@ -16,9 +16,6 @@ function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj;
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
/**
* Downsample time series by using given function (avg, min, max).
*/
/** /**
* timeseries.js * timeseries.js
* *
@@ -29,6 +26,12 @@ function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { de
* *
*/ */
var POINT_VALUE = 0;
var POINT_TIMESTAMP = 1;
/**
* Downsample time series by using given function (avg, min, max).
*/
function downsample(datapoints, time_to, ms_interval, func) { function downsample(datapoints, time_to, ms_interval, func) {
var downsampledSeries = []; var downsampledSeries = [];
var timeWindow = { var timeWindow = {
@@ -103,6 +106,34 @@ function groupBy(datapoints, interval, groupByCallback) {
})); }));
} }
/**
 * Group points into fixed-size time frames in a single pass and aggregate
 * the values of each frame with the given callback.
 *
 * Only consecutive points that fall into the same frame are merged, so the
 * input is expected to be sorted by timestamp.
 *
 * @param {Array} datapoints array of [value, timestamp] points
 * @param {string} interval interval passed to utils.parseInterval (e.g. '5m');
 *                          presumably resolved to milliseconds - confirm in utils
 * @param {Function} groupByCallback receives the array of values of one frame
 *                                   and returns the aggregated value
 * @return {Array} array of [aggregated value, frame start timestamp] points;
 *                 empty when datapoints is empty
 */
function groupBy_perf(datapoints, interval, groupByCallback) {
  // Without this guard the final flush below would emit a bogus
  // [groupByCallback([]), 0] point for an empty series.
  if (datapoints.length === 0) {
    return [];
  }

  var ms_interval = utils.parseInterval(interval);
  var grouped_series = [];
  var frame_values = [];
  var frame_ts = getPointTimeFrame(datapoints[0][POINT_TIMESTAMP], ms_interval);
  var point_frame_ts = frame_ts;
  var point = void 0;

  for (var i = 0; i < datapoints.length; i++) {
    point = datapoints[i];
    point_frame_ts = getPointTimeFrame(point[POINT_TIMESTAMP], ms_interval);
    if (point_frame_ts === frame_ts) {
      frame_values.push(point[POINT_VALUE]);
    } else {
      // Frame boundary crossed: flush the finished frame and start a new one
      // seeded with the current point.
      grouped_series.push([groupByCallback(frame_values), frame_ts]);
      frame_ts = point_frame_ts;
      frame_values = [point[POINT_VALUE]];
    }
  }

  // Flush the last (still open) frame.
  grouped_series.push([groupByCallback(frame_values), frame_ts]);
  return grouped_series;
}
/** /**
* Summarize set of time series into one. * Summarize set of time series into one.
* @param {datapoints[]} timeseries array of time series * @param {datapoints[]} timeseries array of time series
@@ -194,6 +225,18 @@ function MEDIAN(values) {
// Utility functions // // Utility functions //
/////////////////////// ///////////////////////
/**
 * Snap a timestamp down to the start of the interval-sized frame containing it.
 *
 * |__*_|_*__|___*| -> |*___|*___|*___|
 *
 * @param {number} timestamp point timestamp
 * @param {number} ms_interval frame size, in the same units as timestamp
 * @return {number} largest multiple of ms_interval not greater than timestamp
 */
function getPointTimeFrame(timestamp, ms_interval) {
  var frameIndex = Math.floor(timestamp / ms_interval);
  return frameIndex * ms_interval;
}
function sortByTime(series) { function sortByTime(series) {
return _lodash2.default.sortBy(series, function (point) { return _lodash2.default.sortBy(series, function (point) {
return point[1]; return point[1];
@@ -260,6 +303,7 @@ function findNearestLeft(series, point) {
var exportedFunctions = { var exportedFunctions = {
downsample: downsample, downsample: downsample,
groupBy: groupBy, groupBy: groupBy,
groupBy_perf: groupBy_perf,
sumSeries: sumSeries, sumSeries: sumSeries,
scale: scale, scale: scale,
delta: delta, delta: delta,

View File

@@ -31,5 +31,16 @@ module.exports = [
ts.sumSeries(prepeared_series); ts.sumSeries(prepeared_series);
} }
} }
},
{
name: 'groupBy vs groupBy_perf',
tests: {
'groupBy()': () => {
ts.groupBy(datapoints, '5m', ts.AVERAGE);
},
'groupBy_perf()': () => {
ts.groupBy_perf(datapoints, '5m', ts.AVERAGE);
}
}
} }
]; ];

View File

@@ -3,7 +3,7 @@ import * as utils from './utils';
import ts from './timeseries'; import ts from './timeseries';
let downsampleSeries = ts.downsample; let downsampleSeries = ts.downsample;
let groupBy = ts.groupBy; let groupBy = ts.groupBy_perf;
let sumSeries = ts.sumSeries; let sumSeries = ts.sumSeries;
let delta = ts.delta; let delta = ts.delta;
let scale = (factor, datapoints) => ts.scale(datapoints, factor); let scale = (factor, datapoints) => ts.scale(datapoints, factor);

View File

@@ -11,6 +11,9 @@
import _ from 'lodash'; import _ from 'lodash';
import * as utils from './utils'; import * as utils from './utils';
const POINT_VALUE = 0;
const POINT_TIMESTAMP = 1;
/** /**
* Downsample time series by using given function (avg, min, max). * Downsample time series by using given function (avg, min, max).
*/ */
@@ -90,6 +93,34 @@ function groupBy(datapoints, interval, groupByCallback) {
})); }));
} }
/**
 * Group points into fixed-size time frames in a single pass and aggregate
 * the values of each frame with the given callback.
 *
 * Only consecutive points that fall into the same frame are merged, so the
 * input is expected to be sorted by timestamp.
 *
 * @param {Array} datapoints array of [value, timestamp] points
 * @param {string} interval interval passed to utils.parseInterval (e.g. '5m');
 *                          presumably resolved to milliseconds - confirm in utils
 * @param {Function} groupByCallback receives the array of values of one frame
 *                                   and returns the aggregated value
 * @return {Array} array of [aggregated value, frame start timestamp] points;
 *                 empty when datapoints is empty
 */
function groupBy_perf(datapoints, interval, groupByCallback) {
  // Without this guard the final flush below would emit a bogus
  // [groupByCallback([]), 0] point for an empty series.
  if (datapoints.length === 0) {
    return [];
  }

  const ms_interval = utils.parseInterval(interval);
  const grouped_series = [];
  let frame_values = [];
  let frame_ts = getPointTimeFrame(datapoints[0][POINT_TIMESTAMP], ms_interval);

  for (let i = 0; i < datapoints.length; i++) {
    const point = datapoints[i];
    const point_frame_ts = getPointTimeFrame(point[POINT_TIMESTAMP], ms_interval);
    if (point_frame_ts === frame_ts) {
      frame_values.push(point[POINT_VALUE]);
    } else {
      // Frame boundary crossed: flush the finished frame and start a new one
      // seeded with the current point.
      grouped_series.push([groupByCallback(frame_values), frame_ts]);
      frame_ts = point_frame_ts;
      frame_values = [point[POINT_VALUE]];
    }
  }

  // Flush the last (still open) frame.
  grouped_series.push([groupByCallback(frame_values), frame_ts]);
  return grouped_series;
}
/** /**
* Summarize set of time series into one. * Summarize set of time series into one.
* @param {datapoints[]} timeseries array of time series * @param {datapoints[]} timeseries array of time series
@@ -184,6 +215,18 @@ function MEDIAN(values) {
// Utility functions // // Utility functions //
/////////////////////// ///////////////////////
/**
 * Snap a timestamp down to the start of the interval-sized frame containing it.
 *
 * |__*_|_*__|___*| -> |*___|*___|*___|
 *
 * @param {number} timestamp point timestamp
 * @param {number} ms_interval frame size, in the same units as timestamp
 * @return {number} largest multiple of ms_interval not greater than timestamp
 */
function getPointTimeFrame(timestamp, ms_interval) {
  const frameIndex = Math.floor(timestamp / ms_interval);
  return frameIndex * ms_interval;
}
function sortByTime(series) { function sortByTime(series) {
return _.sortBy(series, function (point) { return _.sortBy(series, function (point) {
return point[1]; return point[1];
@@ -250,6 +293,7 @@ function findNearestLeft(series, point) {
const exportedFunctions = { const exportedFunctions = {
downsample, downsample,
groupBy, groupBy,
groupBy_perf,
sumSeries, sumSeries,
scale, scale,
delta, delta,