2021-12-03 17:58:48 +01:00

892 lines
24 KiB
JavaScript

/*
* vasync.js: utilities for observable asynchronous control flow
*/
var mod_assert = require('assert');
var mod_events = require('events');
var mod_util = require('util');
var mod_verror = require('verror');
/*
* Public interface
*/
exports.parallel = parallel;
exports.forEachParallel = forEachParallel;
exports.pipeline = pipeline;
exports.tryEach = tryEach;
exports.forEachPipeline = forEachPipeline;
exports.filter = filter;
exports.filterLimit = filterLimit;
exports.filterSeries = filterSeries;
exports.whilst = whilst;
exports.queue = queue;
exports.queuev = queuev;
exports.barrier = barrier;
exports.waterfall = waterfall;
/*
 * Fallback for runtimes that lack setImmediate: approximate it with a
 * zero-delay setTimeout, forwarding any extra arguments to the function.
 */
if (!global.setImmediate) {
    global.setImmediate = function (func) {
        var extra = Array.prototype.slice.call(arguments, 1);
        setTimeout.apply(this, [ func, 0 ].concat(extra));
    };
}
/*
* This is incorporated here from jsprim because jsprim ends up pulling in a lot
* of dependencies. If we end up needing more from jsprim, though, we should
* add it back and rip out this function.
*/
function isEmpty(obj)
{
    /*
     * Count at most one enumerable property (own or inherited); finding
     * any at all means the object is not empty.
     */
    var nseen = 0;
    var name;

    for (name in obj) {
        nseen++;
        break;
    }

    return (nseen === 0);
}
/*
* Given a set of functions that complete asynchronously using the standard
* callback(err, result) pattern, invoke them all and merge the results. See
* README.md for details.
*/
function parallel(args, callback)
{
    mod_assert.equal(typeof (args), 'object', '"args" must be an object');
    mod_assert.ok(Array.isArray(args['funcs']),
        '"args.funcs" must be specified and must be an array');
    mod_assert.equal(typeof (callback), 'function',
        'callback argument must be specified and must be a function');

    /* Work on a private copy so later caller mutations cannot affect us. */
    var tasks = args.funcs.slice(0);
    var ntasks = tasks.length;
    var idx, op;

    /* The status object is both returned and handed to "callback". */
    var state = {
        'operations': new Array(ntasks),
        'successes': [],
        'ndone': 0,
        'nerrors': 0
    };

    /* Nothing to run: complete asynchronously with the empty status. */
    if (ntasks === 0) {
        setImmediate(function () { callback(null, state); });
        return (state);
    }

    /* Invoked once every task has reported: gather errors and finish. */
    function finish() {
        var failures = [];

        state.operations.forEach(function (ent) {
            if (ent.status == 'fail')
                failures.push(ent.err);
        });

        if (failures.length > 0)
            callback(new mod_verror.MultiError(failures), state);
        else
            callback(null, state);
    }

    /* Builds the completion callback bound to one operation record. */
    function completionFor(entry) {
        return (function (err, result) {
            /* A non-pending entry means the callback fired twice. */
            mod_assert.equal(entry.status, 'pending');

            entry.err = err;
            entry.result = result;

            if (err) {
                entry.status = 'fail';
                state.nerrors++;
            } else {
                entry.status = 'ok';
                state.successes.push(result);
            }

            state.ndone++;
            if (state.ndone >= ntasks)
                finish();
        });
    }

    /*
     * Record each operation before invoking it, so that a task completing
     * synchronously still finds its entry in place.
     */
    for (idx = 0; idx < ntasks; idx++) {
        op = {
            'func': tasks[idx],
            'funcname': tasks[idx].name || '(anon)',
            'status': 'pending'
        };
        state.operations[idx] = op;
        tasks[idx](completionFor(op));
    }

    return (state);
}
/*
* Exactly like parallel, except that the input is specified as a single
* function to invoke on N different inputs (rather than N functions). "args"
* must have the following fields:
*
* func asynchronous function to invoke on each input value
*
* inputs array of input values
*/
function forEachParallel(args, callback)
{
    mod_assert.equal(typeof (args), 'object', '"args" must be an object');
    mod_assert.equal(typeof (args['func']), 'function',
        '"args.func" must be specified and must be a function');
    mod_assert.ok(Array.isArray(args['inputs']),
        '"args.inputs" must be specified and must be an array');

    var iteratee = args.func;

    /* Adapts one input value into the no-argument task parallel() expects. */
    function taskFor(input) {
        return (function (subcallback) {
            return (iteratee(input, subcallback));
        });
    }

    return (parallel({ 'funcs': args.inputs.map(taskFor) }, callback));
}
/*
* Like parallel, but invokes functions in sequence rather than in parallel
* and aborts if any function exits with failure. Arguments include:
*
* funcs array of functions to invoke, in sequence
*
* arg first argument to each pipeline function
*/
function pipeline(args, callback)
{
    mod_assert.equal(typeof (args), 'object', '"args" must be an object');
    mod_assert.ok(Array.isArray(args['funcs']),
        '"args.funcs" must be specified and must be an array');

    /*
     * Run the functions one at a time, handing each the same user
     * argument, stopping at the first error and reporting the status
     * object ("rv") to the callback.
     */
    var stages = args.funcs.slice(0);
    var implArgs = { impl: 'pipeline', uarg: args.arg };

    return (waterfall_impl({
        'funcs': stages,
        'callback': callback,
        'args': implArgs,
        'stop_when': 'error',
        'res_type': 'rv'
    }));
}
function tryEach(funcs, callback)
{
    mod_assert.ok(Array.isArray(funcs),
        '"funcs" must be specified and must be an array');
    mod_assert.ok(arguments.length == 1 || typeof (callback) == 'function',
        '"callback" must be a function');

    /*
     * Try the functions in order and stop at the first one that succeeds;
     * its results go to the callback (as an array when there are several).
     */
    var attempts = funcs.slice(0);

    return (waterfall_impl({
        'funcs': attempts,
        'callback': callback,
        'args': { impl: 'tryEach' },
        'stop_when': 'success',
        'res_type': 'array'
    }));
}
/*
* Exactly like pipeline, except that the input is specified as a single
* function to invoke on N different inputs (rather than N functions). "args"
* must have the following fields:
*
* func asynchronous function to invoke on each input value
*
* inputs array of input values
*/
function forEachPipeline(args, callback) {
    mod_assert.equal(typeof (args), 'object', '"args" must be an object');
    mod_assert.equal(typeof (args['func']), 'function',
        '"args.func" must be specified and must be a function');
    mod_assert.ok(Array.isArray(args['inputs']),
        '"args.inputs" must be specified and must be an array');
    mod_assert.equal(typeof (callback), 'function',
        'callback argument must be specified and must be a function');

    var iteratee = args.func;

    /*
     * Wraps one input as a pipeline stage. The stage ignores the shared
     * pipeline argument (none is supplied here) and passes its own input.
     */
    function stageFor(input) {
        return (function (_, subcallback) {
            return (iteratee(input, subcallback));
        });
    }

    var stages = args.inputs.map(stageFor);

    return (pipeline({'funcs': stages}, callback));
}
/*
* async.js compatible filter, filterLimit, and filterSeries. Takes an input
* array, optionally a limit, and a single function to filter an array and will
* callback with a new filtered array. This is effectively an asynchronous
* version of Array.prototype.filter.
*/
function filter(inputs, filterFunc, callback) {
    /* No concurrency cap: all inputs may be filtered at once. */
    var unlimited = Infinity;

    return (filterLimit(inputs, unlimited, filterFunc, callback));
}
function filterSeries(inputs, filterFunc, callback) {
    /* A limit of one filters the inputs strictly one at a time. */
    var oneAtATime = 1;

    return (filterLimit(inputs, oneAtATime, filterFunc, callback));
}
function filterLimit(inputs, limit, filterFunc, callback) {
    mod_assert.ok(Array.isArray(inputs),
        '"inputs" must be specified and must be an array');
    mod_assert.equal(typeof (limit), 'number',
        '"limit" must be a number');
    mod_assert.equal(isNaN(limit), false,
        '"limit" must be a number');
    mod_assert.equal(typeof (filterFunc), 'function',
        '"filterFunc" must be specified and must be a function');
    mod_assert.equal(typeof (callback), 'function',
        '"callback" argument must be specified as a function');

    /* Errors reported by filterFunc; once non-empty, work is skipped. */
    var failures = [];
    /* Sparse array indexed by input position: { elem, ans } records. */
    var answers = [];
    var wq = queue(processInput, limit);

    /* Queue worker: runs filterFunc for a single { elem, idx } job. */
    function processInput(job, jobDone) {
        /* A previous job failed, so skip all remaining filtering. */
        if (failures.length > 0) {
            jobDone();
            return;
        }

        filterFunc(job.elem, function inputFiltered(err, ans) {
            /* An existing record means this callback already ran. */
            if (answers.hasOwnProperty(job.idx)) {
                throw (new mod_verror.VError(
                    'vasync.filter*: filterFunc idx %d ' +
                    'invoked its callback twice', job.idx));
            }

            /*
             * Keep the element next to its (boolean) verdict; the
             * final selection happens once the queue has ended.
             */
            answers[job.idx] = {
                elem: job.elem,
                ans: !!ans
            };

            /* Remember the error; it is reported when the queue ends. */
            if (err)
                failures.push(err);

            jobDone();
        });
    }

    wq.once('end', function queueDrained() {
        if (failures.length > 0) {
            callback(mod_verror.errorFromList(failures));
            return;
        }

        /*
         * Walk the records in input order and keep the elements whose
         * verdict was true.
         */
        var kept = [];
        answers.forEach(function (record) {
            if (record.ans)
                kept.push(record.elem);
        });

        callback(null, kept);
    });

    /* Tag every element with its index so that order can be restored. */
    inputs.forEach(function (elem, idx) {
        wq.push({
            elem: elem,
            idx: idx
        });
    });

    wq.close();

    return (wq);
}
/*
* async-compatible "whilst" function, with a few notable exceptions/addons.
*
* 1. More strict typing of arguments (functions *must* be supplied).
* 2. A callback function is required, not optional.
* 3. An object is returned, not undefined.
*/
function whilst(testFunc, iterateFunc, callback) {
    mod_assert.equal(typeof (testFunc), 'function',
        '"testFunc" must be specified and must be a function');
    mod_assert.equal(typeof (iterateFunc), 'function',
        '"iterateFunc" must be specified and must be a function');
    mod_assert.equal(typeof (callback), 'function',
        '"callback" argument must be specified as a function');

    /* Introspection object returned to the caller for this invocation. */
    var status = {
        'finished': false,
        'iterations': 0
    };

    /*
     * Arguments from the most recent iterateFunc callback; they are
     * replayed to the final callback when the loop ends.
     */
    var lastArgs = [];

    /* Ends the loop exactly once and reports the last seen arguments. */
    function finish() {
        mod_assert.ok(!status.finished, 'whilst already finished');
        status.finished = true;
        callback.apply(this, lastArgs);
    }

    /* Runs one test-then-iterate step of the loop. */
    function step() {
        if (!testFunc()) {
            /* The test condition no longer holds: leave the loop. */
            finish();
            return;
        }

        /* Count the iteration after the test but before the body. */
        status.iterations++;

        iterateFunc(function whilstIteration(err) {
            lastArgs = Array.prototype.slice.call(arguments);

            /* An error from the body ends the loop; otherwise go on. */
            if (err)
                finish();
            else
                setImmediate(step);
        });
    }

    setImmediate(step);

    return (status);
}
/*
* async-compatible "queue" function.
*/
function queue(worker, concurrency)
{
    /* Positional-argument front end for the WorkQueue constructor. */
    var wqArgs = {
        'worker': worker,
        'concurrency': concurrency
    };

    return (new WorkQueue(wqArgs));
}
function queuev(args)
{
    /* Named-argument variant: "args" is passed to WorkQueue unchanged. */
    var wq = new WorkQueue(args);

    return (wq);
}
/*
 * WorkQueue: an EventEmitter that runs "worker" on pushed tasks with at most
 * "concurrency" tasks outstanding. "args" must carry its own "worker"
 * (function) and "concurrency" (positive whole number) properties.
 */
function WorkQueue(args)
{
    var worker, concurrency;

    mod_assert.ok(args.hasOwnProperty('worker'));
    worker = args.worker;
    mod_assert.equal(typeof (worker), 'function');

    mod_assert.ok(args.hasOwnProperty('concurrency'));
    concurrency = args.concurrency;
    mod_assert.equal(typeof (concurrency), 'number');
    mod_assert.equal(Math.floor(concurrency), concurrency);
    mod_assert.ok(concurrency > 0);

    mod_events.EventEmitter.call(this);

    /* Task ids are handed out sequentially, starting from 1. */
    this.nextid = 0;
    this.worker = worker;
    this.worker_name = worker.name || 'anon';

    /* Dispatched-but-unfinished tasks (count, and entries keyed by id). */
    this.npending = 0;
    this.pending = {};

    /* Tasks waiting for a free slot, in push order. */
    this.queued = [];

    this.closed = false;
    this.ended = false;

    /* user-settable fields inherited from "async" interface */
    this.concurrency = concurrency;
    this.saturated = undefined;
    this.empty = undefined;
    this.drain = undefined;
}
mod_util.inherits(WorkQueue, mod_events.EventEmitter);
/*
 * Enqueue one task or an array of tasks. Returns the new task id, or an
 * array of ids when given an array. "callback" is attached to every task.
 */
WorkQueue.prototype.push = function (tasks, callback)
{
    var self = this;
    var ids;

    if (Array.isArray(tasks)) {
        ids = tasks.map(function (oneTask) {
            return (self.pushOne(oneTask, callback));
        });
        return (ids);
    }

    return (self.pushOne(tasks, callback));
};
/*
 * Change the concurrency limit and immediately try to dispatch queued work
 * under the new limit. Throws if the queue has already been closed.
 */
WorkQueue.prototype.updateConcurrency = function (concurrency)
{
    if (!this.closed) {
        this.concurrency = concurrency;
        this.dispatchNext();
        return;
    }

    throw new mod_verror.VError(
        'update concurrency invoked after queue closed');
};
/*
 * Mark the queue closed so no further tasks can be pushed. Closing an
 * already-closed queue does nothing.
 */
WorkQueue.prototype.close = function ()
{
    var self = this;
    var idle;

    if (self.closed)
        return;

    self.closed = true;

    /*
     * With work still outstanding, "end" is emitted by dispatchNext() once
     * the queue drains. Only an idle queue needs "end" scheduled here, and
     * it fires on the next tick.
     */
    idle = (self.npending === 0 && self.queued.length === 0);
    if (!idle)
        return;

    setImmediate(function () {
        if (self.ended)
            return;
        self.ended = true;
        self.emit('end');
    });
};
/* private */
/*
 * Enqueue a single task under a fresh id, try to dispatch it, and return the
 * id. Throws if the queue has been closed.
 */
WorkQueue.prototype.pushOne = function (task, callback)
{
    var id;

    if (this.closed)
        throw new mod_verror.VError('push invoked after queue closed');

    this.nextid += 1;
    id = this.nextid;

    this.queued.push({ 'id': id, 'task': task, 'callback': callback });
    this.dispatchNext();

    return (id);
};
/* private */
/*
 * Advance the queue: start as many queued tasks as the concurrency limit
 * allows, or announce "drain" (and "end", if closed) when nothing is queued
 * or pending.
 */
WorkQueue.prototype.dispatchNext = function ()
{
    var self = this;
    var entry;

    if (self.queued.length === 0) {
        /* Nothing is waiting; tasks still running need no action. */
        if (self.npending !== 0)
            return;

        if (self.drain)
            self.drain();
        self.emit('drain');

        /* A closed queue at rest emits its final "end" event. */
        if (self.closed) {
            self.ended = true;
            self.emit('end');
        }
        return;
    }

    while (self.queued.length > 0 && self.npending < self.concurrency) {
        entry = self.queued.shift();
        self.dispatch(entry);

        /* The last waiting task was just handed to a worker. */
        if (self.queued.length === 0) {
            if (self.empty)
                self.empty();
            self.emit('empty');
        }
    }
};
/*
 * Start one queue entry: record it as pending, announce saturation when the
 * limit is reached, and run the worker on the next tick.
 */
WorkQueue.prototype.dispatch = function (entry)
{
    var self = this;

    mod_assert.ok(!self.pending.hasOwnProperty(entry.id));
    mod_assert.ok(self.npending < self.concurrency);
    mod_assert.ok(!self.ended);

    self.npending++;
    self.pending[entry.id] = entry;

    if (self.npending === self.concurrency) {
        if (self.saturated)
            self.saturated();
        self.emit('saturated');
    }

    /*
     * Deferring the worker to the next tick guarantees that a task's
     * callback never runs during the push() call itself, and keeps stack
     * depth bounded when the worker completes synchronously.
     */
    setImmediate(function () {
        self.worker(entry.task, function (err) {
            --self.npending;
            delete (self.pending[entry.id]);

            /* Forward every worker result to the per-task callback. */
            if (entry.callback)
                entry.callback.apply(null, arguments);

            self.dispatchNext();
        });
    });
};
/* Number of tasks still waiting to be dispatched (excludes running ones). */
WorkQueue.prototype.length = function ()
{
    var waiting = this.queued;

    return (waiting.length);
};
/*
 * Abandon all tasks that have not been dispatched yet, clear the "drain"
 * hook, and close the queue. Tasks already running are not interrupted.
 */
WorkQueue.prototype.kill = function ()
{
    this.drain = undefined;
    this.queued = [];
    this.killed = true;

    this.close();
};
/*
* Barriers coordinate multiple concurrent operations.
*/
function barrier(args)
{
    /* Factory wrapper so that callers do not need "new". */
    var b = new Barrier(args);

    return (b);
}
/*
 * Barrier: an EventEmitter that tracks named pending operations.
 *
 * "args" is optional. Its one recognized field is:
 *
 * nrecent number of completed operations to remember in "this.recent"
 * (default: 10). An explicit 0 (or a negative number) disables
 * the history entirely, leaving "recent" and "nrecent" unset.
 *
 * Throws an assertion error if "nrecent" is truthy but not a number.
 */
function Barrier(args)
{
    mod_assert.ok(!args || !args['nrecent'] ||
        typeof (args['nrecent']) == 'number',
        '"nrecent" must have type "number"');
    mod_events.EventEmitter.call(this);

    /*
     * A plain truthiness test would fold an explicit 0 into the default
     * of 10, making it impossible to turn the history off with 0. Only
     * the number 0 is special-cased; every other falsy value still
     * selects the default.
     */
    var requested = args ? args['nrecent'] : undefined;
    var nrecent = (requested === 0 || requested) ? requested : 10;

    if (nrecent > 0) {
        this.nrecent = nrecent;
        this.recent = [];
    }

    /* Maps each in-flight operation name to its start time. */
    this.pending = {};
    /* True while a "drain" emission is queued for the next tick. */
    this.scheduled = false;
}
mod_util.inherits(Barrier, mod_events.EventEmitter);
/*
 * Register operation "name" as pending, recording its start time. It is an
 * assertion failure to start a name that is already pending.
 */
Barrier.prototype.start = function (name)
{
    var alreadyPending = this.pending.hasOwnProperty(name);

    mod_assert.ok(!alreadyPending,
        'operation "' + name + '" is already pending');

    this.pending[name] = Date.now();
};
/*
 * Mark operation "name" as finished. When it was the last pending operation,
 * "drain" is emitted on a later tick, provided nothing new has been started
 * by then.
 */
Barrier.prototype.done = function (name)
{
    var self = this;
    var history = self.recent;

    mod_assert.ok(self.pending.hasOwnProperty(name),
        'operation "' + name + '" is not pending');

    /* Keep a bounded history of completed operations, when enabled. */
    if (history) {
        history.push({
            'name': name,
            'start': self.pending[name],
            'done': Date.now()
        });

        if (history.length > self.nrecent)
            history.shift();
    }

    delete (self.pending[name]);

    /*
     * Emitting "drain" synchronously would run listeners in the middle of
     * whatever the caller is doing, so it is deferred to the next tick.
     * The "scheduled" flag stops a start/done pair within this tick from
     * queueing a second emission.
     */
    if (self.scheduled || !isEmpty(self.pending))
        return;

    self.scheduled = true;

    setImmediate(function () {
        self.scheduled = false;

        /* Skip the event if new work was started in the meantime. */
        if (isEmpty(self.pending))
            self.emit('drain');
    });
};
/*
* waterfall([ funcs ], callback): invoke each of the asynchronous functions
* "funcs" in series. Each function is passed any values emitted by the
* previous function (none for the first function), followed by the callback to
* invoke upon completion. This callback must be invoked exactly once,
* regardless of success or failure. As conventional in Node, the first
* argument to the callback indicates an error (if non-null). Subsequent
* arguments are passed to the next function in the "funcs" chain.
*
* If any function fails (i.e., calls its callback with an Error), then the
* remaining functions are not invoked and "callback" is invoked with the error.
*
* The only difference between waterfall() and pipeline() is the arguments
* passed to each function in the chain. pipeline() always passes the same
* argument followed by the callback, while waterfall() passes whatever values
* were emitted by the previous function followed by the callback.
*/
function waterfall(funcs, callback)
{
    mod_assert.ok(Array.isArray(funcs),
        '"funcs" must be specified and must be an array');
    mod_assert.ok(arguments.length == 1 || typeof (callback) == 'function',
        '"callback" must be a function');

    /*
     * Run on a copy of the list, stop at the first error, and pass the
     * last function's emitted values straight to the callback.
     */
    var stages = funcs.slice(0);

    return (waterfall_impl({
        'funcs': stages,
        'callback': callback,
        'args': { impl: 'waterfall' },
        'stop_when': 'error',
        'res_type': 'values'
    }));
}
/*
* This function is used to implement vasync-functions that need to execute a
* list of functions in a sequence, but differ in how they make use of the
* intermediate callbacks and final callback, as well as under what conditions
* they stop executing the functions in the list. Examples of such functions
* are `pipeline`, `waterfall`, and `tryEach`. See the documentation for those
* functions to see how they operate.
*
* This function's behavior is influenced via the `opts` object that we pass
* in. This object has the following layout:
*
* {
* 'funcs': array of functions
* 'callback': the final callback
* 'args': {
* 'impl': 'pipeline' or 'tryEach' or 'waterfall'
* 'uarg': the arg passed to each func for 'pipeline'
* }
* 'stop_when': 'error' or 'success'
* 'res_type': 'values' or 'array' or 'rv'
* }
*
* In the object, 'res_type' is used to indicate what the type of the result
* value(s) is that we pass to the final callback. We secondarily use
* 'args.impl' to adjust this behavior in an implementation-specific way. For
* example, 'tryEach' only returns an array if it has more than 1 result passed
* to the final callback. Otherwise, it passes a solitary value to the final
* callback.
*
* In case it's not clear, 'rv' in the `res_type` member, is just the
* result-value that we also return. This is the convention in functions that
* originated in `vasync` (pipeline), but not in functions that originated in
* `async` (waterfall, tryEach).
*/
function waterfall_impl(opts)
{
    mod_assert.ok(typeof (opts) === 'object');
    var rv, current, next;
    var funcs = opts.funcs;
    var callback = opts.callback;

    mod_assert.ok(Array.isArray(funcs),
        '"opts.funcs" must be specified and must be an array');
    mod_assert.ok(arguments.length == 1,
        'Function "waterfall_impl" must take only 1 arg');
    mod_assert.ok(opts.res_type === 'values' ||
        opts.res_type === 'array' || opts.res_type == 'rv',
        '"opts.res_type" must either be "values", "array", or "rv"');
    mod_assert.ok(opts.stop_when === 'error' ||
        opts.stop_when === 'success',
        '"opts.stop_when" must either be "error" or "success"');
    mod_assert.ok(opts.args.impl === 'pipeline' ||
        opts.args.impl === 'waterfall' || opts.args.impl === 'tryEach',
        '"opts.args.impl" must be "pipeline", "waterfall", or "tryEach"');

    /*
     * There is deliberately no check on "opts.args.uarg" for pipelines.
     * This function used to assert `typeof (uarg) !== undefined`, which
     * compares a string against the value undefined and so could never
     * fail. The user argument is optional (forEachPipeline supplies
     * none), so an undefined "uarg" is valid and is simply passed on.
     */

    /*
     * The status object returned to the caller. One record per function,
     * all initially 'waiting'; each moves to 'pending' when invoked and
     * then to 'ok' or 'fail'.
     */
    rv = {
        'operations': funcs.map(function (func) {
            return ({
                'func': func,
                'funcname': func.name || '(anon)',
                'status': 'waiting'
            });
        }),
        'successes': [],
        'ndone': 0,
        'nerrors': 0
    };

    /*
     * Nothing to run: report success asynchronously. Only pipeline hands
     * the status object to the callback in this case.
     */
    if (funcs.length === 0) {
        if (callback)
            setImmediate(function () {
                var res = (opts.args.impl === 'pipeline') ? rv
                    : undefined;
                callback(null, res);
            });
        return (rv);
    }

    /*
     * Completion callback given to each function, pre-bound with the
     * function's index as "idx". Remaining arguments are (err, ...results).
     */
    next = function (idx, err) {
        /*
         * Note that nfunc_args contains the args we will pass to the
         * next func in the func-list the user gave us. Except for
         * 'tryEach', which passes cb's. However, it will pass
         * 'nfunc_args' to its final callback -- see below.
         */
        var res_key, nfunc_args, entry, nextentry;

        if (err === undefined)
            err = null;

        /* A stale index means this function already called back. */
        if (idx != current) {
            throw (new mod_verror.VError(
                'vasync.waterfall: function %d ("%s") invoked ' +
                'its callback twice', idx,
                rv['operations'][idx].funcname));
        }
        mod_assert.equal(idx, rv['ndone'],
            'idx should be equal to ndone');

        entry = rv['operations'][rv['ndone']++];
        if (opts.args.impl === 'tryEach' ||
            opts.args.impl === 'waterfall') {
            /* Everything after (idx, err) is a result value. */
            nfunc_args = Array.prototype.slice.call(arguments, 2);
            res_key = 'results';
            entry['results'] = nfunc_args;
        } else if (opts.args.impl === 'pipeline') {
            /* Pipelines record one result and re-pass the user arg. */
            nfunc_args = [ opts.args.uarg ];
            res_key = 'result';
            entry['result'] = arguments[2];
        }

        mod_assert.equal(entry['status'], 'pending',
            'status should be pending');
        entry['status'] = err ? 'fail' : 'ok';
        entry['err'] = err;

        if (err) {
            rv['nerrors']++;
        } else {
            rv['successes'].push(entry[res_key]);
        }

        if ((opts.stop_when === 'error' && err) ||
            (opts.stop_when === 'success' &&
            rv['successes'].length > 0) ||
            rv['ndone'] == funcs.length) {
            /* Finished: shape the final callback's arguments. */
            if (callback) {
                if (opts.res_type === 'values' ||
                    (opts.res_type === 'array' &&
                    nfunc_args.length <= 1)) {
                    nfunc_args.unshift(err);
                    callback.apply(null, nfunc_args);
                } else if (opts.res_type === 'array') {
                    callback(err, nfunc_args);
                } else if (opts.res_type === 'rv') {
                    callback(err, rv);
                }
            }
        } else {
            nextentry = rv['operations'][rv['ndone']];
            nextentry['status'] = 'pending';
            current++;
            nfunc_args.push(next.bind(null, current));
            setImmediate(function () {
                var nfunc = nextentry['func'];
                /*
                 * At first glance it may seem like this branch
                 * is superfluous with the code above that
                 * branches on `opts.args.impl`. It may also
                 * seem like calling `nfunc.apply` is
                 * sufficient for both cases (after all we
                 * pushed `next.bind(null, current)` to the
                 * `nfunc_args` array), before we call
                 * `setImmediate()`. However, this is not the
                 * case, because the interface exposed by
                 * tryEach is different from the others. The
                 * others pass argument(s) from task to task.
                 * tryEach passes nothing but a callback
                 * (`next.bind` below). However, the callback
                 * itself _can_ be called with one or more
                 * results, which we collect into `nfunc_args`
                 * using the aforementioned `opts.args.impl`
                 * branch above, and which we pass to the
                 * callback via the `opts.res_type` branch
                 * above (where res_type is set to 'array').
                 */
                if (opts.args.impl !== 'tryEach') {
                    nfunc.apply(null, nfunc_args);
                } else {
                    nfunc(next.bind(null, current));
                }
            });
        }
    };

    /* Start the first function synchronously; later ones are deferred. */
    rv['operations'][0]['status'] = 'pending';
    current = 0;
    if (opts.args.impl !== 'pipeline') {
        funcs[0](next.bind(null, current));
    } else {
        funcs[0](opts.args.uarg, next.bind(null, current));
    }

    return (rv);
}