// Connect to the local `crunchbase` database and print the name of every
// biotech company. Uses the callback-style MongoDB 2.x driver API.
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

MongoClient.connect('mongodb://localhost:27017/crunchbase', function(connErr, db) {
    // Fail fast if the connection could not be established.
    assert.equal(connErr, null);
    console.log("Successfully connected to MongoDB.");

    // Match only companies whose category_code is "biotech".
    var filter = {
        "category_code": "biotech"
    };

    // toArray() pulls the ENTIRE result set into memory before the callback runs.
    db.collection('companies').find(filter).toArray(function(queryErr, companies) {
        assert.equal(queryErr, null);
        assert.notEqual(companies.length, 0);

        companies.forEach(function(company) {
            console.log(company.name + " is a " + company.category_code + " company.");
        });

        // All documents processed; release the connection.
        db.close();
    });
});
Notice that the call to .toArray
causes the application to fetch the entire dataset at once.
// Connect to the local `crunchbase` database and stream every biotech
// company through a cursor, printing each name as it arrives.
// Uses the callback-style MongoDB 2.x driver API.
//
// BUG FIX: the original snippet had the `function(doc)` callback placed
// BEFORE `cursor.forEach(` (a syntax error); the document callback and the
// end/error callback are now passed to cursor.forEach in the correct order.
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    // Fail fast if the connection could not be established.
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    // Match only companies whose category_code is "biotech".
    var query = {
        "category_code": "biotech"
    };

    // find() returns a cursor immediately; no documents are fetched until
    // we start consuming them below.
    var cursor = db.collection('companies').find(query);

    cursor.forEach(
        // Called once per document as the driver streams batches in.
        function(doc) {
            console.log(doc.name + " is a " + doc.category_code + " company.");
        },
        // Called once when the cursor is exhausted or an error occurs.
        function(err) {
            assert.equal(err, null);
            return db.close();
        }
    );
});
Notice that the cursor returned by the find()
is assigned to var cursor
. With this approach, instead of fetching all data in memory and consuming data at once, we're streaming the data to our application. find()
can create a cursor immediately because it doesn't actually make a request to the database until we try to use some of the documents it will provide. The point of cursor
is to describe our query. The second parameter to cursor.forEach
specifies what to do when the cursor is exhausted or an error occurs.
In the initial version of the above code, it was toArray()
which forced the database call. It meant we needed ALL the documents and wanted them to be in an array
.
Also, MongoDB
returns data in batches. The image below shows how requests flow from the cursor (in the application) to MongoDB.

forEach
is better than toArray
because we can process documents as they come in until we reach the end. Contrast it with toArray
- where we wait for ALL the documents to be retrieved and the entire array is built. This means we're not getting any advantage from the fact that the driver and the database system are working together to batch results to our application. Batching is meant to provide efficiency in terms of memory overhead and execution time. Take advantage of it in your application if you can.