Hello, devs 👋
In this blog, we will learn the basics of the MongoDB aggregation framework to filter, sort, group, and transform our MongoDB results. MongoDB helps us to do all these operations through aggregation pipelines which are a series of operations that process data documents sequentially.
For practice, we can use - Mongo playground
Input docs
[
{
"key": 1,
username: "saurabh",
age: 18,
languages: [
"c",
"c++"
]
},
{
"key": 2,
username: "leonord",
age: 22,
languages: [
"c",
"c++",
"java"
]
},
{
"key": 3,
username: "sheldon",
age: 14,
languages: [
"c",
"c++",
"java",
"python"
]
},
{
"key": 4,
username: "howard",
age: 32,
languages: [
"c",
"c++",
"java",
"python",
"dart"
]
},
{
"key": 5,
username: "raj",
age: 5,
languages: [
"c",
"c++",
"java",
"python",
"dart",
"ts"
]
}
]
1. $group aggregation = used for grouping and summarizing documents. We must specify an _id
field with a valid expression.
Query
db.collection.aggregate([
{
$group: {
_id: "table_stats",
// Get count of all docs in the collection
count: {
$sum: 1
},
// Get age stats by grouping age field
avgAge: {
$avg: "$age"
},
maxAge: {
$max: "$age"
},
minAge: {
$min: "$age"
},
sumAge: {
$sum: "$age"
},
// Get all usernames by grouping username field
allUsernames: {
$push: "$username"
},
// Get username of first doc
firstUsername: {
$first: "$username"
},
// Get username of last doc
lastUsername: {
$last: "$username"
}
}
}
])
Result
[
{
"_id": "table_stats",
"allUsernames": [
"saurabh",
"leonord",
"sheldon",
"howard",
"raj"
],
"avgAge": 18.2,
"count": 5,
"firstUsername": "saurabh",
"lastUsername": "raj",
"maxAge": 32,
"minAge": 5,
"sumAge": 91
}
]
2. $match aggregation = This is used to reduce the number of docs in the result by filtering.
Query
// Match all docs where `age` is greater than 20 or equal to 20
db.collection.aggregate([
{
"$match": {
age: {
$gte: 20
}
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"key": 2,
"languages": [
"c",
"c++",
"java"
],
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": [
"c",
"c++",
"java",
"python",
"dart"
],
"username": "howard"
}
]
Query
// Match all docs that have languages either `python` or `dart` or both
db.collection.aggregate([
{
"$match": {
languages: {
$in: [
"python",
"dart"
]
}
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"key": 3,
"languages": [
"c",
"c++",
"java",
"python"
],
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": [
"c",
"c++",
"java",
"python",
"dart"
],
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": [
"c",
"c++",
"java",
"python",
"dart",
"ts"
],
"username": "raj"
}
]
Query
// Match all docs with username `saurabh`
db.collection.aggregate([
{
"$match": {
username: {
$eq: "saurabh"
}
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"key": 1,
"languages": [
"c",
"c++"
],
"username": "saurabh"
}
]
Some of the match operators
-
$gte
= Matches if values are greater or equal to the given value. -
$lte
= Matches if values are less or equal to the given value. -
$lt
= Matches if values are less than the given value. -
$gt
= Matches if values are greater than the given value. -
$eq
= Matches values that are equal to the given value. -
$ne
= Matches values that are not equal to the given value. -
$in
= Matches any of the values in an array. -
$nin
= Matches none of the values specified in an array.
3. $skip and $limit aggregation = $skip takes a positive integer that specifies the maximum number of documents to skip. $limit limits the number of documents to look at, by the given number starting from the current positions.
Without skip and limit
Query
// Get all docs with username lexicographically less than or equal to "saurabh"
db.collection.aggregate([
{
"$match": {
username: {
$lte: "saurabh"
}
}
},
// ignore this aggregation, for now, we'll look into it later
{
$project: {
"languages": 0,
"key": 0,
"id": 0
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"username": "raj"
}
]
Without skip = 1 and limit = 2
Query
// Skip the first doc ($skip) and return next 2 docs ($limit)
db.collection.aggregate([
{
"$match": {
username: {
$lte: "saurabh"
}
}
},
{
$skip: 1
},
{
$limit: 2
},
// ignore this aggregation for now
{
$project: {
"languages": 0,
"key": 0,
"id": 0
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"username": "howard"
}
]
4. $sort aggregation = Sorts all input documents and returns them to the pipeline in sorted order.
1 = sort ascending, -1 = sort descending.
Query
// Get all the docs sorted in ascending order on the `age` field
db.collection.aggregate([
{
$sort: {
age: 1
}
},
// ignore this aggregation for now
{
$project: {
"languages": 0,
"key": 0,
"id": 0
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"username": "raj"
}
]
*5. $unwind aggregation * = This is used to unwind documents that are using arrays.
Query
db.collection.aggregate([
{
$unwind: "$languages"
},
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"key": 1,
"languages": "c",
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"key": 1,
"languages": "c++",
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"key": 2,
"languages": "c",
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"key": 2,
"languages": "c++",
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"age": 22,
"key": 2,
"languages": "java",
"username": "leonord"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"key": 3,
"languages": "c",
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"key": 3,
"languages": "c++",
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"key": 3,
"languages": "java",
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"age": 14,
"key": 3,
"languages": "python",
"username": "sheldon"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": "c",
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": "c++",
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": "java",
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": "python",
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000003"),
"age": 32,
"key": 4,
"languages": "dart",
"username": "howard"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "c",
"username": "raj"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "c++",
"username": "raj"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "java",
"username": "raj"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "python",
"username": "raj"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "dart",
"username": "raj"
},
{
"_id": ObjectId("5a934e000102030405000004"),
"age": 5,
"key": 5,
"languages": "ts",
"username": "raj"
}
]
Query
db.collection.aggregate([
{
$unwind: "$languages"
},
{
$match: {
username: "saurabh"
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"key": 1,
"languages": "c",
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"age": 18,
"key": 1,
"languages": "c++",
"username": "saurabh"
}
]
6. $project aggregation = Get some specific fields from a collection by giving the keys values as 0 (exclude) or 1 (include)
Basic Query
db.collection.aggregate([
{
$project: {
username: 1,
languages: 1
}
},
{
$unwind: "$languages"
},
{
$match: {
username: "saurabh"
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"languages": "c",
"username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"languages": "c++",
"username": "saurabh"
}
]
Query with update column names
db.collection.aggregate([
{
$project: {
"system_username": "$username",
"system_languages": "$languages"
}
},
{
$unwind: "$system_languages"
},
{
$match: {
system_username: "saurabh"
}
}
])
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"system_languages": "c",
"system_username": "saurabh"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"system_languages": "c++",
"system_username": "saurabh"
}
]
Follow for more cool articles
Thanks 😎
Top comments (0)