四、Mongodb视图及pipline
1 视图
1.1 创建视图
基本语法
db.createView(
"<viewName>",
"<source>",
[<pipeline>],
{
"collation" : { <collation> }
}
)
#参数解释
<viewName> 必须,视图名称
<source> 必须,数据源,集合/视图
[<pipeline>] 可选,一组管道
"collation" 可选,排序规则
1.2 删除视图
db.视图名.drop()
1.3 例子
t2、t3表数据
> db.t2.find()
{ "_id" : ObjectId("60adbec5d15f5f92b4805399"), "id" : 1, "name" : "a" }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b" }
{ "_id" : ObjectId("60adbec5d15f5f92b480539b"), "id" : 3, "name" : "c" }
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
{ "_id" : ObjectId("60b0602aac7d71a1301a1715"), "id" : 2, "age" : 20, "city" : "jilin" }
建立视图,基于t2
表的id
字段,查询t3
表id
字段相同的内容,t3
表查出的内容放到age&city
字段中。对于id=2
的数据,t3
中匹配多行,多行数据放到[ ]
中,分隔为2个{ }
db.createView(
"t2joint3",
"t2",
[
{
$lookup:
{
from:"t3",
localField:"id",
foreignField:"id",
as:"age&city"
}
},
{
$project:
{
"id":1,"name":1,"age&city.age":1,"age&city.city":1
}
}
]
)
查询视图内容
> db.t2joint3.find()
{ "_id" : ObjectId("60adbec5d15f5f92b4805399"), "id" : 1, "name" : "a", "age&city" : [ { "age" : 10, "city" : "beijing" } ] }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b", "age&city" : [ { "age" : 11, "city" : "shanghai" }, { "age" : 20, "city" : "jilin" } ] }
{ "_id" : ObjectId("60adbec5d15f5f92b480539b"), "id" : 3, "name" : "c", "age&city" : [ { "age" : 12, "city" : "dalian" } ] }
优化
使用$unwind
拆分age&city
字段中的列表
db.createView(
"t2joint3",
"t2",
[
{
$lookup:
{
from:"t3",
localField:"id",
foreignField:"id",
as:"age&city"
}
},
{
$project:
{
"id":1,"name":1,"age&city.age":1,"age&city.city":1
}
},
{
$unwind:
{
path:"$age&city"
}
}
]
)
查询视图内容
> db.t2joint3.find()
{ "_id" : ObjectId("60adbec5d15f5f92b4805399"), "id" : 1, "name" : "a", "age&city" : { "age" : 10, "city" : "beijing" } }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b", "age&city" : { "age" : 11, "city" : "shanghai" } }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b", "age&city" : { "age" : 20, "city" : "jilin" } }
{ "_id" : ObjectId("60adbec5d15f5f92b480539b"), "id" : 3, "name" : "c", "age&city" : { "age" : 12, "city" : "dalian" } }
2 pipeline管道
pipeline将多个文档中的值组合在一起,并可对分组数据执行各种操作,以返回单个结果,相当于SQL中的 count(*)与group by。
aggregation pipeline主要是用aggregate()方法来实现聚合操作。
pipeline通俗的理解就是一个管道,其中每一个操作就是管道的一个阶段,每次当前操作接受上一个阶段的输出作为输入,并把输出结果作为输入结果给下一个阶段。
语法如下:db.collection.aggregate( [ { $语法 }, … ] )
2.1 $project
$project用来指明返回值包含哪些列,包含的置1,不包含的置0(自定义列默认置0,_id列默认置1,需要手工置0)
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
只取age列{$project:{"age":1,"_id":0}}
> db.t3.aggregate([{$project:{"age":1,"_id":0}}])
{ "age" : 10 }
{ "age" : 11 }
{ "age" : 12 }
2.2 $match
通过跟查询语句相比对,来过滤集合,只返回跟查询语句相匹配的行。
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
只取age=10的行{$match:{"age":10}}
> db.t3.aggregate([{$match:{"age":10}}])
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
只取age<11的行{$match:{"age":{$lt:11}}}
,其余比较符号$gt
、$eq
、$ne
> db.t3.aggregate([{$match:{"age":{$lt:11}}}])
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
2.3 $limit
限制返回的数据条目数
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
只取2行{$limit:2}
> db.t3.aggregate([{$limit:2}])
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
2.4 $skip
跳过n行数据
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
跳过2行{$skip:2}
> db.t3.aggregate([{$skip:2}])
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
2.5 $sort
对所有的输入数据进行排序,并输出排序好的数据。
使用语法
{ $sort: { <field1>: <sort order>, <field2>: <sort order> ... } }
其中sort order可以取值为1,-1:
当sort order取值为1时,代表升序。
当sort order取值为-1时,代表降序。
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
按照age列倒序排序{$sort:{"age":-1}}
> db.t3.aggregate([{$sort:{"age":-1}}])
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
2.6 $unwind
将数据行中含有列表内容的部分,拆分成一个一个数据显示
使用语法
{
$unwind:
{
path: <field path>,
includeArrayIndex: <string>,
preserveNullAndEmptyArrays: <boolean>
}
}
path:必须有,指明需要拆分的字段,一般为一个数组
includeArrayIndex: 可选,如果需要拆分开的数据包含数据下标,可以指定数组下标的字段名
preserveNullAndEmptyArrays: 可选,在数组为null、缺失、空的情况下,如果为true, $unwind 同样输出当前文档. 如果为false, $unwind不输出文档。默认是false.
原数据
> db.t4.find()
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : [ "a", "b", "c" ] }
{ "_id" : ObjectId("60ae0de2bff5299b604005c7"), "id" : 2, "age" : 11, "list" : [ ] }
{ "_id" : ObjectId("60ae0de2bff5299b604005c8"), "id" : 3, "age" : 12, "list" : null }
{ "_id" : ObjectId("60ae0ec0bff5299b604005c9"), "id" : 4, "age" : 13 }
对list列的数组进行拆分显示,并显示数组下标,下标字段名称为arrayIndex{ $unwind: { path: "$list", includeArrayIndex: "arrayIndex", } }
除第一行外,其余行list
字段为空、null、缺失,所以没有显示
> db.t4.aggregate([{ $unwind: { path: "$list", includeArrayIndex: "arrayIndex" } } ])
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "a", "arrayIndex" : NumberLong(0) }
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "b", "arrayIndex" : NumberLong(1) }
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "c", "arrayIndex" : NumberLong(2) }
配置preserveNullAndEmptyArrays: true
> db.t4.aggregate([ { $unwind: { path: "$list", includeArrayIndex: "arrayIndex", preserveNullAndEmptyArrays: true } } ] )
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "a", "arrayIndex" : NumberLong(0) }
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "b", "arrayIndex" : NumberLong(1) }
{ "_id" : ObjectId("60ae0de2bff5299b604005c6"), "id" : 1, "age" : 10, "list" : "c", "arrayIndex" : NumberLong(2) }
{ "_id" : ObjectId("60ae0de2bff5299b604005c7"), "id" : 2, "age" : 11, "arrayIndex" : null }
{ "_id" : ObjectId("60ae0de2bff5299b604005c8"), "id" : 3, "age" : 12, "list" : null, "arrayIndex" : null }
{ "_id" : ObjectId("60ae0ec0bff5299b604005c9"), "id" : 4, "age" : 13, "arrayIndex" : null }
2.7 $lookup
将用localField
字段中的内容和from
集合中的foreignField
字段进行比较,如果比较相同,就将from
集合中的这条记录加入当前文档中,字段名为as
的值。
使用语法
{
$lookup:
{
from: <collection to join>,
localField: <field from the input documents>,
foreignField: <field from the documents of the "from" collection>,
as: <output array field>
}
}
原数据
> db.t2.find()
{ "_id" : ObjectId("60adbec5d15f5f92b4805399"), "id" : 1, "name" : "a" }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b" }
{ "_id" : ObjectId("60adbec5d15f5f92b480539b"), "id" : 3, "name" : "c" }
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
基于t2
表的id
字段联合查询t3
表的id
字段,将结果合并显示,t3
的内容放在location
字段中,使用location.city
、location.age
控制只显示t3
中的city
和age
值
> db.t2.aggregate(
[
{
$lookup:
{
from: "t3",
localField: "id",
foreignField: "id",
as: "location"
}
},
{
$project:
{
"id":1,
"name":1,
"location.city":1,
"location.age":1
}
}
]
)
{ "_id" : ObjectId("60adbec5d15f5f92b4805399"), "id" : 1, "name" : "a", "location" : [ { "age" : 10, "city" : "beijing" } ] }
{ "_id" : ObjectId("60adbec5d15f5f92b480539a"), "id" : 2, "name" : "b", "location" : [ { "age" : 11, "city" : "shanghai" } ] }
{ "_id" : ObjectId("60adbec5d15f5f92b480539b"), "id" : 3, "name" : "c", "location" : [ { "age" : 12, "city" : "dalian" } ] }
2.8 $group
类似与mysql中的group by,指定数据根据某个字段分组。在$group
中我们是通过_id
来指定分组依据。
原数据
> db.t3.find()
{ "_id" : ObjectId("60adbf29d15f5f92b480539c"), "id" : 1, "age" : 10, "city" : "beijing" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539d"), "id" : 2, "age" : 11, "city" : "shanghai" }
{ "_id" : ObjectId("60adbf29d15f5f92b480539e"), "id" : 3, "age" : 12, "city" : "dalian" }
{ "_id" : ObjectId("60b0602aac7d71a1301a1715"), "id" : 2, "age" : 20, "city" : "jilin" }
指定数据根据id
来进行分组,然后在每个分组中通过$sum函数来计算age
的总数,并将计算结果给了totalage
这个字段。
> db.t3.aggregate([
{
$group:{_id:"$id",
totalage:{$sum:"$age"}
}
}
])
{ "_id" : 1, "totalage" : 10 }
{ "_id" : 2, "totalage" : 31 }
{ "_id" : 3, "totalage" : 12 }