1.准备环境
1.1 mongodb下载
1.2 mongodb启动
C:\mongodb\bin\mongod --dbpath D:\mongodb\data
1.3 可视化mongo工具Robo 3T下载
2.准备数据
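在 pom.xml 中引入 MongoDB 的 Java 驱动:
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>mongo-java-driver</artifactId>
    <version>3.6.1</version>
</dependency>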
java代码执行
</span><span style="color: #0000ff;">try</span><span style="color: #000000;"> {
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Connect to MongoDB ***</span><span style="color: #008000;">*/</span>
<span style="color: #008000;">//</span><span style="color: #008000;"> Since 2.10.0,uses MongoClient</span>
MongoClient mongo = <span style="color: #0000ff;">new</span> MongoClient("localhost",27017<span style="color: #000000;">);
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Get database ***</span><span style="color: #008000;">*/</span>
<span style="color: #008000;">//</span><span style="color: #008000;"> if database doesn't exists,MongoDB will create it for you</span>
DB db = mongo.getDB("www"<span style="color: #000000;">);
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Get collection / table from 'testdb' ***</span><span style="color: #008000;">*/</span>
<span style="color: #008000;">//</span><span style="color: #008000;"> if collection doesn't exists,MongoDB will create it for you</span>
DBCollection table = db.getCollection("person"<span style="color: #000000;">);
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Insert ***</span><span style="color: #008000;">*/</span>
<span style="color: #008000;">//</span><span style="color: #008000;"> create a document to store key and value</span>
BasicDBObject document=<span style="color: #0000ff;">null</span><span style="color: #000000;">;
</span><span style="color: #0000ff;">for</span>(<span style="color: #0000ff;">int</span> i=0;i<100000000;i++<span style="color: #000000;">) {
document </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> BasicDBObject();
document.put(</span>"name","mkyong"+<span style="color: #000000;">i);
document.put(</span>"age",30<span style="color: #000000;">);
document.put(</span>"sex","f"<span style="color: #000000;">);
table.insert(document);
}
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Done ***</span><span style="color: #008000;">*/</span><span style="color: #000000;">
System.out.println(</span>"Done"<span style="color: #000000;">);
} </span><span style="color: #0000ff;">catch</span><span style="color: #000000;"> (UnknownHostException e) {
e.printStackTrace();
} </span><span style="color: #0000ff;">catch</span><span style="color: #000000;"> (MongoException e) {
e.printStackTrace();
}
}</span></pre>
3.分页查询
传统的 skip + limit 分页方式在数据量较大时查询缓慢(skip 的偏移量越大,需要扫描并丢弃的文档越多),不太适用。考虑别的方式,参考了 logstash-input-mongodb 的思路:记录上一批最后一条文档的 _id,下一批用 _id 大于该值的条件加 limit 来查询:
collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
collection_name </span>=<span style="color: #000000;"> collection[:name]
@logger.debug(</span><span style="color: #800000;">"</span><span style="color: #800000;">collection_data is: #{@collection_data}</span><span style="color: #800000;">"</span><span style="color: #000000;">)
last_id </span>=<span style="color: #000000;"> @collection_data[index][:last_id]
</span><span style="color: #008000;">#</span><span style="color: #008000;">@logger.debug("last_id is #{last_id}",:index => index,:collection => collection_name)</span>
<span style="color: #008000;">#</span><span style="color: #008000;"> get batch of events starting at the last_place if it is set</span>
<span style="color: #000000;">
last_id_object </span>=<span style="color: #000000;"> last_id
</span><span style="color: #0000ff;">if</span> since_type == <span style="color: #800000;">'</span><span style="color: #800000;">id</span><span style="color: #800000;">'</span><span style="color: #000000;">
last_id_object </span>=<span style="color: #000000;"> BSON::ObjectId(last_id)
elsif since_type </span>== <span style="color: #800000;">'</span><span style="color: #800000;">time</span><span style="color: #800000;">'</span>
<span style="color: #0000ff;">if</span> last_id != <span style="color: #800000;">''</span><span style="color: #000000;">
last_id_object </span>=<span style="color: #000000;"> Time.at(last_id)
end
end
cursor </span>= get_cursor_for_collection(@mongodb,collection_name,batch_size)</pre>
使用java实现
<span style="color: #0000ff;">import<span style="color: #000000;"> org.bson.types.ObjectId;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.BasicDBObject;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DB;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBCollection;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBCursor;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBObject;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.MongoClient;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.MongoException;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DB;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBCollection;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBCursor;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.DBObject;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.MongoClient;
<span style="color: #0000ff;">import<span style="color: #000000;"> com.mongodb.MongoException;
<span style="color: #0000ff;">public <span style="color: #0000ff;">class<span style="color: #000000;"> Test {
</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">static</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> main(String[] args) {
</span><span style="color: #0000ff;">int</span> pageSize=50000<span style="color: #000000;">;
</span><span style="color: #0000ff;">try</span><span style="color: #000000;"> {
</span><span style="color: #008000;">/**</span><span style="color: #008000;">** Connect to MongoDB ***</span><span style="color: #008000;">*/</span>
<span style="color: #008000;">//</span><span style="color: #008000;"> Since 2.10.0,MongoDB will create it for you</span>
DBCollection table = db.getCollection("person"<span style="color: #000000;">);
DBCursor dbObjects;
Long cnt</span>=<span style="color: #000000;">table.count();
</span><span style="color: #008000;">//</span><span style="color: #008000;">System.out.println(table.getStats());</span>
Long page=<span style="color: #000000;">getPageSize(cnt,pageSize);
ObjectId lastIdObject</span>=<span style="color: #0000ff;">new</span> ObjectId("5bda8f66ef2ed979bab041aa"<span style="color: #000000;">);
</span><span style="color: #0000ff;">for</span>(Long i=0L;i<page;i++<span style="color: #000000;">) {
Long start</span>=<span style="color: #000000;">System.currentTimeMillis();
dbObjects</span>=<span style="color: #000000;">getCursorForCollection(table,lastIdObject,pageSize);
System.out.println(</span>"第"+(i+1)+"次查询,耗时:"+(System.currentTimeMillis()-start)/1000+"秒"<span style="color: #000000;">);
List</span><DBObject> objs=<span style="color: #000000;">dbObjects.toArray();
lastIdObject</span>=(ObjectId) objs.get(objs.size()-1).get("_id"<span style="color: #000000;">);
}
} </span><span style="color: #0000ff;">catch</span><span style="color: #000000;"> (UnknownHostException e) {
e.printStackTrace();
} </span><span style="color: #0000ff;">catch</span><span style="color: #000000;"> (MongoException e) {
e.printStackTrace();
}
}
</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">static</span> DBCursor getCursorForCollection(DBCollection collection,ObjectId lastIdObject,<span style="color: #0000ff;">int</span><span style="color: #000000;"> pageSize) {
DBCursor dbObjects</span>=<span style="color: #0000ff;">null</span><span style="color: #000000;">;
</span><span style="color: #0000ff;">if</span>(lastIdObject==<span style="color: #0000ff;">null</span><span style="color: #000000;">) {
lastIdObject</span>=(ObjectId) collection.findOne().get("_id"<span style="color: #000000;">); //TODO 排序sort取第一个,否则可能丢失数据
}
BasicDBObject query</span>=<span style="color: #0000ff;">new</span><span style="color: #000000;"> BasicDBObject();
query.append(</span>"_id",<span style="color: #0000ff;">new</span> BasicDBObject("$gt"<span style="color: #000000;">,lastIdObject));
BasicDBObject sort</span>=<span style="color: #0000ff;">new</span><span style="color: #000000;"> BasicDBObject();
sort.append(</span>"_id",1<span style="color: #000000;">);
dbObjects</span>=<span style="color: #000000;">collection.find(query).limit(pageSize).sort(sort);
</span><span style="color: #0000ff;">return</span><span style="color: #000000;"> dbObjects;
}
</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">static</span> Long getPageSize(Long cnt,<span style="color: #0000ff;">int</span><span style="color: #000000;"> pageSize) {
</span><span style="color: #0000ff;">return</span> cnt%pageSize==0?cnt/pageSize:cnt/pageSize+1<span style="color: #000000;">;
}
}
4.一些经验教训
1. 不小心漏打了一个$符号,导致查询不到数据,浪费了一些时间去查找原因
query.append("_id",new BasicDBObject("$gt",lastIdObject));
2. 创建索引
创建普通的单列索引:db.collection.ensureIndex({field:1/-1}); 1 是升序,-1 是降序
实例:db.articles.ensureIndex({title:1}) //注意 field 不要加""双引号,否则创建不成功
查看当前索引状态:db.collection.getIndexes();
实例:db.articles.getIndexes();
删除单个索引:db.collection.dropIndex({field:1/-1});
3.执行计划
db.student.find({"name":"dd1"}).explain()
参考文献:
【1】https://github.com/phutchins/logstash-input-mongodb/blob/master/lib/logstash/inputs/mongodb.rb
【2】https://www.cnblogs.com/yxlblogs/p/4930308.html
【3】https://docs.mongodb.com/manual/reference/method/db.collection.ensureIndex/