其实还有更新的结论是,在新的MYSQL版本中BLOB字段并不是性能损失的原因。更具体的原因,下次将我理解的MYSQL索引的过程写下来
———————————-2015-11-19以上—————————————
最近公司项目中遇到了性能问题,是关于BLOB字段带来的性能损失。
首先在数据库中创建了这么两个表
CREATE TABLE `icsp_alarm_summarieswith` (
`code` char(12) NOT NULL,
`org_id` int(11) DEFAULT '10000',
`schema_code` varchar(12) DEFAULT NULL,
`task_code` varchar(12) DEFAULT NULL,
`task_detail_code` varchar(12) DEFAULT NULL,
`title` varchar(128) DEFAULT NULL,
`alarm_time` datetime DEFAULT NULL,
`category0` int(11) DEFAULT '0',
`category1` int(11) DEFAULT '0',
`similar` int(11) DEFAULT '0',
`alarm_category` int(11) DEFAULT '1',
`alarm_level` int(11) DEFAULT '1',
`alarm_resp` varchar(12) DEFAULT NULL,
`site_code` varchar(10) DEFAULT NULL,
`url` varchar(512) DEFAULT NULL,
`url_md5` varchar(32) DEFAULT NULL,
`content` longblob,
`content_length` bigint(20) DEFAULT '0',
`content_summary` varchar(256) DEFAULT NULL,
`content_type` varchar(64) DEFAULT NULL,
`content_md5` varchar(32) DEFAULT NULL,
`description` varchar(128) DEFAULT NULL,
`state` int(11) DEFAULT '0',
PRIMARY KEY (`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `icsp_alarm_summarieswithout` (
`code` char(12) NOT NULL,
`org_id` int(11) DEFAULT '10000',
`schema_code` varchar(12) DEFAULT NULL,
`task_code` varchar(12) DEFAULT NULL,
`task_detail_code` varchar(12) DEFAULT NULL,
`title` varchar(128) DEFAULT NULL,
`alarm_time` datetime DEFAULT NULL,
`category0` int(11) DEFAULT '0',
`category1` int(11) DEFAULT '0',
`similar` int(11) DEFAULT '0',
`alarm_category` int(11) DEFAULT '1',
`alarm_level` int(11) DEFAULT '1',
`alarm_resp` varchar(12) DEFAULT NULL,
`site_code` varchar(10) DEFAULT NULL,
`url` varchar(512) DEFAULT NULL,
`url_md5` varchar(32) DEFAULT NULL,
`content_length` bigint(20) DEFAULT '0',
`content_summary` varchar(256) DEFAULT NULL,
`content_type` varchar(64) DEFAULT NULL,
`content_md5` varchar(32) DEFAULT NULL,
`description` varchar(128) DEFAULT NULL,
`state` int(11) DEFAULT '0',
PRIMARY KEY (`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
接着分别插入500W的数据,其中的时间字段我们设为在两年内随机,插入我们采用JDBC进行插入
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Types;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* 创建者: Michaelssss
* 创建时间: 15-11-6
* 功能名字:
*/
public class InsertMutiple {
public static void main(String[] args) {
InsertMutiple t = new InsertMutiple();
t.fuck();
t.setStartTime();
}
public static int count = 100;
private void fuck() {
System.out.println("插入数据量 :" + count * 50000);
for (int thread = 0, ThreadNum = count; thread < ThreadNum; thread++) {
MutipltInsertin t = new MutipltInsertin(thread * 50000, thread * 50000 + 50000, "第" + (thread + 1) + "个线程");
new Thread(t).start();
}
}
public synchronized void subCount() {
count--;
if(count==0){
long endTime = System.currentTimeMillis();
long startTime = getStarttime();
System.out.println("insert " + 10 * 500000 + " cols cast:" + ((endTime - startTime) / 1000) + "s");
System.out.println("平均插入条数:" + (10*500000 / ((endTime - startTime) / 1000)) + "条/秒");
}
}
private long starttime;
public void setStartTime(){
starttime = System.currentTimeMillis();
}
public long getStarttime(){
return this.starttime;
}
public synchronized int getCount() {
return count;
}
public class MutipltInsertin implements Runnable {
private int begin, end;
private String processName;
MutipltInsertin(int begin, int end, String processName) {
this.begin = begin;
this.end = end;
this.processName = processName;
}
private String genRandomDate(){
GregorianCalendar now = new GregorianCalendar();
int random;
Random random1 = new Random();
random = random1.nextInt(365);
now.add(Calendar.DAY_OF_YEAR,random);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:ss",Locale.US);
return simpleDateFormat.format(now.getTime());
}
public void run() {
String url = "jdbc:mysql://localhost:3306/icsp_nutchdb?"
+ "user=smmcuser&password=smmcpwd&useUnicode=true&characterEncoding=UTF8";
try {
new com.mysql.jdbc.Driver();
System.out.println(this.processName + "开始运行");
Connection conn = DriverManager.getConnection(url);
PreparedStatement ps = conn.prepareStatement("INSERT INTO `icsp_nutchdb`.`icsp_alarm_summarieswithblob`" +
"(`code`," +
"`org_id`," +
"`schema_code`," +
"`task_code`," +
"`task_detail_code`," +
"`title`," +
"`alarm_time`," +
"`category0`," +
"`category1`," +
"`similar`," +
"`alarm_category`," +
"`alarm_level`," +
"`alarm_resp`," +
"`site_code`," +
"`url`," +
"`url_md5`," +
"`content`,"+
"`content_length`," +
"`content_summary`," +
"`content_type`," +
"`content_md5`," +
"`description`," +
"`state`)" +
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)");
File file = new File("/home/michaelssss/CODE/MySQLBenckmen/src/LCA_HandbookThumbnail.jpg");
InputStream in = new BufferedInputStream(new FileInputStream(file));
for (; begin < end; ) {
for (int inloop = 0; inloop < 500; inloop++) {
ps.setString(1, String.valueOf(begin));
ps.setString(2, "21");
ps.setString(3, "000005F62F27");
ps.setString(4, "000005F60973");
ps.setString(5, "5F6081103E7B");
ps.setNull(6, Types.CHAR);
ps.setString(7, genRandomDate());
ps.setString(8, "0");
ps.setString(9, "0");
ps.setString(10, "0");
ps.setString(11, "2");
ps.setString(12, "1");
ps.setNull(13, Types.CHAR);
ps.setNull(14, Types.CHAR);
ps.setString(15, "http://www.nliaschool.org/images/LCA_HandbookThumbnail.jpg");
ps.setString(16, "52B7BD3BB1AC665A6B4A3448307376E2");
ps.setBinaryStream(17,in,(int)file.length());
ps.setString(18, "8879");
ps.setNull(19, Types.CHAR);
ps.setString(20, "image/jpeg");
ps.setString(21, "910175A39FC41885DBD2CFE9ACD5A9BA");
ps.setNull(22, Types.CHAR);
ps.setString(23, "1");
ps.addBatch();
begin++;
}
ps.executeBatch();
System.out.println("Insert the Batch " + begin);
}
in.close();
subCount();
System.out.println(this.processName + "结束");
} catch (
Exception e
)
{
System.out.println(e);
}
}
}
}
经过了硬盘的狂转,接下来就是测试了,以下是测试的代码:
import javax.sql.RowSet;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* 创建者: Michaelssss
* 创建时间: 15-11-6
* 功能名字:
*/
public class ForSelectSingle {
public static void main(String[] agrs) throws Exception{
int loops=10;
ForSelectSingle f = new ForSelectSingle();
System.out.println("单线程读取:");
System.out.println("测试次数:"+loops+"次");
long totalTimeWithblob = 0;
long totalTimeWithoutblob = 0;
com.mysql.jdbc.Driver driver = new com.mysql.jdbc.Driver();
String url = "jdbc:mysql://localhost:3306/icsp_nutchdb?"
+ "user=&password=&useUnicode=true&characterEncoding=UTF8";
Connection conn = DriverManager.getConnection(url);
String date = f.genRandomDate();
System.out.println("Date : "+date );
for(int loop=0;loop<loops;loop++) {
System.out.println("The " + loop + " loop");
long startTime = System.currentTimeMillis();
f.readinicsp_alarm_summaries(conn,date);
long endTime = System.currentTimeMillis();
System.out.println("读取50万条数据用时" + (endTime - startTime) / 1000.0 + "s,in icsp_alarm_summaries with blob");
System.out.println("读取能力:"+50*10000/((endTime - startTime) / 1000.0)+"条/秒");
totalTimeWithblob += endTime-startTime;
startTime = System.currentTimeMillis();
f.readinicsp_alarm_summarieswithoutblob(conn,date);
endTime = System.currentTimeMillis();
System.out.println("读取50万条数据用时" + (endTime - startTime) / 1000.0 + "s,in icsp_alarm_summaries without blob");
System.out.println("读取能力:"+50*10000/((endTime - startTime) / 1000.0)+"条/秒");
totalTimeWithoutblob += endTime-startTime;
}
conn.close();
System.out.println(" 平均用时,带blob :"+totalTimeWithblob/(1000.0*loops)+" s");
System.out.println(" 平均用时,不带blob :"+totalTimeWithoutblob/(1000.0*loops)+" s");
}
private static int ROWS=5000000;
private String genRandomDate(){
GregorianCalendar now = new GregorianCalendar();
int random;
Random random1 = new Random();
random = random1.nextInt(365);
now.add(Calendar.DAY_OF_YEAR,random);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:ss", Locale.US);
return simpleDateFormat.format(now.getTime());
}
private void readinicsp_alarm_summaries(Connection conn,String date){
try {
Statement statement = conn.createStatement();
String sql = "select SQL_NO_CACHE * from icsp_nutchdb.icsp_alarm_summarieswithblob where alarm_time<'"+date+"'";
ResultSet resultSet = statement.executeQuery(sql);
sql = "UPDATE icsp_nutchdb.icsp_alarm_summarieswithblob SET alarm_time=now() WHERE code=1 ";
//statement.execute(sql);
statement.close();
} catch (Exception e) {
System.out.println(e);
}
}
private void readinicsp_alarm_summarieswithoutblob(Connection conn,String date){
try {
Statement statement = conn.createStatement();
String sql = "select SQL_NO_CACHE * from icsp_nutchdb.icsp_alarm_summarieswithoutblob where alarm_time<'"+date+"'";
ResultSet resultSet = statement.executeQuery(sql);
sql = "UPDATE icsp_nutchdb.icsp_alarm_summarieswithoutblob SET alarm_time=now() WHERE code=1 ";
//statement.execute(sql);
statement.close();
} catch (Exception e) {
System.out.println(e);
}
}
}
测试语句 :select SQL_NO_CACHE count(*),now() from icsp_nutchdb.icsp_alarm_summarieswithoutblob
为了追求测试结果接近真实情况,
每次读取之后更新其中一个字段,强制更新索引和缓存
单线程读取:
测试次数:10次
平均用时,带blob23.4804 s
平均用时,不带blob19.9576 s
测试语句 :select SQL_NO_CACHE *,now() from icsp_nutchdb.icsp_alarm_summarieswithoutblob where code<500000
单线程读取:
测试次数:10次
平均用时,带blob 79.9929 s
平均用时,不带blob 62.1815 s
问题:
在详细观察每轮测试的数据发现以上平均值基本是没有任何意义的,每轮测试的时间基本都会有很大的波动,无论是否带blob字段,表的数据量只要在500W以上查询时间都是不可忍受的。(>10S~15S)
分析:
时间波动在我去掉每次的查询完后的更新操作会消失,同时查询时间会极大极大的缩短。
对于已经索引的查询,是否带blob在500W的数据量上从查询时间平均值上看,不带blob的要好一点,但前面已经提到,这些数据波动太大,并不可信。
结论:
1.可不带blob字段就不带,就算使用请打开压缩模式使其能够被高效索引;
2.尽量减少表的大小,使其行数低于100W;
3.尽量减少对同一个表读写同时操作;
4.如果要读写同一个表,那么尽可能让这个表变得小。