WebMagic Pipeline - MySQL

在官方提供的 Pipeline 中没有 MySQL 的实现。今天在做数据抓取时,需要把数据都保存到 MySQL 中。

首先自定义一个类实现Pipeline:

package com.lacerta.weiwei.news;

import com.lacerta.util.JdbcUtil;

import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

public class ZiXunMysqLPipeline implements Pipeline {

	@Override
	public void process(ResultItems resultItems,Task task) {

		String source_url = resultItems.getRequest().getUrl();
		String title = resultItems.get("title");
		String source = resultItems.get("source");
		String publish_time = resultItems.get("publish_time");
		String browse_times = resultItems.get("browse_times");
		String content = resultItems.get("content");
		String type = resultItems.get("type");

		String sql = "INSERT INTO t_news "
				+ "(source_url,title,content,source,publish_time,browse_times,type ) VALUES ( '" + //
				source_url.replace("'","\\\'") + "','" + //
				title.replace("'",'" + //
				content.replace("'",'" + //
				source.replace("'",'" + //
				publish_time.replace("'"," + //
				browse_times.replace("'","\\\'") + ",'" + //
				type.replace("'","\\\'") + "' );";
		System.out.println(sql);
		JdbcUtil.executesql(sql);
	}
}

JdbcUtil.executesql(sql) 方法的实现:

package com.lacerta.util;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Minimal JDBC helper: reads the connection settings from the classpath
 * resource {@code /driver.properties} once, when the class is initialized, and
 * opens a fresh connection for every statement it executes.
 */
public class JdbcUtil {

	private static String driver;
	private static String url;
	private static String username;
	private static String password;

	static {// static initializer: read the settings and load the JDBC driver

		Properties prop = new Properties();

		// try-with-resources closes the stream, which was previously leaked.
		try (InputStream is = JdbcUtil.class.getResourceAsStream("/driver.properties")) {
			if (is == null) {
				// Previously surfaced as a bare NullPointerException from prop.load(null).
				throw new IllegalStateException("driver.properties not found on the classpath");
			}
			prop.load(is);
		} catch (IOException e1) {
			e1.printStackTrace();
		}
		driver = prop.getProperty("driver");
		url = prop.getProperty("url");
		username = prop.getProperty("username");
		password = prop.getProperty("password");
		try {
			Class.forName(driver);
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Executes an update statement (INSERT/UPDATE/DELETE) that has no bound
	 * parameters.
	 *
	 * @param sql the complete SQL text to execute
	 */
	public static void executesql(String sql) {
		executesql(sql, new Object[0]);
	}

	/**
	 * Executes an update statement, binding {@code params} in order to the
	 * {@code ?} placeholders of {@code sql}. Prefer this overload whenever the
	 * values come from outside the program, since bound values cannot alter
	 * the statement text.
	 *
	 * <p>Any {@link SQLException} -- including a failure to open the
	 * connection -- is printed to standard error and not rethrown.
	 *
	 * @param sql    SQL text, optionally containing {@code ?} placeholders
	 * @param params values for the placeholders; {@code null} is treated as
	 *               "no parameters"
	 */
	public static void executesql(String sql, Object... params) {
		// Both resources are closed automatically, statement first, then connection.
		try (Connection conn = getConn();
				PreparedStatement ps = conn.prepareStatement(sql)) {

			if (params != null) {
				for (int i = 0; i < params.length; i++) {
					ps.setObject(i + 1, params[i]); // JDBC parameter indexes are 1-based
				}
			}
			ps.executeUpdate();

		} catch (SQLException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens a new connection using the settings read from
	 * {@code driver.properties}.
	 *
	 * @throws SQLException if the connection cannot be established; it is
	 *                      propagated so the caller never receives a null
	 *                      connection
	 */
	private static Connection getConn() throws SQLException {
		return DriverManager.getConnection(url,username,password);
	}
}

driver.properties

driver=com.mysql.cj.jdbc.Driver
url=jdbc:mysql://127.0.0.1:3306/databaseName?serverTimezone=UTC
username={你自己的用户名}
password={你自己的密码}

## Driver 如果使用 com.mysql.cj.jdbc 包下的驱动,url 后要加上 ?serverTimezone=UTC

MySQL 驱动包的 Maven 依赖:

<dependency>
	<groupId>mysql</groupId>
	<artifactId>mysql-connector-java</artifactId>
	<version>6.0.4</version>
</dependency>

相关文章

迭代器模式(Iterator)迭代器模式(Iterator)[Cursor]意图...
高性能IO模型浅析服务器端编程经常需要构造高性能的IO模型,...
策略模式(Strategy)策略模式(Strategy)[Policy]意图:定...
访问者模式(Visitor)访问者模式(Visitor)意图:表示一个...
命令模式(Command)命令模式(Command)[Action/Transactio...
生成器模式(Builder)生成器模式(Builder)意图:将一个对...