利用正则表达式读取txt文件中的邮箱、手机号、URL地址、身份证号,并将提取到的数据一行一个保存到一个新的文件中

本人最近整理了一套从文本中提取信息并将其保存到新文本文件中的代码,其中用到的是正则表达式,希望和大家一起学习。

文件操作类:fileOperation.java

package dyx_13;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;

/**
 * Text-file helpers used by the extraction demo: create, read, overwrite,
 * append to and truncate plain text files.
 *
 * <p>Unless stated otherwise, text is read and written with the JVM's default
 * charset, because the reader/writer constructors used here take no charset.
 */
public class fileOperation {

    /**
     * Creates the given file if it does not exist yet.
     *
     * @param fileName the file to create
     * @return {@code true} only when this call created a new file;
     *         {@code false} when the file already existed or creation failed
     *         (the failure is printed to standard error)
     */
    public boolean crateFile(File fileName) {
        try {
            // createNewFile() checks for existence and creates in one step,
            // and reports whether a new file was actually made.
            return fileName.createNewFile();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Every line is terminated with {@code "\n"} in the result, whatever
     * line separator the file uses, so a non-empty result always ends with a
     * newline.
     *
     * @param fileName the file to read
     * @return the file content, {@code ""} for an empty file, or {@code null}
     *         when the file cannot be opened or read (the failure is printed
     *         to standard error)
     */
    public static String readTxtFile(File fileName) {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader on both success and failure.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            for (String line; (line = reader.readLine()) != null;) {
                text.append(line).append("\n");
            }
            return text.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Replaces the content of a file with the given text, encoded as GBK.
     *
     * @param content  the text to store
     * @param fileName the file to overwrite (created if missing)
     * @return {@code true} when the content was written and the stream closed
     *         without error; {@code false} otherwise (the failure is printed
     *         to standard error)
     */
    public boolean writeTxtFile(String content, File fileName) {
        try {
            // Encode before opening the stream so that an unsupported
            // encoding does not truncate the existing file.
            byte[] bytes = content.getBytes("GBK");
            try (FileOutputStream out = new FileOutputStream(fileName)) {
                out.write(bytes);
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Appends one line of text, terminated by {@code "\r\n"}, to a file.
     *
     * @param filePath path of the file to append to (created if missing)
     * @param content  the text of the line
     * @throws IOException if the file cannot be opened, written or closed
     */
    public static void write(String filePath, String content) throws IOException {
        File target = new File(filePath);
        // Second constructor argument 'true' selects append mode.
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target, true)))) {
            out.write(content + "\r\n");
        }
    }

    /**
     * Truncates a file to zero length, creating it if it does not exist.
     *
     * @param path path of the file to empty
     * @throws IOException if the file cannot be opened or closed
     */
    public static void clear(String path) throws IOException {
        // Opening a FileWriter without append mode already truncates the file.
        try (FileWriter writer = new FileWriter(new File(path))) {
            writer.write("");
        }
    }

    /**
     * Makes sure the source and target files exist, reads the source, and
     * empties the target.
     *
     * <p>NOTE(review): writing the extracted data to the target is not
     * implemented here; the target is only truncated. The extraction itself
     * is done by the {@code regExp} methods, which append to the target.
     *
     * @param filePath1 path of the source text file
     * @param filePath2 path of the target text file
     */
    public static void contentToTxt(String filePath1, String filePath2) {
        try {
            File file1 = new File(filePath1);
            if (file1.exists()) {
                System.out.println("源文件存在");
            } else {
                System.out.println("源文件不存在");
                file1.createNewFile();
            }

            File file2 = new File(filePath2);
            if (file2.exists()) {
                System.out.println("目标文件存在");
            } else {
                System.out.println("目标文件不存在");
                file2.createNewFile();
            }

            // Read the source first: if it cannot be read, the target is left
            // untouched (readTxtFile has already reported the failure).
            String sourceText = readTxtFile(file1);
            if (sourceText == null) {
                return;
            }

            clear(filePath2);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


正则表达式处理:regExp.java

package dyx_13;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class regExp {

public static String email(String filePath,String content) throws IOException {
String regex = "\\w+@\\w+(\\.\\w+)+";
String email = null;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
email = m.group();
fileOperation.write(filePath,email);
System.out.println(email);
}
return email;
}

// 电话号码
public static String tele(String filePath,String content) throws IOException {
String telephone = null;
String regex = "0?(13[0-9]|15[012356789]|18[0236789]|14[57])[0-9]{8}";
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
telephone = m.group();
fileOperation.write(filePath,telephone);
System.out.println(telephone);
}
return telephone;
}

// 网址
public static String netAddress(String filePath,String content) throws IOException {
String netAdd = null;
String regex = "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]" ;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
netAdd = m.group();
fileOperation.write(filePath,netAdd);
System.out.println(netAdd);
}
return netAdd;
}

// 身份证
public static String idCard(String filePath,String content) throws IOException {
String id = null;
String regex = "(\\d{14}\\w)|\\d{17}\\w" ;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
id = m.group();
fileOperation.write(filePath,id);
System.out.println(id);
}
return id;
}
}

运行主程序zhu.java

package dyx_13; import java.io.File; import java.io.IOException; public class zhu { /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { // Todo Auto-generated method stub //o.contentToTxt(s1,s2);fileOperation o = new fileOperation(); String s1 = "D:\\java workspace\\dyx\\JAVA class\\dyx_2\\content\\file.txt"; String s2 = "D:\\java workspace\\dyx\\JAVA class\\dyx_2\\content\\outfile.txt"; fileOperation.clear(s2); File f = new File(s1); String s = fileOperation.readTxtFile(f); System.out.println(s); regExp.email(s2,s); regExp.tele(s2,s); regExp.netAddress(s2,s); regExp.idCard(s2,s); } }

相关文章

正则替换html代码中img标签的src值在开发富文本信息在移动端...
正则表达式
AWK是一种处理文本文件的语言,是一个强大的文件分析工具。它...
正则表达式是特殊的字符序列,利用事先定义好的特定字符以及...
Python界一名小学生,热心分享编程学习。
收集整理每周优质开发者内容,包括、、等方面。每周五定期发...