本文实例讲述了 Java 实现的文本字符串操作工具类,分享给大家供大家参考,具体如下:
package com.gcloud.common; import org.apache.commons.lang.StringUtils; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.BreakIterator; import java.util.ArrayList; import java.util.Date; import java.util.Random; /** * 文本字符串工具类 * Created by charlin on 2017/9/10. */ public class TextUtil { private static final char[] QUOTE_ENCODE = "".tochararray(); private static final char[] AMP_ENCODE = "&".tochararray(); private static final char[] LT_ENCODE = "<".tochararray(); private static final char[] GT_ENCODE = ">".tochararray(); private final static String[] hex = { "00","01","02","03","04","05","06","07","08","09","0A","0B","0C","0D","0E","0F","10","11","12","13","14","15","16","17","18","19","1A","1B","1C","1D","1E","1F","20","21","22","23","24","25","26","27","28","29","2A","2B","2C","2D","2E","2F","30","31","32","33","34","35","36","37","38","39","3A","3B","3C","3D","3E","3F","40","41","42","43","44","45","46","47","48","49","4A","4B","4C","4D","4E","4F","50","51","52","53","54","55","56","57","58","59","5A","5B","5C","5D","5E","5F","60","61","62","63","64","65","66","67","68","69","6A","6B","6C","6D","6E","6F","70","71","72","73","74","75","76","77","78","79","7A","7B","7C","7D","7E","7F","80","81","82","83","84","85","86","87","88","89","8A","8B","8C","8D","8E","8F","90","91","92","93","94","95","96","97","98","99","9A","9B","9C","9D","9E","9F","A0","A1","A2","A3","A4","A5","A6","A7","A8","A9","AA","AB","AC","AD","AE","AF","B0","B1","B2","B3","B4","B5","B6","B7","B8","B9","BA","BB","BC","BD","BE","BF","C0","C1","C2","C3","C4","C5","C6","C7","C8","C9","CA","CB","CC","CD","CE","CF","D0","D1","D2","D3","D4","D5","D6","D7","D8","D9","DA","DB","DC","DD","DE","DF","E0","E1","E2","E3","E4","E5","E6","E7","E8","E9","EA","EB","EC","ED","EE","EF","F0","F1","F2","F3","F4","F5","F6","F7","F8","F9","FA","FB","FC","FD","FE","FF" }; private final static byte[] val = { 
0x3F,0x3F,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,0x3F }; private static MessageDigest digest = null; /** * 替换字符串 * @param line * @param oldStr * @param newStr * @return */ public static String replace(String line,String oldStr,String newStr){ if (StringUtils.isEmpty(line)) return null; int index = 0; if ((index = line.indexOf(oldStr,index)) >= 0){ char[] chararray = line.tochararray(); char[] newStrArray = newStr.tochararray(); int oldLen = oldStr.length(); StringBuffer buf = new StringBuffer(chararray.length); buf.append(chararray,index).append(newStrArray); index += oldLen; int i = index; while((index = line.indexOf(oldStr,index)) > 0){ buf.append(chararray,i,index - i).append(newStrArray); index += oldLen; i = index; } buf.append(chararray,chararray.length - i); return buf.toString(); } return line; } /** * 替换字符串,并存入替换次数 * @param line * @param oldStr * @param newStr * @return */ public static String replace(String line,String newStr,int[] countArr){ if (StringUtils.isEmpty(line)) return null; int index = 0; if ((index = line.indexOf(oldStr,index)) >= 0){ int count = 0; count++; char[] chararray = line.tochararray(); char[] newStrArray = newStr.tochararray(); int oldLen = oldStr.length(); StringBuffer buf = new StringBuffer(chararray.length); buf.append(chararray,index - i).append(newStrArray); index += oldLen; i = index; count++; } buf.append(chararray,chararray.length - i); countArr[0] = count; return buf.toString(); } return line; } /** * 替换字符串,不区分大小写 * @param line * @param oldStr * @param newStr * @return */ public static String replaceIgnoreCase(String line,String newStr){ if (StringUtils.isEmpty(line)) return null; int index = 0; line = line.toLowerCase(); oldStr = oldStr.toLowerCase(); if ((index = line.indexOf(oldStr,chararray.length - i); return buf.toString(); } return line; } /** * 替换字符串,不区分大小写,并存入替换次数 * @param line * @param oldStr * @param newStr * @return */ public static String replaceIgnoreCase(String 
line,int[] countArr){ if (StringUtils.isEmpty(line)) return null; int index = 0; line = line.toLowerCase(); oldStr = oldStr.toLowerCase(); if ((index = line.indexOf(oldStr,index).append(newStrArray); index += oldLen; int i = index; int count = 0; while((index = line.indexOf(oldStr,index)) > 0){ count ++; buf.append(chararray,chararray.length - i); countArr[0] = count; return buf.toString(); } return line; } /** * 替换页面html标签 * @param htmlStr * @return */ public static String escapeHTMLTags(String htmlStr) { if (StringUtils.isEmpty(htmlStr)) return null; char ch; int last = 0; char[] htmlStrArr = htmlStr.tochararray(); int len = htmlStrArr.length; StringBuffer outBuf = new StringBuffer((int) (len * 1.3)); int i = 0; for (; i < len; i++) { ch = htmlStrArr[i]; if (ch > '>') { continue; } else if (ch == '<') { if (i > last) { outBuf.append(htmlStrArr,last,i - last); } last = i + 1; outBuf.append(LT_ENCODE); } else if (ch == '>') { if (i > last) { outBuf.append(htmlStrArr,i - last); } last = i + 1; outBuf.append(GT_ENCODE); } } if (last == 0) { return htmlStr; } if (i > last) { outBuf.append(htmlStrArr,i - last); } return outBuf.toString(); } /** * Hashes a String using the Md5 algorithm and returns the result as a * String of hexadecimal numbers. This method is synchronized to avoid * excessive MessageDigest object creation. If calling this method becomes a * bottleneck in your code,you may wish to maintain a pool of MessageDigest * objects instead of using this method. * @param data * @return */ public synchronized static String hash(String data) { if (digest == null) { try { digest = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { System.err.println("Failed to load the MD5 MessageDigest.Jive will be unable to function normally."); e.printstacktrace(); } } digest.update(data.getBytes()); return encodeHex(digest.digest()); } /** * Turns an array of bytes into a String representing each byte as an * unsigned hex number. 
* @param bytes * @return */ public static final String encodeHex(byte[] bytes) { StringBuffer buf = new StringBuffer(bytes.length * 2); int i; for (i = 0; i < bytes.length; i++) { if (((int) bytes[i] & 0xff) < 0x10) { buf.append("0"); } buf.append(Long.toString((int) bytes[i] & 0xff,16)); } return buf.toString(); } /** * Turns a hex encoded string into a byte array. It is specifically meant to * "reverse" the toHex(byte[]) method. * @param hex * @return */ public static byte[] decodeHex(String hex) { char[] chars = hex.tochararray(); byte[] bytes = new byte[chars.length / 2]; int byteCount = 0; for (int i = 0; i < chars.length; i += 2) { byte newByte = 0x00; newByte |= hexCharToByte(chars[i]); newByte <<= 4; newByte |= hexCharToByte(chars[i + 1]); bytes[byteCount] = newByte; byteCount++; } return bytes; } /** * Returns the the byte value of a hexadecmical char (0-f). It's assumed * that the hexidecimal chars are lower case as appropriate. * @param ch a hexedicmal character (0-f) * @return the byte value of the character (0x00-0x0F) */ private static final byte hexCharToByte(char ch) { switch (ch) { case '0': return 0x00; case '1': return 0x01; case '2': return 0x02; case '3': return 0x03; case '4': return 0x04; case '5': return 0x05; case '6': return 0x06; case '7': return 0x07; case '8': return 0x08; case '9': return 0x09; case 'a': return 0x0A; case 'b': return 0x0B; case 'c': return 0x0C; case 'd': return 0x0D; case 'e': return 0x0E; case 'f': return 0x0F; } return 0x00; } // ********************************************************************* // * Base64 - a simple base64 encoder and decoder. // ********************************************************************* /** * Encodes a String as a base64 String. * @param data a String to encode. * @return a base64 encoded String. */ public static String encodeBase64(String data) { return encodeBase64(data.getBytes()); } /** * Encodes a byte array into a base64 String. * @param data a byte array to encode. 
* @return a base64 encode String. */ public static String encodeBase64(byte[] data) { int c; int len = data.length; StringBuffer ret = new StringBuffer(((len / 3) + 1) * 4); for (int i = 0; i < len; ++i) { c = (data[i] >> 2) & 0x3f; ret.append(cvt.charat(c)); c = (data[i] << 4) & 0x3f; if (++i < len) c |= (data[i] >> 4) & 0x0f; ret.append(cvt.charat(c)); if (i < len) { c = (data[i] << 2) & 0x3f; if (++i < len) c |= (data[i] >> 6) & 0x03; ret.append(cvt.charat(c)); } else { ++i; ret.append((char) fillchar); } if (i < len) { c = data[i] & 0x3f; ret.append(cvt.charat(c)); } else { ret.append((char) fillchar); } } return ret.toString(); } /** * Decodes a base64 String. * @param data a base64 encoded String to decode. * @return the decoded String. */ public static String decodeBase64(String data) { return decodeBase64(data.getBytes()); } /** * Decodes a base64 aray of bytes. * @param data a base64 encode byte array to decode. * @return the decoded String. */ public static String decodeBase64(byte[] data) { int c,c1; int len = data.length; StringBuffer ret = new StringBuffer((len * 3) / 4); for (int i = 0; i < len; ++i) { c = cvt.indexOf(data[i]); ++i; c1 = cvt.indexOf(data[i]); c = ((c << 2) | ((c1 >> 4) & 0x3)); ret.append((char) c); if (++i < len) { c = data[i]; if (fillchar == c) break; c = cvt.indexOf((char) c); c1 = ((c1 << 4) & 0xf0) | ((c >> 2) & 0xf); ret.append((char) c1); } if (++i < len) { c1 = data[i]; if (fillchar == c1) break; c1 = cvt.indexOf((char) c1); c = ((c << 6) & 0xc0) | c1; ret.append((char) c); } } return ret.toString(); } private static final int fillchar = '='; private static final String cvt = "ABCDEFGHIJKLMnopQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; /** * Converts a line of text into an array of lower case words using a * BreakIterator.wordInstance(). * This method is under the Jive Open Source Software License and was * written by Mark Imbriaco. 
* @param text a String of text to convert into an array of words * @return text broken up into an array of words. */ public static final String[] toLowerCaseWordArray(String text) { if (text == null || text.length() == 0) { return new String[0]; } ArrayList wordList = new ArrayList(); BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(text); int start = 0; for (int end = boundary.next(); end != BreakIterator.DONE; start = end,end = boundary.next()) { String tmp = text.substring(start,end).trim(); tmp = replace(tmp,"+",""); tmp = replace(tmp,"/","\\","#","*",")","(","&",""); if (tmp.length() > 0) { wordList.add(tmp); } } return (String[]) wordList.toArray(new String[wordList.size()]); } /** * Pseudo-random number generator object for use with randomString(). The * Random class is not considered to be cryptographically secure,so only * use these random Strings for low to medium security applications. */ private static Random randGen = new Random(); /** * Array of numbers and letters of mixed case. Numbers appear in the list * twice so that there is a more equal chance that a number will be picked. * We can use the array to get a random number or letter by picking a random * array index. */ private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" + "0123456789ABCDEFGHIJKLMnopQRSTUVWXYZ").tochararray(); /** * Returns a random String of numbers and letters (lower and upper case) of * the specified length. The method uses the Random class that is built-in * to Java which is suitable for low to medium grade security uses. This * means that the output is only pseudo random,i.e.,each number is * mathematically generated so is not truly random. * @param length the desired length of the random String to return. * @return a random String of numbers and letters of the specified length. 
*/ public static final String randomString(int length) { if (length < 1) { return null; } char[] randBuffer = new char[length]; for (int i = 0; i < randBuffer.length; i++) { randBuffer[i] = numbersAndLetters[randGen.nextInt(71)]; } return new String(randBuffer); } /** * Intelligently chops a String at a word boundary (whitespace) that occurs * at the specified index in the argument or before. However,if there is a * newline character before <code>length</code>,the String will be * chopped there. If no newline or whitespace is found in * <code>string</code> up to the index <code>length</code>,the String * will chopped at <code>length</code>. * <p> * For example,chopAtWord("This is a nice String",10) will return "This is * a" which is the first word boundary less than or equal to 10 characters * into the original String. * @param string the String to chop. * @param length * @return a substring of <code>string</code> whose length is less than or * equal to <code>length</code>,and that is chopped at * whitespace. */ public static final String chopAtWord(String string,int length) { if (string == null) { return string; } char[] chararray = string.tochararray(); int sLength = string.length(); if (length < sLength) { sLength = length; } // First check if there is a newline character before length; if so,// chop word there. for (int i = 0; i < sLength - 1; i++) { // Windows if (chararray[i] == '\r' && chararray[i + 1] == '\n') { return string.substring(0,i + 1); } // Unix else if (chararray[i] == '\n') { return string.substring(0,i); } } // Also check boundary case of Unix newline if (chararray[sLength - 1] == '\n') { return string.substring(0,sLength - 1); } // Done checking for newline,Now see if the total string is less than // the specified chop point. if (string.length() < length) { return string; } // No newline,so chop at the first whitespace. 
for (int i = length - 1; i > 0; i--) { if (chararray[i] == ' ') { return string.substring(0,i).trim(); } } // Did not find word boundary so return original String chopped at // specified length. return string.substring(0,length); } /** * Escapes all necessary characters in the String so that it can be used in * an XML doc. * @param string the string to escape. * @return the string with appropriate characters escaped. */ public static final String escapeForXML(String string) { if (string == null) { return null; } char ch; int i = 0; int last = 0; char[] input = string.tochararray(); int len = input.length; StringBuffer out = new StringBuffer((int) (len * 1.3)); for (; i < len; i++) { ch = input[i]; if (ch > '>') { continue; } else if (ch == '<') { if (i > last) { out.append(input,i - last); } last = i + 1; out.append(LT_ENCODE); } else if (ch == '&') { if (i > last) { out.append(input,i - last); } last = i + 1; out.append(AMP_ENCODE); } else if (ch == '"') { if (i > last) { out.append(input,i - last); } last = i + 1; out.append(QUOTE_ENCODE); } } if (last == 0) { return string; } if (i > last) { out.append(input,i - last); } return out.toString(); } /** * Unescapes the String by converting XML escape sequences back into normal * characters. * @param string the string to unescape. * @return the string with appropriate characters unescaped. 
*/ public static final String unescapeFromXML(String string) { string = replace(string,"<","<"); string = replace(string,">",">"); string = replace(string,"","\""); return replace(string,"&","&"); } public static String escape(String s) { StringBuffer sbuf = new StringBuffer(); int len = s.length(); for (int i = 0; i < len; i++) { int ch = s.charat(i); if (ch == ' ') { // space : map to '+' sbuf.append('+'); } else if ('A' <= ch && ch <= 'Z') { // 'A'..'Z' : as it was sbuf.append((char) ch); } else if ('a' <= ch && ch <= 'z') { // 'a'..'z' : as it was sbuf.append((char) ch); } else if ('0' <= ch && ch <= '9') { // '0'..'9' : as it was sbuf.append((char) ch); } else if (ch == '-' || ch == '_' // unreserved : as it was || ch == '.' || ch == '!' || ch == '~' || ch == '*' || ch == '\'' || ch == '(' || ch == ')') { sbuf.append((char) ch); } else if (ch <= 0x007F) { // other ASCII : map to %XX sbuf.append('%'); sbuf.append(hex[ch]); } else { // unicode : map to %uXXXX sbuf.append('%'); sbuf.append('u'); sbuf.append(hex[(ch >>> 8)]); sbuf.append(hex[(0x00FF & ch)]); } } return sbuf.toString(); } public static String unescape(String s) { StringBuffer sbuf = new StringBuffer(); int i = 0; int len = s.length(); while (i < len) { int ch = s.charat(i); if (ch == '+') { // + : map to ' ' sbuf.append(' '); } else if ('A' <= ch && ch <= 'Z') { // 'A'..'Z' : as it was sbuf.append((char) ch); } else if ('a' <= ch && ch <= 'z') { // 'a'..'z' : as it was sbuf.append((char) ch); } else if ('0' <= ch && ch <= '9') { // '0'..'9' : as it was sbuf.append((char) ch); } else if (ch == '-' || ch == '_' // unreserved : as it was || ch == '.' || ch == '!' 
|| ch == '~' || ch == '*' || ch == '\'' || ch == '(' || ch == ')') { sbuf.append((char) ch); } else if (ch == '%') { int cint = 0; if ('u' != s.charat(i + 1)) { // %XX : map to ascii(XX) cint = (cint << 4) | val[s.charat(i + 1)]; cint = (cint << 4) | val[s.charat(i + 2)]; i += 2; } else { // %uXXXX : map to unicode(XXXX) cint = (cint << 4) | val[s.charat(i + 2)]; cint = (cint << 4) | val[s.charat(i + 3)]; cint = (cint << 4) | val[s.charat(i + 4)]; cint = (cint << 4) | val[s.charat(i + 5)]; i += 5; } sbuf.append((char) cint); } i++; } return sbuf.toString(); } private static final char[] zeroArray = "0000000000000000".tochararray(); /** * Pads the supplied String with 0's to the specified length and returns the * result as a new String. For example,if the initial String is "9999" and * the desired length is 8,the result would be "00009999". This type of * padding is useful for creating numerical values that need to be stored * and sorted as character data. Note: the current implementation of this * method allows for a maximum <tt>length</tt> of 16. * * @param string * the original String to pad. * @param length * the desired length of the new padded String. * @return a new String padded with the required number of 0's. */ public static final String zeroPadString(String string,int length) { if (string == null || string.length() > length) { return string; } StringBuffer buf = new StringBuffer(length); buf.append(zeroArray,length - string.length()).append(string); return buf.toString(); } /** * Formats a Date as a fifteen character long String made up of the Date's * padded millisecond value. * @return a Date encoded as a String. */ public static final String datetoMillis(Date date) { return zeroPadString(Long.toString(date.getTime()),15); } public static void main(String[] args) { System.out.println(replace("aaaaabbbcccc","aa","gg")); System.out.println(replaceIgnoreCase("AAAAbbbcccc","gg")); System.out.println(escapeHTMLTags("AAAAbb<bcccc>")); } }
更多关于 Java 算法的相关内容,感兴趣的读者可查看本站专题:《Java字符与字符串操作技巧总结》、《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》、《Java文件与目录操作技巧汇总》和《Java缓存操作技巧汇总》。
希望本文所述对大家的 Java 程序设计有所帮助。