请求某个接口并接收其返回结果时,结果通常放在 JSON 里,需要从中取出自己想要的内容。如果取到的是 HTML 内容,其中很多字符在传输过程中其实已经被转义了,这时候就需要过滤一下。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
| package com.vshow.control.tool;
import java.util.regex.Matcher; import java.util.regex.Pattern;
/**
 * Helpers for reducing an HTML fragment to plain text.
 *
 * <p>The approach is regex-based: it removes anything that looks like a tag and does
 * not parse the document, so it is meant for simple fragments rather than arbitrary
 * markup. All patterns are compiled once and shared.
 */
public class HtmlUtil {
    /** A {@code <br>} tag in any letter case, optionally self-closing ({@code <br/>}, {@code <br />}). */
    private static final Pattern LINE_BREAK =
            Pattern.compile("<br\\s*/?>", Pattern.CASE_INSENSITIVE);

    /** A whole {@code <script>...</script>} element, including its content. */
    private static final Pattern SCRIPT_BLOCK =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

    /** A whole {@code <style>...</style>} element, including its content. */
    private static final Pattern STYLE_BLOCK =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?</style>", Pattern.CASE_INSENSITIVE);

    /** Any remaining opening or closing tag. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /**
     * Characters dropped by {@link #getTextFromHtml(String)}: the plain space, the
     * no-break space (U+00A0) and the literal {@code &nbsp;} entity, which survives
     * tag removal because it is not a tag.
     */
    private static final Pattern SPACES = Pattern.compile("&nbsp;|[ \\u00A0]");

    /** Ideographic full stop (U+3002), used as the end-of-sentence marker. */
    private static final char FULL_STOP = '\u3002';

    /**
     * Strips markup from an HTML fragment.
     *
     * <p>Line-break tags become {@code "\n"}; {@code <script>} and {@code <style>}
     * elements are removed together with their content; every other tag is removed
     * and its inner text kept.
     *
     * @param htmlStr the HTML fragment, may be {@code null}
     * @return the text with tags removed; an empty string when {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }
        // Line breaks are converted first so they are not swallowed by ANY_TAG below.
        String text = LINE_BREAK.matcher(htmlStr).replaceAll("\n");
        // Script and style bodies are not visible text, so drop them entirely
        // before removing the remaining tags.
        text = SCRIPT_BLOCK.matcher(text).replaceAll("");
        text = STYLE_BLOCK.matcher(text).replaceAll("");
        return ANY_TAG.matcher(text).replaceAll("");
    }

    /**
     * Extracts the first sentence of the plain text contained in an HTML fragment.
     *
     * <p>Tags are removed with {@link #delHTMLTag(String)}, spaces are dropped, and
     * the result is cut after the first ideographic full stop (U+3002). When the text
     * contains no such full stop, the whole text is returned instead of an empty
     * string.
     *
     * @param htmlStr the HTML fragment, may be {@code null}
     * @return the text up to and including the first full stop, or the whole
     *         space-free text when there is none; empty when {@code htmlStr} is {@code null}
     */
    public static String getTextFromHtml(String htmlStr) {
        String text = SPACES.matcher(delHTMLTag(htmlStr)).replaceAll("");
        int end = text.indexOf(FULL_STOP);
        // indexOf yields -1 when no full stop exists; keep everything in that case.
        return end < 0 ? text : text.substring(0, end + 1);
    }
}
|