敏感词过滤器
2017-11-01 本文已影响103人
小小蒜头
使用Decorator模式包装request对象实现敏感字符过滤功能。
敏感词包括了:
禁用词:反对共产党、色情。。。。
审核词:中共(我家“中共”有三头猪)
替换词:和谐社会 河蟹社会(****)
将所有敏感词汇的文本文件放在config目录下,并把config目录标记为资源根目录(例如:在IDEA里面将鼠标放在config目录上右击,选择Mark Directory as,然后再选择Resources Root)。
在WordsFilter里面对敏感词进行过滤:
package cn.itcast.filter;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by yvettee on 2017/11/1.
*/
/**
 * Servlet filter that screens request parameters against three word lists.
 *
 * <p>The lists are loaded once in {@link #init(FilterConfig)} from every {@code *.txt} file in
 * the classpath directory {@code cn/itcast/words}. Each usable line has the form
 * {@code word|type}, where the word is treated as a regular expression and the type is:
 * <ul>
 *   <li>{@code 1} - banned word: a request containing it is forwarded to {@code /message.jsp};</li>
 *   <li>{@code 2} - audit word: every occurrence is wrapped in a red {@code <font>} tag;</li>
 *   <li>{@code 3} - replace word: every occurrence is replaced by {@code *******}.</li>
 * </ul>
 * Lines that do not split into exactly two parts on {@code |}, or that carry any other type,
 * are ignored.
 */
public class WordsFilter implements Filter {

    /** Classpath directory that holds the word list files. */
    private static final String WORDS_DIR = "cn/itcast/words";

    // Patterns are compiled once at start-up instead of once per word per request.
    private final List<Pattern> banWords = new ArrayList<Pattern>();     // banned words (type 1)
    private final List<Pattern> auditWords = new ArrayList<Pattern>();   // audit words (type 2)
    private final List<Pattern> replaceWords = new ArrayList<Pattern>(); // replace words (type 3)

    /**
     * Loads and compiles all word lists found in {@code cn/itcast/words}.
     *
     * @param filterConfig filter configuration supplied by the container (not used)
     * @throws ServletException if the directory cannot be located or listed, or a file cannot be read
     * @throws java.util.regex.PatternSyntaxException if a word is not a valid regular expression
     */
    @Override
    public void init(FilterConfig filterConfig) throws ServletException {
        java.net.URL dirUrl = WordsFilter.class.getClassLoader().getResource(WORDS_DIR);
        if (dirUrl == null) {
            throw new ServletException("Word list directory not found on classpath: " + WORDS_DIR);
        }

        File dir;
        try {
            // URL.getPath() is URL-encoded (a space becomes %20), so go through a URI instead.
            dir = new File(dirUrl.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            throw new ServletException("Word list directory is not a file-system directory: " + dirUrl, e);
        }

        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new ServletException("Cannot list word list directory: " + dir);
        }

        for (File file : files) {
            if (!file.getName().endsWith(".txt")) {
                continue;
            }
            try {
                loadWords(file);
            } catch (IOException e) {
                throw new ServletException("Failed to read word list: " + file, e);
            }
        }
    }

    /**
     * Reads one UTF-8 word list file line by line and adds each word to the list selected by
     * its type.
     */
    private void loadWords(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // Drop a leading byte-order mark so it does not become part of the first word.
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1);
                    }
                }
                /*
                 Sample lines:
                 7大军区|3
                 双桨飞机|3
                 */
                String[] parts = line.split("\\|");
                if (parts.length != 2) {
                    continue;
                }
                String word = parts[0].trim();
                if (word.isEmpty()) {
                    // An empty regular expression matches every input; never register one.
                    continue;
                }
                String type = parts[1].trim();
                if ("1".equals(type)) {
                    banWords.add(Pattern.compile(word));
                } else if ("2".equals(type)) {
                    auditWords.add(Pattern.compile(word));
                } else if ("3".equals(type)) {
                    replaceWords.add(Pattern.compile(word));
                }
            }
        }
    }

    /**
     * Rejects the request if any submitted parameter value contains a banned word; otherwise
     * passes a {@link MyRequest} wrapper down the chain so that audit and replace words are
     * processed when parameters are read.
     *
     * @throws IOException      propagated from the forward or from the rest of the chain
     * @throws ServletException propagated from the forward or from the rest of the chain
     */
    @Override
    public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException {
        HttpServletRequest request = (HttpServletRequest) servletRequest;
        HttpServletResponse response = (HttpServletResponse) servletResponse;

        // Check every value of every submitted parameter, not only the first value per name.
        Enumeration<?> names = request.getParameterNames();
        while (names.hasMoreElements()) {
            String name = (String) names.nextElement();
            String[] values = request.getParameterValues(name);
            if (values == null) {
                continue;
            }
            for (String value : values) {
                if (value != null && containsAny(banWords, value)) {
                    request.setAttribute("message", "文章中包含非法词汇,请检查后提交");
                    request.getRequestDispatcher("/message.jsp").forward(request, response);
                    return;
                }
            }
        }

        // Audit-word highlighting and replace-word masking happen inside the wrapper.
        filterChain.doFilter(new MyRequest(request), response);
    }

    /** Returns true if at least one of the patterns is found anywhere in the text. */
    private static boolean containsAny(List<Pattern> patterns, String text) {
        for (Pattern pattern : patterns) {
            if (pattern.matcher(text).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Request decorator that highlights audit words and masks replace words in the parameter
     * values it hands out. Parameters read through {@code getParameterMap()} are not covered.
     */
    class MyRequest extends HttpServletRequestWrapper {

        public MyRequest(HttpServletRequest request) {
            super(request);
        }

        /**
         * Returns the processed value of the named parameter, or {@code null} if it is absent.
         */
        @Override
        public String getParameter(String name) {
            return process(super.getParameter(name));
        }

        /**
         * Returns processed copies of all values of the named parameter, or {@code null} if it
         * is absent. The array obtained from the wrapped request is left untouched.
         */
        @Override
        public String[] getParameterValues(String name) {
            String[] raw = super.getParameterValues(name);
            if (raw == null) {
                return null;
            }
            String[] processed = new String[raw.length];
            for (int i = 0; i < raw.length; i++) {
                processed[i] = process(raw[i]);
            }
            return processed;
        }

        /** Applies audit-word highlighting first, then replace-word masking. */
        private String process(String data) {
            if (data == null) {
                return null;
            }
            // NOTE(review): the value is wrapped in HTML without being escaped here; confirm
            // that escaping of user input is handled elsewhere before the value is rendered.
            for (Pattern audit : auditWords) {
                // $0 re-inserts each match's own text, so different matches of one pattern
                // keep their text and '$' or '\' in the matched text are not misread.
                data = audit.matcher(data).replaceAll("<font color='red'>$0</font>");
            }
            for (Pattern replace : replaceWords) {
                data = replace.matcher(data).replaceAll("*******");
            }
            return data;
        }
    }

    /** Nothing to release. */
    @Override
    public void destroy() {
    }
}
web.xml
<!-- Registers the class cn.itcast.filter.WordsFilter as a filter named WordsFilter. -->
<filter>
<filter-name>WordsFilter</filter-name>
<filter-class>cn.itcast.filter.WordsFilter</filter-class>
</filter>
<!-- Maps the WordsFilter filter to the URL pattern /*. -->
<filter-mapping>
<filter-name>WordsFilter</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
在form.jsp页面里测试:
<%-- Test form: POSTs the content of the "resume" textarea to /checkServlet under the application's context path. --%>
<form action="${pageContext.request.contextPath}/checkServlet" method="post">
<textarea rows="5" cols="50" name="resume"></textarea><br/>
<input type="submit" value="提交">
</form>
源代码:https://github.com/yvettee36/FilterWords
上篇:转义Filter
下篇:压缩过滤器