很多时候都需要过滤字符串中的javascript等脚本程序,比如防止跨站攻击,采集信息等。网上找了很多,大多不能用,或者存在很多的漏洞和问题,并且以讹传讹的垃圾站也特别多,搜出来的就是那几个,管用的好用的寥寥无几。好歹找到几个能改造下的,看了看,都是使用正则表达式来匹配替换的。正则表达式我也看过很多遍了,但是总是记不住,没办法又查了查资料,边学习边改造。然后将改造的结果公布出来,方便大家批一批!
先来看看效果,是不是你想要的,也证明不是吹的。
先来熟悉几个正则表达式的语法:
\s  空白字符,包括换行符\n、回车符\r、制表符\t、垂直制表符\v、换页符\f
\S  \s的补集
\w  单词字符,指大小写字母、0-9的数字、下划线
\W  \w的补集
更多正则信息,可以参考:http://www.cnblogs.com/KissKnife/archive/2008/03/23/1118423.html
看看如何过滤javascript引用或区块:
有时候javascript会写到Dom元素的鼠标事件中或者链接中,这时候过滤起来比较麻烦,在测试的过程中我写了三个方法:
方法一:整体去除,不能去除不被单引号或双引号包含的属性值
这个方法匹配以on开头的属性,比如 onclick="alert('123')",过滤的时候会整体滤除这些字符。
方法二:去除属性值
这个方法首先匹配标签,获取元素的全部属性,然后再分析元素的属性,过滤掉以on开头的属性的值。
比如:<div id="id1" onclick="alert('123')">这里是内容</div>
过滤的时候会滤除onclick的值,即alert('123')。
方法三:整体滤除,效果较好
这个方法是方法一的变形,定义一个“组”:ScriptBlock,然后获取到匹配的字符串,然后逐个替换。
通过上边两种方法,基本上可以过滤掉全部的javascript了,还有没有漏网之鱼呢?
诶~,还有一个href中的javascript:
这样就完整了吧。如果还有漏网的,欢迎给我提出来啊。
最后还有几个滤除的方法,和上边的一并贴出来,比如过滤frame、object、html,以及自定义的字符等。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace VeryCodes.Text
{
    /// <summary>
    /// Regular-expression based helpers that strip scripts, event-handler attributes,
    /// embedded elements, HTML markup and unwanted words from a string.
    /// </summary>
    /// <remarks>
    /// These filters are best-effort text clean-up. Regular expressions cannot fully parse
    /// HTML, so do not rely on this class as the only defence against cross-site scripting
    /// or SQL injection: encode output for its context and use parameterized queries.
    /// </remarks>
    public class StringFilter
    {
        private const RegexOptions MarkupOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

        private static readonly ElementStripper ScriptElements = new ElementStripper("script");
        private static readonly ElementStripper ObjectElements = new ElementStripper("object");
        private static readonly ElementStripper IframeElements = new ElementStripper("iframe");
        private static readonly ElementStripper FramesetElements = new ElementStripper("frameset");

        // One opening tag: the tag name, then everything up to the first '>' that is not
        // inside a quoted attribute value. Atomic groups keep the scan free of backtracking.
        private static readonly Regex TagPattern = new Regex(
            @"<(?<tag>(?>[a-z][^\s/>]*))(?<attrs>(?>(?:=\s*""[^""]*""|=\s*'[^']*'|[^>])*))>",
            MarkupOptions);

        // One attribute inside a tag: separators (whitespace or '/'), a name and an optional
        // quoted or unquoted value. The look-behind makes a separator run match only from its
        // first character, which keeps long whitespace runs linear.
        private static readonly Regex AttributePattern = new Regex(
            @"(?<sep>(?<![\s/])[\s/]*)(?<name>[^\s/>=]+)(?:\s*=\s*(?<value>""[^""]*""|'[^']*'|[^\s>]*))?",
            MarkupOptions);

        // href attribute whose value starts with a "...script:" scheme (javascript:, vbscript:, ...),
        // whether the value is double-quoted, single-quoted or unquoted.
        private static readonly Regex HrefScriptPattern = new Regex(
            @"\shref\s*=\s*(?:""\s*\w*script\s*:[^""]*""|'\s*\w*script\s*:[^']*'|\w*script\s*:[^\s>]*)\s*",
            MarkupOptions);

        // Any src attribute, whether the value is double-quoted, single-quoted or unquoted.
        private static readonly Regex SrcPattern = new Regex(
            @"\ssrc\s*=\s*(?:""[^""]*""|'[^']*'|[^\s>]*)\s*",
            MarkupOptions);

        // Ordered rewrite rules used by FilterHtml. Order matters: tags are removed before
        // entities are decoded, and the final literal clean-up runs after all rules.
        private static readonly KeyValuePair<Regex, string>[] HtmlRules = new KeyValuePair<Regex, string>[]
        {
            Rule(@"<style\b[\s\S]*?</style\b[^>]*>", ""),
            Rule(@"<.*?>", ""),
            Rule(@"<(.[^>]*)>", ""),
            Rule(@"([\r\n])[\s]+", ""),
            Rule(@"&(quot|#34);", "\""),
            Rule(@"&(amp|#38);", "&"),
            Rule(@"&(lt|#60);", "<"),
            Rule(@"&(gt|#62);", ">"),
            Rule(@"&(nbsp|#160);", " "),
            Rule(@"&(iexcl|#161);", "\u00A1"),
            Rule(@"&(cent|#162);", "\u00A2"),
            Rule(@"&(pound|#163);", "\u00A3"),
            Rule(@"&(copy|#169);", "\u00A9"),
            Rule(@"&#(\d+);", ""),
            Rule(@"-->", "\r\n"),
            Rule(@"<!--.*\n", "")
        };

        /// <summary>
        /// Removes script elements, inline event-handler attributes (onclick, onload, ...)
        /// and href attributes that point at a script URL.
        /// </summary>
        /// <param name="str">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup with script content removed.</returns>
        public static string FilterScript(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            return StripScriptAttributesFromTags(ScriptElements.Strip(str));
        }

        /// <summary>
        /// Removes attributes whose name starts with "on" and that carry a value from every
        /// opening tag, then removes script URLs from href attributes.
        /// </summary>
        /// <param name="str">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup without inline event handlers or script hrefs.</returns>
        private static string StripScriptAttributesFromTags(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            // Only text inside an opening tag is examined, so ordinary attributes such as
            // content="..." and plain text outside tags are left alone.
            string withoutHandlers = TagPattern.Replace(str, new MatchEvaluator(StripHandlersFromTag));
            return FilterHrefScript(withoutHandlers);
        }

        /// <summary>
        /// Removes href attributes whose value starts with a script scheme such as
        /// "javascript:" or "vbscript:". Quoted and unquoted values are both handled.
        /// </summary>
        /// <param name="str">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup with each offending href attribute replaced by a single space.</returns>
        public static string FilterHrefScript(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            return HrefScriptPattern.Replace(str, " ");
        }

        /// <summary>
        /// Removes every src attribute, whatever its value looks like.
        /// </summary>
        /// <param name="str">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup with each src attribute replaced by a single space.</returns>
        public static string FilterSrc(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            return SrcPattern.Replace(str, " ");
        }

        /// <summary>
        /// Reduces HTML to plain text: drops style elements and all tags, decodes a few
        /// common entities, removes the remaining numeric entities and comment markers,
        /// and finally deletes any leftover angle brackets and CR/LF pairs.
        /// </summary>
        /// <param name="str">Markup to convert. Null or empty input is returned unchanged.</param>
        /// <returns>The text content of the markup.</returns>
        public static string FilterHtml(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            string output = str;
            foreach (KeyValuePair<Regex, string> rule in HtmlRules)
            {
                output = rule.Key.Replace(output, rule.Value);
            }

            // Entity decoding above can reintroduce angle brackets; they are removed last.
            output = output.Replace("<", "");
            output = output.Replace(">", "");
            output = output.Replace("\r\n", "");
            return output;
        }

        /// <summary>
        /// Removes object elements, including opening tags that are never closed.
        /// </summary>
        /// <param name="content">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup without object elements.</returns>
        public static string FilterObject(string content)
        {
            return ObjectElements.Strip(content);
        }

        /// <summary>
        /// Removes iframe elements, including opening tags that are never closed.
        /// </summary>
        /// <param name="content">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup without iframe elements.</returns>
        public static string FilterIframe(string content)
        {
            return IframeElements.Strip(content);
        }

        /// <summary>
        /// Removes frameset elements, including opening tags that are never closed.
        /// </summary>
        /// <param name="content">Markup to clean. Null or empty input is returned unchanged.</param>
        /// <returns>The markup without frameset elements.</returns>
        public static string FilterFrameset(string content)
        {
            return FramesetElements.Strip(content);
        }

        /// <summary>
        /// Doubles single quotes and HTML-encodes angle brackets.
        /// </summary>
        /// <remarks>
        /// NOTE(review): quote doubling is not a reliable defence against SQL injection.
        /// Prefer parameterized commands; keep this only for legacy call sites.
        /// </remarks>
        /// <param name="str">Text to escape. Null or empty input is returned unchanged.</param>
        /// <returns>The escaped text.</returns>
        public static string FilterSql(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            str = str.Replace("'", "''");
            str = str.Replace("<", "&lt;");
            str = str.Replace(">", "&gt;");
            return str;
        }

        /// <summary>
        /// Masks every occurrence of the given words with asterisks, one asterisk per
        /// matched character. Matching ignores case.
        /// </summary>
        /// <param name="keyWord">
        /// Words to mask, separated by '|'. Each entry is used as a regular expression;
        /// an entry that is not a valid pattern (for example "c++") is matched literally.
        /// </param>
        /// <param name="chkStr">Text to process.</param>
        /// <returns>
        /// The processed text; an empty string when <paramref name="chkStr"/> is null or
        /// empty; the text unchanged when <paramref name="keyWord"/> is null or empty.
        /// </returns>
        public static string FilterBadWords(string keyWord, string chkStr)
        {
            if (string.IsNullOrEmpty(chkStr))
            {
                return string.Empty;
            }

            if (string.IsNullOrEmpty(keyWord))
            {
                return chkStr;
            }

            const RegexOptions options =
                RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline;

            foreach (string entry in keyWord.Split('|'))
            {
                string word = entry.Trim();
                if (word.Length == 0)
                {
                    // An empty pattern matches everywhere and masks nothing; skip it.
                    continue;
                }

                Regex matcher = CreateBadWordRegex(word, options);
                chkStr = matcher.Replace(chkStr, new MatchEvaluator(MaskMatch));
            }

            return chkStr;
        }

        // Builds one FilterHtml rule: a case-insensitive pattern and its replacement text.
        private static KeyValuePair<Regex, string> Rule(string pattern, string replacement)
        {
            return new KeyValuePair<Regex, string>(new Regex(pattern, RegexOptions.IgnoreCase), replacement);
        }

        // Rebuilds an opening tag without its event-handler attributes.
        private static string StripHandlersFromTag(Match tag)
        {
            string attrs = tag.Groups["attrs"].Value;
            if (attrs.Length == 0)
            {
                return tag.Value;
            }

            string cleaned = AttributePattern.Replace(attrs, new MatchEvaluator(KeepUnlessEventHandler));
            return "<" + tag.Groups["tag"].Value + cleaned + ">";
        }

        // Drops an attribute (with its leading separator) when it is named on* and has a value.
        private static string KeepUnlessEventHandler(Match attribute)
        {
            string name = attribute.Groups["name"].Value;
            bool isHandler = attribute.Groups["value"].Success
                && name.Length > 2
                && name.StartsWith("on", StringComparison.OrdinalIgnoreCase);
            return isHandler ? string.Empty : attribute.Value;
        }

        // Compiles a bad-word entry as a pattern, falling back to a literal match when invalid.
        private static Regex CreateBadWordRegex(string word, RegexOptions options)
        {
            try
            {
                return new Regex(word, options);
            }
            catch (ArgumentException)
            {
                return new Regex(Regex.Escape(word), options);
            }
        }

        // Replaces a match with asterisks of the same length.
        private static string MaskMatch(Match match)
        {
            return new string('*', match.Length);
        }

        /// <summary>
        /// Removes every occurrence of one element type: complete open/close pairs first,
        /// then any opening or closing tag left on its own.
        /// </summary>
        private sealed class ElementStripper
        {
            private readonly Regex _block;
            private readonly Regex _strayTag;

            public ElementStripper(string tagName)
            {
                string name = Regex.Escape(tagName);

                // Lazy body so that two separate elements are not merged into one match;
                // the end tag may carry trailing whitespace or junk before its '>'.
                _block = new Regex("<" + name + @"\b[\s\S]*?</" + name + @"\b[^>]*>", MarkupOptions);
                _strayTag = new Regex("</?" + name + @"\b[^>]*>", MarkupOptions);
            }

            public string Strip(string input)
            {
                if (string.IsNullOrEmpty(input))
                {
                    return input;
                }

                string current = input;
                string previous;
                do
                {
                    // Repeat until stable: removing one element can splice the surrounding
                    // text into a new tag of the same kind.
                    previous = current;
                    current = _block.Replace(current, string.Empty);
                    current = _strayTag.Replace(current, string.Empty);
                }
                while (!string.Equals(current, previous, StringComparison.Ordinal));

                return current;
            }
        }
    }
}
发表评论
相关文章
国内AI资源汇总,AI聊天、AI绘画、AI写作、AI视频、AI设计、AI编程、AI音乐等,国内顺畅访问,无需科学上网。
扫码或点击进入:萤火AI大全
文章分类
最新评论