.NET 6 实现敏感词过滤

  • .NET 6 实现敏感词过滤已关闭评论
  • 104 次浏览
  • A+
所属分类:.NET技术
摘要

敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。
通常,可以使用两种方法来过滤敏感词:


一、什么是敏感词过滤?

敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。
通常,可以使用两种方法来过滤敏感词:

  1. 黑名单过滤:即定义一个黑名单,将所有敏感词记录在其中,然后对输入的文本进行对比,如果发现有敏感词,就将其过滤掉。
  2. 白名单过滤:即定义一个白名单,将所有不敏感的词汇记录在其中,然后对输入的文本进行对比,如果发现有不在白名单中的词汇,就将其过滤掉。

二、ToolGood.Words是什么?

ToolGood.Words是一款高性能非法词(敏感词)检测组件,附带繁体简体互换,支持全角半角互换,获取拼音首字母,获取拼音字母,拼音模糊搜索等功能。
ToolGood.Words的源码网站:https://github.com/toolgood/ToolGood.Words

三、在Visual Studio中安装ToolGood.Words

3.1、右键项目解决方案,选择“管理NuGet程序包”,如下图所示:

.NET 6 实现敏感词过滤

3.2、切换到“浏览”选项卡,搜索“ToolGood.Words”并安装:

.NET 6 实现敏感词过滤
安装完之后最好重新编译生成项目

四、创建“subContentCheck”类

敏感/违禁词汇因特殊内容不便上传,可自行在网站上查找

using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;

namespace WebApplication1 // Replace with your own project's namespace when copying this file.
{
    /// <summary>
    /// Serialization container holding a list of illegal keywords.
    /// </summary>
    public class keywords
    {
        public List<string> IllegalKeywords { get; set; }
    }

    /// <summary>
    /// Serialization container holding a list of illegal URLs.
    /// </summary>
    public class urlwords
    {
        public List<string> IllegalUrls { get; set; }
    }

    /// <summary>
    /// Checks submitted content against the sensitive/forbidden word lists
    /// stored under the web root.
    /// </summary>
    public class subContentCheck
    {
        /// <summary>
        /// Hosting environment used to resolve the web root directory.
        /// </summary>
        private readonly IHostingEnvironment _hostingEnv;

        /// <summary>
        /// Path of the sensitive word dictionary, relative to the web root.
        /// </summary>
        private readonly string dictionaryPath = "/sensitiveWords/sensitiveWords.txt";

        /// <summary>
        /// Path of the sensitive link/site/URL dictionary, relative to the web root.
        /// </summary>
        private readonly string urlsPath = "/sensitiveWords/IllegalUrls.txt";

        /// <summary>
        /// All sensitive entries (words followed by URLs) currently loaded in memory.
        /// </summary>
        public string[] Words { get; set; }

        /// <summary>
        /// Creates the checker and immediately loads both dictionaries.
        /// </summary>
        /// <param name="hostingEnv">Hosting environment that supplies the web root path.</param>
        public subContentCheck(IHostingEnvironment hostingEnv)
        {
            _hostingEnv = hostingEnv;
            InitDictionary();
        }

        /// <summary>
        /// Loads the word and URL dictionaries from disk into <see cref="Words"/>.
        /// Each file may hold one entry per line and/or several entries per line
        /// separated by '|'; empty entries are skipped.
        /// </summary>
        public void InitDictionary()
        {
            // Reset first so Words is an empty array (not stale data) if a read below throws.
            Words = new string[] { };

            // The relative paths start with '/', so they are appended rather than
            // passed to Path.Combine (which would treat them as rooted paths).
            string wordsPath = _hostingEnv.WebRootPath + dictionaryPath;
            string urlPath = _hostingEnv.WebRootPath + urlsPath;

            string[] wordLines = System.IO.File.ReadAllLines(wordsPath, System.Text.Encoding.UTF8);
            string[] urlLines = System.IO.File.ReadAllLines(urlPath, System.Text.Encoding.UTF8);

            List<string> entries = new List<string>();
            AppendEntries(entries, wordLines);
            AppendEntries(entries, urlLines);

            Words = entries.ToArray();
        }

        /// <summary>
        /// Splits every line on '|' and appends each non-empty piece to <paramref name="target"/>.
        /// </summary>
        private static void AppendEntries(List<string> target, string[] lines)
        {
            foreach (string line in lines)
            {
                foreach (string entry in line.Split('|'))
                {
                    if (!string.IsNullOrEmpty(entry))
                    {
                        target.Add(entry);
                    }
                }
            }
        }

        /// <summary>
        /// Replaces every sensitive word found in the text with a mask character.
        /// </summary>
        /// <param name="sourceText">Text to filter.</param>
        /// <param name="replaceChar">Character used to mask sensitive words; defaults to '*'.</param>
        /// <returns>
        /// "0" if loading the keywords into the search engine failed;
        /// "1" if the replacement itself failed;
        /// "2" if <paramref name="sourceText"/> is null or empty;
        /// otherwise the filtered text.
        /// Note: the status codes share the return channel with the filtered text, so a
        /// filtered result that is literally "0", "1" or "2" cannot be told apart from an error.
        /// </returns>
        public string FilterWithChar(string sourceText, char replaceChar = '*')
        {
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            WordsSearch wordsSearch = new WordsSearch();
            try
            {
                wordsSearch.SetKeywords(Words);
            }
            catch (Exception)
            {
                return "0";
            }

            try
            {
                return wordsSearch.Replace(sourceText, replaceChar);
            }
            catch (Exception)
            {
                return "1";
            }
        }

        /// <summary>
        /// Reports whether the text contains any sensitive/forbidden word.
        /// </summary>
        /// <param name="sourceText">Text to inspect.</param>
        /// <returns>
        /// "0" if loading the keywords into the search engine failed;
        /// "1" if the search itself failed;
        /// "2" if <paramref name="sourceText"/> is null or empty;
        /// "3" if the text contains a sensitive word;
        /// "4" if it does not.
        /// </returns>
        public string FindSensitiveKey(string sourceText)
        {
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            WordsSearch wordsSearch = new WordsSearch();
            try
            {
                wordsSearch.SetKeywords(Words);
            }
            catch (Exception)
            {
                return "0";
            }

            try
            {
                return wordsSearch.ContainsAny(sourceText) ? "3" : "4";
            }
            catch (Exception)
            {
                return "1";
            }
        }

        /// <summary>
        /// Serializes one of the two lists to JSON, echoes it to the console and writes it to
        /// "&lt;current directory&gt;/&lt;filename&gt;.json".
        /// </summary>
        /// <param name="jsonData">Keyword list to serialize; when null, <paramref name="urlJsonData"/> is serialized instead.</param>
        /// <param name="urlJsonData">URL list serialized only when <paramref name="jsonData"/> is null.</param>
        /// <param name="filename">Target file name without the ".json" extension.</param>
        public static void Write(List<keywords> jsonData, List<urlwords> urlJsonData, string filename)
        {
            // Path.Combine inserts the platform's separator; the original "\" literal did not compile.
            string strFileName = System.IO.Path.Combine(Directory.GetCurrentDirectory(), filename + ".json");

            string listJson = jsonData != null
                ? JsonConvert.SerializeObject(jsonData)
                : JsonConvert.SerializeObject(urlJsonData);

            Console.WriteLine(listJson);

            WriteJsonFile(strFileName, listJson);

            // Writes the serialized JSON text to the given path.
            void WriteJsonFile(string path, string jsonContents)
            {
                // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
                // trailing bytes behind whenever the new content is shorter than the old.
                using (FileStream fs = new FileStream(path, FileMode.Create, System.IO.FileAccess.ReadWrite, FileShare.ReadWrite))
                {
                    // GB2312 is not built into .NET (Core); the code-pages provider must be
                    // registered before Encoding.GetEncoding can resolve it.
                    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
                    using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("GB2312")))
                    {
                        sw.WriteLine(jsonContents);
                    }
                }
            }
        }
    }
}

五、写API接口

/// <summary>
/// Masks the sensitive words in the submitted text.
/// </summary>
/// <param name="sourctText">Text to be masked.</param>
/// <returns>
/// JSON object { code, msg, resultStr }. code 200: success (resultStr holds the masked text);
/// 210: keyword setup failed; 220: unexpected exception; 230/260: input text empty;
/// 240: replacement failed.
/// </returns>
[HttpPost]
public IActionResult sensitive_words_replace2(string sourctText)
{
    // Reject empty input before touching the dictionaries on disk.
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要替换的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try: the constructor reads the dictionary files and
        // may throw (e.g. missing file); that must map to the 220 response below.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FilterWithChar(sourctText);

        int resCode;
        string resMsg;
        // FilterWithChar reports failures as the literal strings "0", "1", "2".
        // NOTE(review): a legitimate filtered text equal to one of these is
        // indistinguishable from an error with the current contract.
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resultStr = "";
                resMsg = "设置违禁词时发生错误,请联系管理员!";
                break;
            case "1":
                resCode = 240;
                resultStr = "";
                resMsg = "敏感内容替换时发生错误!";
                break;
            case "2":
                resCode = 260;
                resultStr = "";
                resMsg = "需要替换的文本内容为空!";
                break;
            default:
                resCode = 200;
                resMsg = "敏感词替换请求成功!";
                break;
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容替换时发生错误!", resultStr = "" });
    }
}

/// <summary>
/// Checks whether the submitted text contains sensitive words.
/// </summary>
/// <param name="sourctText">Text to be checked.</param>
/// <returns>
/// JSON object { code, msg, resultStr }. code 200: no sensitive word found;
/// 270: sensitive word found; 210: keyword setup failed; 220: unexpected exception;
/// 230/260: input text empty; 240: matching failed.
/// </returns>
[HttpPost]
public IActionResult whether_sensitive_words(string sourctText)
{
    // Reject empty input before touching the dictionaries on disk.
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要替换的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try: the constructor reads the dictionary files and
        // may throw (e.g. missing file); that must map to the 220 response below.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FindSensitiveKey(sourctText);

        int resCode;
        string resMsg;
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resultStr = "";
                resMsg = "设置违禁词时发生错误,请联系管理员!";
                break;
            case "1":
                resCode = 240;
                resultStr = "";
                resMsg = "敏感内容匹配时发生错误!";
                break;
            case "2":
                resCode = 260;
                resultStr = "";
                resMsg = "需要判断的文本内容为空!";
                break;
            case "3":
                resCode = 270;
                resultStr = "";
                resMsg = "内容中含有敏感/违禁词!";
                break;
            default:
                // Any other status (normally "4") is passed through unchanged in resultStr.
                resCode = 200;
                resMsg = "内容中不含敏感/违禁词!";
                break;
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容匹配时发生错误!", resultStr = "" });
    }
}

六、前端封装JS方法

/**
 * Replaces sensitive/forbidden words by calling the backend, then writes the
 * masked text into the element with the given id.
 *
 * The request is asynchronous, so the value returned by this function is always
 * the empty string; the masked text is only delivered through the target element.
 *
 * @param {string} sourctText Text to be masked.
 * @param {string} boxid id attribute of the element that receives the masked text (via .val()).
 * @param {object} layui Layui instance (its `layer` module is used for dialogs).
 * @returns {string} Always "" at return time, because the request has not completed yet.
 */
function sensitive_words_replace(sourctText, boxid, layui) {
    // Response codes for which the backend reports a failure.
    const failureCodes = [210, 220, 230, 240, 260];
    let resultStr = "";

    $.ajax({
        url: "/Home/sensitive_words_replace2", // backend endpoint
        dataType: "JSON",
        type: "POST",
        data: {
            "sourctText": sourctText
        },
        success: function (res) {
            // The code may arrive as a number or a numeric string; normalise once.
            const resCode = Number(res.code);
            const resMsg = res.msg;

            if (failureCodes.indexOf(resCode) !== -1) {
                // Close the loading layer before showing the message.
                layui.layer.closeAll();
                resultStr = res.resultStr;
                layui.layer.alert(resMsg, { icon: 5, title: "温馨提示", closeBtn: 0 });
            } else if (resCode === 200) {
                resultStr = res.resultStr;
                $("#" + boxid).val(resultStr);
                // Close the loading layer once the data has been applied.
                layui.layer.closeAll();
            }
        },
        error: function (xhr, textStatus, errorThrown) {
            // Close the loading layer before showing the message.
            layui.layer.closeAll();
            // jQuery passes the jqXHR object first; show a readable status instead of "[object Object]".
            const message = errorThrown || textStatus || "error";
            layui.layer.alert(String(message), { icon: 5, title: "温馨提示", closeBtn: 0 });
        }
    });

    return resultStr;
}

/**
 * Asks the backend whether the text contains sensitive/forbidden words.
 *
 * @param {string} sourctText Text to be checked.
 * @param {string} boxid Not used by this function; kept for signature parity with sensitive_words_replace.
 * @param {object} layui Layui instance (its `layer` module is used for dialogs).
 * @returns {boolean} true when the backend answers with code 270 (sensitive word found); false otherwise,
 *                    including when the request or the check fails.
 */
function whether_sensitive_words(sourctText, boxid, layui) {
    // Response codes for which the backend reports a failure.
    const failureCodes = [210, 220, 230, 240, 260];
    let resultBool = false;

    $.ajax({
        url: "/Home/whether_sensitive_words", // backend endpoint
        dataType: "JSON",
        type: "POST",
        // Synchronous on purpose: the result is returned directly to the caller,
        // which only works if the request has completed before `return`.
        async: false,
        data: {
            "sourctText": sourctText
        },
        success: function (res) {
            // The code may arrive as a number or a numeric string; normalise once.
            const resCode = Number(res.code);
            const resMsg = res.msg;

            if (failureCodes.indexOf(resCode) !== -1) {
                resultBool = false;
                layui.layer.alert(resMsg, { icon: 5, title: "温馨提示", closeBtn: 0 });
            } else if (resCode === 270) {
                resultBool = true;
            } else if (resCode === 200) {
                resultBool = false;
                // Close the loading layer once the answer is known.
                layui.layer.closeAll();
            }
        },
        error: function (xhr, textStatus, errorThrown) {
            // jQuery passes the jqXHR object first; show a readable status instead of "[object Object]".
            const message = errorThrown || textStatus || "error";
            layui.layer.alert(String(message), { icon: 5, title: "温馨提示", closeBtn: 0 });
        }
    });

    return resultBool;
}