php关键字过滤

xiaoxiao2021-02-28  32

<?php

/**
 * Prefix tree (trie) of UTF-8 keywords, used to test whether a text contains
 * any registered keyword.
 *
 * Every node is an array keyed by the next character of a keyword. A keyword
 * ends at a node that contains the END_MARK key.
 */
class TrieTree
{
    /**
     * Key that marks "a keyword ends at this node".
     *
     * PHP casts a null array key to the empty string, so a tree that was
     * serialized with a null terminator uses this very same key and can still
     * be imported.
     */
    const END_MARK = '';

    /**
     * Root node of the trie.
     *
     * It starts empty: keywords must be registered through add() so that each
     * of them gets a proper character path and end marker.
     *
     * @var array
     */
    public $tree = array();

    /**
     * Adds a keyword to the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function add($utf8_str)
    {
        $node = &$this->tree;
        foreach ($this->_path($utf8_str) as $key) {
            if (! array_key_exists($key, $node)) {
                $node[$key] = array();
            }
            // Descend; rebinding the reference keeps us writing into $this->tree.
            $node = &$node[$key];
        }
        return $this;
    }

    /**
     * Removes a keyword from the trie.
     *
     * Only the given keyword is removed: keywords that share a prefix with it
     * (for example "abc" when removing "ab") are left untouched. Removing a
     * keyword that is not in the trie is a no-op.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function remove($utf8_str)
    {
        $path = $this->_path($utf8_str);
        // Make sure the keyword is really stored before touching the tree.
        if ($this->_find($path)) {
            $this->_unlink($this->tree, $path, 0);
        }
        return $this;
    }

    /**
     * Tells whether the exact keyword is stored in the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return boolean
     */
    public function exists($utf8_str)
    {
        return $this->_find($this->_path($utf8_str));
    }

    /**
     * Checks whether a text contains any stored keyword.
     *
     * @param string  $utf8_str text to scan, UTF-8 encoded
     * @param boolean $do_count when true, count every match (matches starting
     *                          at the same position and overlapping matches
     *                          are all counted) instead of stopping at the
     *                          first one
     * @return boolean|number true/false when $do_count is false, otherwise the
     *                        number of matches
     */
    public function contain($utf8_str, $do_count = false)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $len = count($chars);
        $count = 0;
        // Try to match a keyword starting at every position of the text.
        for ($i = 0; $i < $len; $i ++) {
            $node = $this->tree;
            for ($j = $i; $j < $len; $j ++) {
                $c = $chars[$j];
                if (! array_key_exists($c, $node)) {
                    break;
                }
                $node = $node[$c];
                // A keyword ends exactly at $chars[$j].
                if (array_key_exists(self::END_MARK, $node)) {
                    if (! $do_count) {
                        return true;
                    }
                    $count ++;
                }
            }
        }
        return $do_count ? $count : false;
    }

    /**
     * Checks whether at least one of the given texts contains a keyword.
     *
     * @param array $str_array list of UTF-8 texts; any non-array value yields false
     * @return boolean
     */
    public function containMulti($str_array)
    {
        if (\is_array($str_array)) {
            foreach ($str_array as $str) {
                if ($this->contain($str)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Exports the trie as a serialized string.
     *
     * @return string
     */
    public function export()
    {
        return serialize($this->tree);
    }

    /**
     * Replaces the trie with one previously produced by export().
     *
     * NOTE(review): unserialize() must not be fed untrusted data. Object
     * instantiation is disabled here because a trie only holds nested arrays,
     * but prefer importing only payloads produced by export().
     *
     * @param string $str serialized trie
     * @throws \InvalidArgumentException when $str does not decode to an array
     */
    public function import($str)
    {
        $tree = unserialize($str, array('allowed_classes' => false));
        if (! \is_array($tree)) {
            // Keep the current trie intact instead of storing false/garbage.
            throw new \InvalidArgumentException('Serialized trie must decode to an array.');
        }
        $this->tree = $tree;
    }

    /**
     * Builds the list of node keys for a keyword: its characters followed by
     * the end marker.
     *
     * @param string $utf8_str
     * @return array
     */
    private function _path($utf8_str)
    {
        $path = UTF8Util::getChars($utf8_str);
        $path[] = self::END_MARK;
        return $path;
    }

    /**
     * Tells whether the given key path exists in the trie.
     *
     * @param array $path node keys, as built by _path()
     * @return boolean
     */
    private function _find($path)
    {
        $node = $this->tree;
        foreach ($path as $key) {
            if (! array_key_exists($key, $node)) {
                return false;
            }
            $node = $node[$key];
        }
        return true;
    }

    /**
     * Deletes the last key of $path, then removes every ancestor node that
     * became empty on the way back up. The path must exist (see _find()).
     *
     * @param array   $node  node currently visited (modified in place)
     * @param array   $path  node keys, as built by _path()
     * @param integer $depth index in $path handled by this call
     */
    private function _unlink(&$node, $path, $depth)
    {
        $key = $path[$depth];
        if ($depth < count($path) - 1) {
            $this->_unlink($node[$key], $path, $depth + 1);
            if (count($node[$key]) > 0) {
                // The child is still needed by another keyword.
                return;
            }
        }
        unset($node[$key]);
    }
}

/**
 * Helper for splitting UTF-8 strings into characters.
 */
class UTF8Util
{
    /**
     * Splits a UTF-8 string into a list of characters, each one being the
     * byte sequence of a single code point.
     *
     * Bytes that do not belong to a well-formed sequence (a stray continuation
     * byte, or a lead byte that is truncated or not followed by continuation
     * bytes) are skipped.
     *
     * @param string $utf8_str
     * @return array list of strings, empty for an empty input
     */
    public static function getChars($utf8_str)
    {
        $s = (string) $utf8_str;
        $len = strlen($s);
        $chars = array();
        $i = 0;
        while ($i < $len) {
            $n = ord($s[$i]);
            if ($n < 0x80) {
                // 0xxx xxxx: ASCII, single byte
                $width = 1;
            } elseif ($n >= 0xF0) {
                // 1111 xxxx: lead byte of a four-byte sequence
                $width = 4;
            } elseif ($n >= 0xE0) {
                // 111x xxxx: lead byte of a three-byte sequence
                $width = 3;
            } elseif ($n >= 0xC0) {
                // 11xx xxxx: lead byte of a two-byte sequence
                $width = 2;
            } else {
                // 10xx xxxx: continuation byte without a lead byte
                $i ++;
                continue;
            }
            if (! self::isCompleteSequence($s, $i, $width, $len)) {
                // Drop only the broken lead byte so following bytes are kept.
                $i ++;
                continue;
            }
            $chars[] = substr($s, $i, $width);
            $i += $width;
        }
        return $chars;
    }

    /**
     * Tells whether $width bytes are available at $start and every byte after
     * the first one is a continuation byte (10xx xxxx).
     *
     * @param string  $s
     * @param integer $start offset of the lead byte
     * @param integer $width expected sequence length in bytes
     * @param integer $len   byte length of $s
     * @return boolean
     */
    private static function isCompleteSequence($s, $start, $width, $len)
    {
        if ($start + $width > $len) {
            return false;
        }
        for ($k = 1; $k < $width; $k ++) {
            if ((ord($s[$start + $k]) & 0xC0) !== 0x80) {
                return false;
            }
        }
        return true;
    }
}

// Demo: split a sample sentence into characters. getChars() is static; it is
// called through an instance here only to keep the $utf8_str variable defined.
$utf8_str = new UTF8Util();
$utf_char = $utf8_str->getChars('佛教飞机撒方式的回复回复后我我认为回复日无法核实的回复我五花肉覅福热火么光和热规划局狂热韩国关乎二后过过过过过过过群军过军若绿可我让我陪我二骗人富可敌国及时来构架了');
转载请注明原文地址: https://www.6miu.com/read-2624743.html

最新回复(0)