-
Notifications
You must be signed in to change notification settings - Fork 50
/
xsshtml.class.php
170 lines (151 loc) · 4.58 KB
/
xsshtml.class.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
<?php
/**
* PHP 富文本XSS过滤类
*
* @package XssHtml
*
*/
#
# Usage:
# <?php
# require('xsshtml.class.php');
# $html = '<html code>';
# $xss = new XssHtml($html);
# $html = $xss->getHtml();
# ?\>
#
# 需求:
# PHP Version > 5.0
# 浏览器版本:IE7+ 或其他浏览器,无法防御IE6及以下版本浏览器中的XSS
# 更多使用选项见 http://phith0n.github.io/XssHtml
class XssHtml {
private $m_dom;
private $m_xss;
private $m_ok;
private $m_AllowAttr = array('title', 'src', 'href', 'id', 'class', 'style', 'width', 'height', 'alt', 'target', 'align');
private $m_AllowTag = array('a', 'img', 'br', 'strong', 'b', 'code', 'pre', 'p', 'div', 'em', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'table', 'ul', 'ol', 'tr', 'th', 'td', 'hr', 'li', 'u');
/**
* 构造函数
*
* @param string $html 待过滤的文本
* @param string $charset 文本编码,默认utf-8
* @param array $AllowTag 允许的标签,如果不清楚请保持默认,默认已涵盖大部分功能,不要增加危险标签
*/
public function __construct($html, $charset = 'utf-8', $AllowTag = array()){
$this->m_AllowTag = empty($AllowTag) ? $this->m_AllowTag : $AllowTag;
$this->m_xss = strip_tags($html, '<' . implode('><', $this->m_AllowTag) . '>');
if (empty($this->m_xss)) {
$this->m_ok = FALSE;
return ;
}
$this->m_xss = "<meta http-equiv=\"Content-Type\" content=\"text/html;charset={$charset}\">" . $this->m_xss;
$this->m_dom = new DOMDocument();
$this->m_dom->strictErrorChecking = FALSE;
$this->m_ok = @$this->m_dom->loadHTML($this->m_xss);
}
/**
* 获得过滤后的内容
*/
public function getHtml()
{
if (!$this->m_ok) {
return '';
}
$nodeList = $this->m_dom->getElementsByTagName('*');
for ($i = 0; $i < $nodeList->length; $i++){
$node = $nodeList->item($i);
if (in_array($node->nodeName, $this->m_AllowTag)) {
if (method_exists($this, "__node_{$node->nodeName}")) {
call_user_func(array($this, "__node_{$node->nodeName}"), $node);
}else{
call_user_func(array($this, '__node_default'), $node);
}
}
}
return strip_tags($this->m_dom->saveHTML(), '<' . implode('><', $this->m_AllowTag) . '>');
}
private function __true_url($url){
if (preg_match('#^https?://.+#is', $url)) {
return $url;
}else{
return 'http://' . $url;
}
}
private function __get_style($node){
if ($node->attributes->getNamedItem('style')) {
$style = $node->attributes->getNamedItem('style')->nodeValue;
$style = str_replace('\\', ' ', $style);
$style = str_replace(array('&#', '/*', '*/'), ' ', $style);
$style = preg_replace('#e.*x.*p.*r.*e.*s.*s.*i.*o.*n#Uis', ' ', $style);
return $style;
}else{
return '';
}
}
private function __get_link($node, $att){
$link = $node->attributes->getNamedItem($att);
if ($link) {
return $this->__true_url($link->nodeValue);
}else{
return '';
}
}
private function __setAttr($dom, $attr, $val){
if (!empty($val)) {
$dom->setAttribute($attr, $val);
}
}
private function __set_default_attr($node, $attr, $default = '')
{
$o = $node->attributes->getNamedItem($attr);
if ($o) {
$this->__setAttr($node, $attr, $o->nodeValue);
}else{
$this->__setAttr($node, $attr, $default);
}
}
private function __common_attr($node)
{
$list = array();
foreach ($node->attributes as $attr) {
if (!in_array($attr->nodeName,
$this->m_AllowAttr)) {
$list[] = $attr->nodeName;
}
}
foreach ($list as $attr) {
$node->removeAttribute($attr);
}
$style = $this->__get_style($node);
$this->__setAttr($node, 'style', $style);
$this->__set_default_attr($node, 'title');
$this->__set_default_attr($node, 'id');
$this->__set_default_attr($node, 'class');
}
private function __node_img($node){
$this->__common_attr($node);
$this->__set_default_attr($node, 'src');
$this->__set_default_attr($node, 'width');
$this->__set_default_attr($node, 'height');
$this->__set_default_attr($node, 'alt');
$this->__set_default_attr($node, 'align');
}
private function __node_a($node){
$this->__common_attr($node);
$href = $this->__get_link($node, 'href');
$this->__setAttr($node, 'href', $href);
$this->__set_default_attr($node, 'target', '_blank');
}
private function __node_embed($node){
$this->__common_attr($node);
$link = $this->__get_link($node, 'src');
$this->__setAttr($node, 'src', $link);
$this->__setAttr($node, 'allowscriptaccess', 'never');
$this->__set_default_attr($node, 'width');
$this->__set_default_attr($node, 'height');
}
private function __node_default($node){
$this->__common_attr($node);
}
}
?>