压缩的原理就是去除换行符、多余的空格、注释等等,感觉实际作用并不大,大部分网页只能压缩 8-12%,算是起到个心理安慰的作用吧。
PHP代码:(https://www.svachon.com/blog/html-minify/)
<?php
/**
 * Minifies an HTML document by collapsing whitespace, dropping HTML comments
 * and removing empty attributes.
 *
 * The minified markup is produced once, in the constructor, and is read back
 * by casting the object to a string.
 */
class HTML_Minify
{
    // Settings
    protected $compress_css;    // collapse whitespace inside <style>...</style> blocks
    protected $compress_js;     // collapse whitespace inside <script>...</script> blocks
    protected $info_comment;    // add a comment comparing the size before and after minification
    protected $remove_comments; // remove HTML comments (MSIE conditional comments are kept)
    protected $shorten_urls;    // stored for compatibility; nothing in this class reads it

    // Variables
    protected $html = '';

    /**
     * Minifies $html immediately and stores the result.
     *
     * An empty string is left untouched: no settings are stored and the
     * object renders as ''.
     *
     * @param string $html            Markup to minify.
     * @param bool   $compress_css    Collapse whitespace inside <style> blocks.
     * @param bool   $compress_js     Collapse whitespace inside <script> blocks.
     * @param bool   $info_comment    Prepend the size comparison comment.
     * @param bool   $remove_comments Remove HTML comments.
     * @param bool   $shorten_urls    Stored only; URL shortening is not implemented here.
     */
    public function __construct($html, $compress_css=true, $compress_js=false, $info_comment=true, $remove_comments=true, $shorten_urls=false)
    {
        if ($html === '')
        {
            return;
        }

        $this->compress_css = $compress_css;
        $this->compress_js = $compress_js;
        $this->info_comment = $info_comment;
        $this->remove_comments = $remove_comments;
        $this->shorten_urls = $shorten_urls;

        $this->html = $this->minifyHTML($html);

        if ($this->info_comment)
        {
            // Despite the helper's name, the comment is placed in front of the markup.
            $this->html = $this->bottomComment($html, $this->html) . $this->html;
        }
    }

    /**
     * @return string The minified markup (including the info comment when enabled).
     */
    public function __toString()
    {
        return $this->html;
    }

    /**
     * Builds the HTML comment reporting the byte length before minification,
     * the byte length after, and the percentage saved (rounded to 2 decimals).
     *
     * @param string $raw        Original markup.
     * @param string $compressed Minified markup.
     * @return string An HTML comment.
     */
    protected function bottomComment($raw, $compressed)
    {
        $raw = strlen($raw);
        $compressed = strlen($compressed);

        // Guard against division by zero when the original markup is empty.
        $savings = ($raw > 0) ? ($raw-$compressed) / $raw * 100 : 0;
        $savings = round($savings, 2);

        return '<!--压缩前:'.$raw.', 压缩后:'.$compressed.'; 压缩比:'.$savings.'%-->';
    }

    /**
     * Splits the markup into tokens (script blocks, style blocks, comments,
     * tags and text) and rebuilds it with each token minified according to
     * the settings.
     *
     * Content inside <pre> and <textarea>, and anything between a pair of
     * "<!--wp-html-compression no compression-->" marker comments, is copied
     * through without whitespace collapsing. The marker comments themselves
     * are dropped from the output.
     *
     * @param string $html Markup to minify.
     * @return string Minified markup, or the input unchanged if tokenizing fails.
     */
    protected function minifyHTML($html)
    {
        $pattern = '/<(?<script>script).*?<\/script\s*>|<(?<style>style).*?<\/style\s*>|<!(?<comment>--).*?-->|<(?<tag>[\/\w.:-]*)(?:".*?"|\'.*?\'|[^\'">]+)*>|(?<text>((<[^!\/\w.:-])?[^<]*)+)|/si';

        if (preg_match_all($pattern, $html, $matches, PREG_SET_ORDER) === false)
        {
            // The regex engine reported an error; hand the markup back untouched
            return $html;
        }

        $overriding = false; // true while between two "no compression" markers
        $raw_tag = false;    // 'pre' or 'textarea' while inside such an element

        // Variable reused for output
        $html = '';

        foreach ($matches as $token)
        {
            // The 'tag' group is only present for tag and text tokens; text tokens carry ''.
            $tag = (isset($token['tag'])) ? strtolower($token['tag']) : null;
            $content = $token[0];
            $strip = false;

            if (is_null($tag))
            {
                // Script block, style block or comment
                if ( !empty($token['script']) )
                {
                    $strip = $this->compress_js;
                }
                else if ( !empty($token['style']) )
                {
                    $strip = $this->compress_css;
                }
                else if ($content === '<!--wp-html-compression no compression-->')
                {
                    $overriding = !$overriding;

                    // Don't print the comment
                    continue;
                }
                else if ($this->remove_comments)
                {
                    if (!$overriding && $raw_tag !== 'textarea')
                    {
                        // Remove any HTML comments, except MSIE conditional comments
                        $content = preg_replace('/<!--(?!\s*(?:\[if [^\]]+]|<!|>))(?:(?!-->).)*-->/s', '', $content);
                        $strip = true;
                    }
                }
            }
            else // All tags except script, style and comments, plus text between tags
            {
                if ($tag === 'pre' || $tag === 'textarea')
                {
                    $raw_tag = $tag;
                }
                else if ($tag === '/pre' || $tag === '/textarea')
                {
                    $raw_tag = false;
                }
                else if (!$raw_tag && !$overriding)
                {
                    if ($tag !== '')
                    {
                        if (strpos($tag, '/') === false)
                        {
                            // Remove any empty attributes, except:
                            // action, alt, content, src
                            $content = preg_replace('/(\s+)(\w++(?<!action|alt|content|src)=(""|\'\'))/i', '$1', $content);
                        }

                        // Remove any space before the end of a tag (including closing tags and self-closing tags)
                        $content = preg_replace('/\s+(\/?\>)/', '$1', $content);
                    }
                    else // Content between opening and closing tags
                    {
                        // Avoid multiple spaces by checking previous character in output HTML
                        if (strrpos($html,' ') === strlen($html)-1)
                        {
                            // Remove white space at the content beginning
                            $content = preg_replace('/^[\s\r\n]+/', '', $content);
                        }
                    }

                    $strip = true;
                }
            }

            if ($strip)
            {
                $content = $this->removeWhiteSpace($content, $html);
            }

            $html .= $content;
        }

        return $html;
    }

    /**
     * Turns tabs, carriage returns and newlines into spaces, then collapses
     * every run of spaces into a single space.
     *
     * @param string $html      Fragment to clean.
     * @param string $full_html Output built so far; unused, kept for signature compatibility.
     * @return string The fragment with whitespace collapsed.
     */
    protected function removeWhiteSpace($html, $full_html)
    {
        $html = str_replace(array("\t", "\r", "\n"), ' ', $html);

        // Repeatedly fold two spaces into one until no double space remains.
        // The needle must be TWO spaces: with a single-space needle the
        // replacement is a no-op and the loop never terminates.
        // This is over twice the speed of a RegExp
        while (strpos($html, '  ') !== false)
        {
            $html = str_replace('  ', ' ', $html);
        }

        return $html;
    }
}
/**
 * Minifies a chunk of markup using HTML_Minify's default settings.
 *
 * The return value is the HTML_Minify object itself, not a string; it yields
 * the minified markup when cast to string or echoed.
 * NOTE(review): the name suggests use as an output-buffer callback - no call
 * site is visible here, so confirm against callers.
 *
 * @param string $html Markup to minify.
 * @return HTML_Minify
 */
function html_minify_buffer($html)
{
    $minified = new HTML_Minify($html);

    return $minified;
}
?>
使用方法:
$html=new HTML_Minify($content);
echo $html;