1 Star 0 Fork 11

yunjia/setroubleshoot

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
backport-Considerably-simplify-html_util-for-Python-3.10-comp.patch 5.02 KB
一键复制 编辑 原始数据 按行查看 历史
From 838f53a97ce44ea0f8f4d361afcb62a441f8633f Mon Sep 17 00:00:00 2001
From: Adam Williamson <awilliam@redhat.com>
Date: Mon, 26 Jul 2021 13:11:17 -0700
Subject: [PATCH] Considerably simplify html_util for Python 3.10
compatibility (#58)
As reported in #58 and RHBZ #1972391, `formatter` was removed
from the Python standard library in Python 3.10. This heavily
simplifies `html_util.html_to_text()` by using the stdlib
`HTMLParser` class, which avoids the use of `formatter`.
Signed-off-by: Adam Williamson <awilliam@redhat.com>
---
src/setroubleshoot/html_util.py | 110 ++++----------------------------
1 file changed, 12 insertions(+), 98 deletions(-)
diff --git a/src/setroubleshoot/html_util.py b/src/setroubleshoot/html_util.py
index 5c6d07a..095eaeb 100644
--- a/src/setroubleshoot/html_util.py
+++ b/src/setroubleshoot/html_util.py
@@ -28,110 +28,29 @@ __all__ = [
import syslog
import sys
+import textwrap
if sys.version_info > (3,):
import html
- import html.parser
import html.entities
- from io import StringIO
+ from html.parser import HTMLParser
else:
import htmllib
- from StringIO import StringIO
-import formatter as Formatter
+ from HTMLParser import HTMLParser
import string
from types import *
#------------------------------------------------------------------------------
+class HTMLFilter(HTMLParser):
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.text = ""
-class TextWriter(Formatter.DumbWriter):
-
- def __init__(self, file=None, maxcol=80, indent_width=4):
- Formatter.DumbWriter.__init__(self, file, maxcol)
- self.indent_level = 0
- self.indent_width = indent_width
- self._set_indent()
-
- def _set_indent(self):
- self.indent_col = self.indent_level * self.indent_width
- self.indent = ' ' * self.indent_col
-
- def new_margin(self, margin, level):
- self.indent_level = level
- self._set_indent()
-
- def send_label_data(self, data):
- data = data + ' '
- if len(data) > self.indent_col:
- self.send_literal_data(data)
- else:
- offset = self.indent_col - len(data)
- self.send_literal_data(' ' * offset + data)
-
- def send_flowing_data(self, data):
- if not data:
- return
- atbreak = self.atbreak or data[0] in string.whitespace
- col = self.col
- maxcol = self.maxcol
- write = self.file.write
- col = self.col
- if col == 0:
- write(self.indent)
- col = self.indent_col
- for word in data.split():
- if atbreak:
- if col + len(word) >= maxcol:
- write('\n' + self.indent)
- col = self.indent_col
- else:
- write(' ')
- col = col + 1
- write(word)
- col = col + len(word)
- atbreak = 1
- self.col = col
- self.atbreak = data[-1] in string.whitespace
-
-if sys.version_info > (3,):
- class HTMLParserAnchor(html.parser.HTMLParser):
-
- def __init__(self, formatter, strict=False, convert_charrefs=False):
- super(HTMLParserAnchor, self).__init__()
- self.formatter = formatter
- self.anchor_href = None
-
- def handle_starttag(self, tag, attrs):
- if tag == 'a':
- for key, value in attrs:
- if key == 'href':
- self.anchor_href = value
-
- def handle_endtag(self, tag):
- if tag == 'a':
- if self.anchor_href != None:
- self.formatter.writer.send_flowing_data('(' + self.anchor_href + ')')
- self.anchor_href = None
-
- def handle_data(self, data):
- self.formatter.writer.send_flowing_data(data)
-
-else:
- class HTMLParserAnchor(htmllib.HTMLParser):
-
- def __init__(self, formatter, verbose=0):
- htmllib.HTMLParser.__init__(self, formatter, verbose)
-
- def anchor_bgn(self, href, name, type):
- self.anchor = href
-
- def anchor_end(self):
- if self.anchor:
- self.handle_data(' (%s) ' % self.anchor)
- self.anchor = None
+ def handle_data(self, data):
+ self.text += data
#------------------------------------------------------------------------------
-
def escape_html(s):
if s is None:
return None
@@ -161,14 +80,9 @@ def unescape_html(s):
def html_to_text(html, maxcol=80):
try:
- buffer = StringIO()
- formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
- parser = HTMLParserAnchor(formatter)
- parser.feed(html)
- parser.close()
- text = buffer.getvalue()
- buffer.close()
- return text
+ filter = HTMLFilter()
+ filter.feed(html)
+ return textwrap.fill(filter.text, width=maxcol)
except Exception as e:
syslog.syslog(syslog.LOG_ERR, 'cannot convert html to text: %s' % e)
return None
--
2.27.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yunjia_w/setroubleshoot.git
git@gitee.com:yunjia_w/setroubleshoot.git
yunjia_w
setroubleshoot
setroubleshoot
master

搜索帮助