代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/setroubleshoot 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From 838f53a97ce44ea0f8f4d361afcb62a441f8633f Mon Sep 17 00:00:00 2001
From: Adam Williamson <awilliam@redhat.com>
Date: Mon, 26 Jul 2021 13:11:17 -0700
Subject: [PATCH] Considerably simplify html_util for Python 3.10 compatibility (#58)
As reported in #58 and RHBZ #1972391, `formatter` was removed
from the Python standard library in Python 3.10. This heavily
simplifies `html_util.html_to_text()` by using the stdlib
`HTMLParser` class, which avoids the use of `formatter`.
Signed-off-by: Adam Williamson <awilliam@redhat.com>
---
src/setroubleshoot/html_util.py | 110 ++++----------------------------
1 file changed, 12 insertions(+), 98 deletions(-)
diff --git a/src/setroubleshoot/html_util.py b/src/setroubleshoot/html_util.py
index 5c6d07a..095eaeb 100644
--- a/src/setroubleshoot/html_util.py
+++ b/src/setroubleshoot/html_util.py
@@ -28,110 +28,29 @@ __all__ = [
import syslog
import sys
+import textwrap
if sys.version_info > (3,):
import html
- import html.parser
import html.entities
- from io import StringIO
+ from html.parser import HTMLParser
else:
import htmllib
- from StringIO import StringIO
-import formatter as Formatter
+ from HTMLParser import HTMLParser
import string
from types import *
#------------------------------------------------------------------------------
+class HTMLFilter(HTMLParser):
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.text = ""
-class TextWriter(Formatter.DumbWriter):
-
- def __init__(self, file=None, maxcol=80, indent_width=4):
- Formatter.DumbWriter.__init__(self, file, maxcol)
- self.indent_level = 0
- self.indent_width = indent_width
- self._set_indent()
-
- def _set_indent(self):
- self.indent_col = self.indent_level * self.indent_width
- self.indent = ' ' * self.indent_col
-
- def new_margin(self, margin, level):
- self.indent_level = level
- self._set_indent()
-
- def send_label_data(self, data):
- data = data + ' '
- if len(data) > self.indent_col:
- self.send_literal_data(data)
- else:
- offset = self.indent_col - len(data)
- self.send_literal_data(' ' * offset + data)
-
- def send_flowing_data(self, data):
- if not data:
- return
- atbreak = self.atbreak or data[0] in string.whitespace
- col = self.col
- maxcol = self.maxcol
- write = self.file.write
- col = self.col
- if col == 0:
- write(self.indent)
- col = self.indent_col
- for word in data.split():
- if atbreak:
- if col + len(word) >= maxcol:
- write('\n' + self.indent)
- col = self.indent_col
- else:
- write(' ')
- col = col + 1
- write(word)
- col = col + len(word)
- atbreak = 1
- self.col = col
- self.atbreak = data[-1] in string.whitespace
-
-if sys.version_info > (3,):
- class HTMLParserAnchor(html.parser.HTMLParser):
-
- def __init__(self, formatter, strict=False, convert_charrefs=False):
- super(HTMLParserAnchor, self).__init__()
- self.formatter = formatter
- self.anchor_href = None
-
- def handle_starttag(self, tag, attrs):
- if tag == 'a':
- for key, value in attrs:
- if key == 'href':
- self.anchor_href = value
-
- def handle_endtag(self, tag):
- if tag == 'a':
- if self.anchor_href != None:
- self.formatter.writer.send_flowing_data('(' + self.anchor_href + ')')
- self.anchor_href = None
-
- def handle_data(self, data):
- self.formatter.writer.send_flowing_data(data)
-
-else:
- class HTMLParserAnchor(htmllib.HTMLParser):
-
- def __init__(self, formatter, verbose=0):
- htmllib.HTMLParser.__init__(self, formatter, verbose)
-
- def anchor_bgn(self, href, name, type):
- self.anchor = href
-
- def anchor_end(self):
- if self.anchor:
- self.handle_data(' (%s) ' % self.anchor)
- self.anchor = None
+ def handle_data(self, data):
+ self.text += data
#------------------------------------------------------------------------------
-
def escape_html(s):
if s is None:
return None
@@ -161,14 +80,9 @@ def unescape_html(s):
def html_to_text(html, maxcol=80):
try:
- buffer = StringIO()
- formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
- parser = HTMLParserAnchor(formatter)
- parser.feed(html)
- parser.close()
- text = buffer.getvalue()
- buffer.close()
- return text
+ filter = HTMLFilter()
+ filter.feed(html)
+ return textwrap.fill(filter.text, width=maxcol)
except Exception as e:
syslog.syslog(syslog.LOG_ERR, 'cannot convert html to text: %s' % e)
return None
--
2.27.0
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。