1 Star 0 Fork 9

yang/libxml2_backup

forked from openKylin/libxml2 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
HTMLtree.c 32.15 KB
一键复制 编辑 原始数据 按行查看 历史
LI, WENJIE 提交于 2022-11-30 19:55 . Repair CVE-2016-3709.
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220
/*
* HTMLtree.c : implementation of access function for an HTML tree.
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#ifdef LIBXML_HTML_ENABLED
#include <string.h> /* for memset() only ! */
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <libxml/xmlmemory.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/entities.h>
#include <libxml/valid.h>
#include <libxml/xmlerror.h>
#include <libxml/parserInternals.h>
#include <libxml/globals.h>
#include <libxml/uri.h>
#include "buf.h"
/************************************************************************
* *
* Getting/Setting encoding meta tags *
* *
************************************************************************/
/**
* htmlGetMetaEncoding:
* @doc: the document
*
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
*/
const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc) {
htmlNodePtr cur;
const xmlChar *content;
const xmlChar *encoding;
if (doc == NULL)
return(NULL);
cur = doc->children;
/*
* Search the html
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"html"))
break;
if (xmlStrEqual(cur->name, BAD_CAST"head"))
goto found_head;
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(NULL);
cur = cur->children;
/*
* Search the head
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"head"))
break;
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(NULL);
found_head:
cur = cur->children;
/*
* Search the meta elements
*/
found_meta:
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
xmlAttrPtr attr = cur->properties;
int http;
const xmlChar *value;
content = NULL;
http = 0;
while (attr != NULL) {
if ((attr->children != NULL) &&
(attr->children->type == XML_TEXT_NODE) &&
(attr->children->next == NULL)) {
value = attr->children->content;
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
else if ((value != NULL)
&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
content = value;
if ((http != 0) && (content != NULL))
goto found_content;
}
attr = attr->next;
}
}
}
cur = cur->next;
}
return(NULL);
found_content:
encoding = xmlStrstr(content, BAD_CAST"charset=");
if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset=");
if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
if (encoding != NULL) {
encoding += 8;
} else {
encoding = xmlStrstr(content, BAD_CAST"charset =");
if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset =");
if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
if (encoding != NULL)
encoding += 9;
}
if (encoding != NULL) {
while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
}
return(encoding);
}
/**
* htmlSetMetaEncoding:
* @doc: the document
* @encoding: the encoding string
*
* Sets the current encoding in the Meta tags
* NOTE: this will not change the document content encoding, just
* the META flag associated.
*
* Returns 0 in case of success and -1 in case of error
*/
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
htmlNodePtr cur, meta = NULL, head = NULL;
const xmlChar *content = NULL;
char newcontent[100];
newcontent[0] = 0;
if (doc == NULL)
return(-1);
/* html isn't a real encoding it's just libxml2 way to get entities */
if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
return(-1);
if (encoding != NULL) {
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
(char *)encoding);
newcontent[sizeof(newcontent) - 1] = 0;
}
cur = doc->children;
/*
* Search the html
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
break;
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
goto found_head;
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
goto found_meta;
}
cur = cur->next;
}
if (cur == NULL)
return(-1);
cur = cur->children;
/*
* Search the head
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
break;
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
head = cur->parent;
goto found_meta;
}
}
cur = cur->next;
}
if (cur == NULL)
return(-1);
found_head:
head = cur;
if (cur->children == NULL)
goto create;
cur = cur->children;
found_meta:
/*
* Search and update all the remaining the meta elements carrying
* encoding informations
*/
while (cur != NULL) {
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
xmlAttrPtr attr = cur->properties;
int http;
const xmlChar *value;
content = NULL;
http = 0;
while (attr != NULL) {
if ((attr->children != NULL) &&
(attr->children->type == XML_TEXT_NODE) &&
(attr->children->next == NULL)) {
value = attr->children->content;
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
else
{
if ((value != NULL) &&
(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
content = value;
}
if ((http != 0) && (content != NULL))
break;
}
attr = attr->next;
}
if ((http != 0) && (content != NULL)) {
meta = cur;
break;
}
}
}
cur = cur->next;
}
create:
if (meta == NULL) {
if ((encoding != NULL) && (head != NULL)) {
/*
* Create a new Meta element with the right attributes
*/
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
if (head->children == NULL)
xmlAddChild(head, meta);
else
xmlAddPrevSibling(head->children, meta);
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
}
} else {
/* remove the meta tag if NULL is passed */
if (encoding == NULL) {
xmlUnlinkNode(meta);
xmlFreeNode(meta);
}
/* change the document only if there is a real encoding change */
else if (xmlStrcasestr(content, encoding) == NULL) {
xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
}
}
return(0);
}
/**
* booleanHTMLAttrs:
*
* These are the HTML attributes which will be output
* in minimized form, i.e. <option selected="selected"> will be
* output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
*
*/
static const char* htmlBooleanAttrs[] = {
"checked", "compact", "declare", "defer", "disabled", "ismap",
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
"selected", NULL
};
/**
* htmlIsBooleanAttr:
* @name: the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute.
*
* returns: false if the attribute is not boolean, true otherwise.
*/
int
htmlIsBooleanAttr(const xmlChar *name)
{
int i = 0;
while (htmlBooleanAttrs[i] != NULL) {
if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
return 1;
i++;
}
return 0;
}
#ifdef LIBXML_OUTPUT_ENABLED
/*
* private routine exported from xmlIO.c
*/
xmlOutputBufferPtr
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
/************************************************************************
* *
* Output error handlers *
* *
************************************************************************/
/**
* htmlSaveErrMemory:
* @extra: extra informations
*
* Handle an out of memory condition
*/
static void
htmlSaveErrMemory(const char *extra)
{
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
/**
* htmlSaveErr:
* @code: the error number
* @node: the location of the error.
* @extra: extra informations
*
* Handle an out of memory condition
*/
static void
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
{
const char *msg = NULL;
switch(code) {
case XML_SAVE_NOT_UTF8:
msg = "string is not in UTF-8\n";
break;
case XML_SAVE_CHAR_INVALID:
msg = "invalid character value\n";
break;
case XML_SAVE_UNKNOWN_ENCODING:
msg = "unknown encoding %s\n";
break;
case XML_SAVE_NO_DOCTYPE:
msg = "HTML has no DOCTYPE\n";
break;
default:
msg = "unexpected error number\n";
}
__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
}
/************************************************************************
* *
* Dumping HTML tree content to a simple buffer *
* *
************************************************************************/
/**
* htmlBufNodeDumpFormat:
* @buf: the xmlBufPtr output
* @doc: the document
* @cur: the current node
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*
* Returns the number of byte written or -1 in case of error
*/
static size_t
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
int format) {
size_t use;
int ret;
xmlOutputBufferPtr outbuf;
if (cur == NULL) {
return (-1);
}
if (buf == NULL) {
return (-1);
}
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
if (outbuf == NULL) {
htmlSaveErrMemory("allocating HTML output buffer");
return (-1);
}
memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
outbuf->buffer = buf;
outbuf->encoder = NULL;
outbuf->writecallback = NULL;
outbuf->closecallback = NULL;
outbuf->context = NULL;
outbuf->written = 0;
use = xmlBufUse(buf);
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
xmlFree(outbuf);
ret = xmlBufUse(buf) - use;
return (ret);
}
/**
* htmlNodeDump:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns are added.
*
* Returns the number of byte written or -1 in case of error
*/
int
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
xmlBufPtr buffer;
size_t ret;
if ((buf == NULL) || (cur == NULL))
return(-1);
xmlInitParser();
buffer = xmlBufFromBuffer(buf);
if (buffer == NULL)
return(-1);
ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
xmlBufBackToBuffer(buffer);
if (ret > INT_MAX)
return(-1);
return((int) ret);
}
/**
* htmlNodeDumpFileFormat:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
* @encoding: the document encoding
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*
* TODO: if encoding == NULL try to save in the doc encoding
*
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding, int format) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
int ret;
xmlInitParser();
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
}
/*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFile(out, handler);
if (buf == NULL) return(0);
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
ret = xmlOutputBufferClose(buf);
return(ret);
}
/**
* htmlNodeDumpFile:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns are added.
*/
void
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
}
/**
* htmlDocDumpMemoryFormat:
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
* @format: should formatting spaces been added
*
* Dump an HTML document in memory and return the xmlChar * and it's size.
* It's up to the caller to free the memory.
*/
void
htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
const char *encoding;
xmlInitParser();
if ((mem == NULL) || (size == NULL))
return;
if (cur == NULL) {
*mem = NULL;
*size = 0;
return;
}
encoding = (const char *) htmlGetMetaEncoding(cur);
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
}
buf = xmlAllocOutputBufferInternal(handler);
if (buf == NULL) {
*mem = NULL;
*size = 0;
return;
}
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
xmlOutputBufferFlush(buf);
if (buf->conv != NULL) {
*size = xmlBufUse(buf->conv);
*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
} else {
*size = xmlBufUse(buf->buffer);
*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
}
(void)xmlOutputBufferClose(buf);
}
/**
* htmlDocDumpMemory:
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
*
* Dump an HTML document in memory and return the xmlChar * and it's size.
* It's up to the caller to free the memory.
*/
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
htmlDocDumpMemoryFormat(cur, mem, size, 1);
}
/************************************************************************
* *
* Dumping HTML tree content to an I/O output buffer *
* *
************************************************************************/
void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
/**
* htmlDtdDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @encoding: the encoding string
*
* TODO: check whether encoding is needed
*
* Dump the HTML document DTD, if any.
*/
static void
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
const char *encoding ATTRIBUTE_UNUSED) {
xmlDtdPtr cur = doc->intSubset;
if (cur == NULL) {
htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
return;
}
xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->ExternalID != NULL) {
xmlOutputBufferWriteString(buf, " PUBLIC ");
xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
if (cur->SystemID != NULL) {
xmlOutputBufferWriteString(buf, " ");
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
} else if (cur->SystemID != NULL &&
xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
xmlOutputBufferWriteString(buf, " SYSTEM ");
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
xmlOutputBufferWriteString(buf, ">\n");
}
/**
* htmlAttrDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the attribute pointer
* @encoding: the encoding string
*
* Dump an HTML attribute
*/
static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
const char *encoding ATTRIBUTE_UNUSED) {
xmlChar *value;
/*
* The html output method should not escape a & character
* occurring in an attribute value immediately followed by
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
* This is implemented in xmlEncodeEntitiesReentrant
*/
if (cur == NULL) {
return;
}
xmlOutputBufferWriteString(buf, " ");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlOutputBufferWriteString(buf, "=");
if ((cur->ns == NULL) && (cur->parent != NULL) &&
(cur->parent->ns == NULL) &&
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
xmlChar *escaped;
xmlChar *tmp = value;
while (IS_BLANK_CH(*tmp)) tmp++;
/*
* the < and > have already been escaped at the entity level
* And doing so here breaks server side includes
*/
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
if (escaped != NULL) {
xmlBufWriteQuotedString(buf->buffer, escaped);
xmlFree(escaped);
} else {
xmlBufWriteQuotedString(buf->buffer, value);
}
} else {
xmlBufWriteQuotedString(buf->buffer, value);
}
xmlFree(value);
} else {
xmlOutputBufferWriteString(buf, "=\"\"");
}
}
}
/**
* htmlAttrListDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the first attribute pointer
* @encoding: the encoding string
*
* Dump a list of HTML attributes
*/
static void
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
if (cur == NULL) {
return;
}
while (cur != NULL) {
htmlAttrDumpOutput(buf, doc, cur, encoding);
cur = cur->next;
}
}
/**
* htmlNodeListDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the first node
* @encoding: the encoding string
* @format: should formatting spaces been added
*
* Dump an HTML node list, recursive behaviour,children are printed too.
*/
static void
htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding, int format) {
if (cur == NULL) {
return;
}
while (cur != NULL) {
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
cur = cur->next;
}
}
/**
* htmlNodeDumpFormatOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string
* @format: should formatting spaces been added
*
* Dump an HTML node, recursive behaviour,children are printed too.
*/
void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding, int format) {
const htmlElemDesc * info;
xmlInitParser();
if ((cur == NULL) || (buf == NULL)) {
return;
}
/*
* Special cases.
*/
if (cur->type == XML_DTD_NODE)
return;
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
(cur->type == XML_DOCUMENT_NODE)){
htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
return;
}
if (cur->type == XML_ATTRIBUTE_NODE) {
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
return;
}
if (cur->type == HTML_TEXT_NODE) {
if (cur->content != NULL) {
if (((cur->name == (const xmlChar *)xmlStringText) ||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
((cur->parent == NULL) ||
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
if (buffer != NULL) {
xmlOutputBufferWriteString(buf, (const char *)buffer);
xmlFree(buffer);
}
} else {
xmlOutputBufferWriteString(buf, (const char *)cur->content);
}
}
return;
}
if (cur->type == HTML_COMMENT_NODE) {
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, "<!--");
xmlOutputBufferWriteString(buf, (const char *)cur->content);
xmlOutputBufferWriteString(buf, "-->");
}
return;
}
if (cur->type == HTML_PI_NODE) {
if (cur->name == NULL)
return;
xmlOutputBufferWriteString(buf, "<?");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, " ");
xmlOutputBufferWriteString(buf, (const char *)cur->content);
}
xmlOutputBufferWriteString(buf, ">");
return;
}
if (cur->type == HTML_ENTITY_REF_NODE) {
xmlOutputBufferWriteString(buf, "&");
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ";");
return;
}
if (cur->type == HTML_PRESERVE_NODE) {
if (cur->content != NULL) {
xmlOutputBufferWriteString(buf, (const char *)cur->content);
}
return;
}
/*
* Get specific HTML info for that node.
*/
if (cur->ns == NULL)
info = htmlTagLookup(cur->name);
else
info = NULL;
xmlOutputBufferWriteString(buf, "<");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->nsDef)
xmlNsListDumpOutput(buf, cur->nsDef);
if (cur->properties != NULL)
htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
if ((info != NULL) && (info->empty)) {
xmlOutputBufferWriteString(buf, ">");
if ((format) && (!info->isinline) && (cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
(cur->parent != NULL) &&
(cur->parent->name != NULL) &&
(cur->parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
return;
}
if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
(cur->children == NULL)) {
if ((info != NULL) && (info->saveEndTag != 0) &&
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
xmlOutputBufferWriteString(buf, ">");
} else {
xmlOutputBufferWriteString(buf, "></");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ">");
}
if ((format) && (cur->next != NULL) &&
(info != NULL) && (!info->isinline)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
(cur->parent != NULL) &&
(cur->parent->name != NULL) &&
(cur->parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
return;
}
xmlOutputBufferWriteString(buf, ">");
if ((cur->type != XML_ELEMENT_NODE) &&
(cur->content != NULL)) {
/*
* Uses the OutputBuffer property to automatically convert
* invalids to charrefs
*/
xmlOutputBufferWriteString(buf, (const char *) cur->content);
}
if (cur->children != NULL) {
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->children->type != HTML_TEXT_NODE) &&
(cur->children->type != HTML_ENTITY_REF_NODE) &&
(cur->children != cur->last) &&
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->last->type != HTML_TEXT_NODE) &&
(cur->last->type != HTML_ENTITY_REF_NODE) &&
(cur->children != cur->last) &&
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
xmlOutputBufferWriteString(buf, "</");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWriteString(buf, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
xmlOutputBufferWriteString(buf, ">");
if ((format) && (info != NULL) && (!info->isinline) &&
(cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
(cur->parent != NULL) &&
(cur->parent->name != NULL) &&
(cur->parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
}
/**
* htmlNodeDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string
*
* Dump an HTML node, recursive behaviour,children are printed too,
* and formatting returns/spaces are added.
*/
void
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding) {
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
}
/**
* htmlDocContentDumpFormatOutput:
* @buf: the HTML buffer output
* @cur: the document
* @encoding: the encoding string
* @format: should formatting spaces been added
*
* Dump an HTML document.
*/
void
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
const char *encoding, int format) {
int type;
xmlInitParser();
if ((buf == NULL) || (cur == NULL))
return;
/*
* force to output the stuff as HTML, especially for entities
*/
type = cur->type;
cur->type = XML_HTML_DOCUMENT_NODE;
if (cur->intSubset != NULL) {
htmlDtdDumpOutput(buf, cur, NULL);
}
if (cur->children != NULL) {
htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
}
xmlOutputBufferWriteString(buf, "\n");
cur->type = (xmlElementType) type;
}
/**
* htmlDocContentDumpOutput:
* @buf: the HTML buffer output
* @cur: the document
* @encoding: the encoding string
*
* Dump an HTML document. Formatting return/spaces are added.
*/
void
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
const char *encoding) {
htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
}
/************************************************************************
* *
* Saving functions front-ends *
* *
************************************************************************/
/**
* htmlDocDump:
* @f: the FILE*
* @cur: the document
*
* Dump an HTML document to an open FILE.
*
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlDocDump(FILE *f, xmlDocPtr cur) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
const char *encoding;
int ret;
xmlInitParser();
if ((cur == NULL) || (f == NULL)) {
return(-1);
}
encoding = (const char *) htmlGetMetaEncoding(cur);
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
}
buf = xmlOutputBufferCreateFile(f, handler);
if (buf == NULL) return(-1);
htmlDocContentDumpOutput(buf, cur, NULL);
ret = xmlOutputBufferClose(buf);
return(ret);
}
/**
* htmlSaveFile:
* @filename: the filename (or URL)
* @cur: the document
*
* Dump an HTML document to a file. If @filename is "-" the stdout file is
* used.
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlSaveFile(const char *filename, xmlDocPtr cur) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
const char *encoding;
int ret;
if ((cur == NULL) || (filename == NULL))
return(-1);
xmlInitParser();
encoding = (const char *) htmlGetMetaEncoding(cur);
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
} else {
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
}
/*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
if (buf == NULL) return(0);
htmlDocContentDumpOutput(buf, cur, NULL);
ret = xmlOutputBufferClose(buf);
return(ret);
}
/**
* htmlSaveFileFormat:
* @filename: the filename
* @cur: the document
* @format: should formatting spaces been added
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding.
*
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
const char *encoding, int format) {
xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler = NULL;
int ret;
if ((cur == NULL) || (filename == NULL))
return(-1);
xmlInitParser();
if (encoding != NULL) {
xmlCharEncoding enc;
enc = xmlParseCharEncoding(encoding);
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
} else {
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
if (handler == NULL)
handler = xmlFindCharEncodingHandler("HTML");
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
}
/*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
if (buf == NULL) return(0);
htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
ret = xmlOutputBufferClose(buf);
return(ret);
}
/**
* htmlSaveFileEnc:
* @filename: the filename
* @cur: the document
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding
* and formatting returns/spaces are added.
*
* returns: the number of byte written or -1 in case of failure.
*/
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
return(htmlSaveFileFormat(filename, cur, encoding, 1));
}
#endif /* LIBXML_OUTPUT_ENABLED */
#define bottom_HTMLtree
#include "elfgcchack.h"
#endif /* LIBXML_HTML_ENABLED */
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/kimjuncotton_y/libxml2_0213.git
git@gitee.com:kimjuncotton_y/libxml2_0213.git
kimjuncotton_y
libxml2_0213
libxml2_backup
openkylin/yangtze

搜索帮助