You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
311 lines
8.4 KiB
311 lines
8.4 KiB
/** |
|
* html2Json 改造来自: https://github.com/Jxck/html2json |
|
* |
|
* |
|
* author: Di (微信小程序开发工程师) |
|
* organization: WeAppDev(微信小程序开发论坛)(http://weappdev.com) |
|
* 垂直微信小程序开发交流社区 |
|
* |
|
* github地址: https://github.com/icindy/wxParse |
|
* |
|
* for: 微信小程序富文本解析 |
|
* detail : http://weappdev.com/t/wxparse-alpha0-1-html-markdown/184 |
|
*/ |
|
var __placeImgeUrlHttps = "https"; |
|
var __emojisReg = ''; |
|
var __emojisBaseSrc = ''; |
|
var __emojis = {}; |
|
|
|
var wxDiscode = require("./wxDiscode.js"); |
|
|
|
var HTMLParser = require("./htmlparser.js"); // Empty Elements - HTML 5 |
|
|
|
|
|
var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,link,meta,param,embed,command,keygen,source,track,wbr"); // Block Elements - HTML 5 |
|
|
|
var block = makeMap("br,a,code,address,article,applet,aside,audio,blockquote,button,canvas,center,dd,del,dir,div,dl,dt,fieldset,figcaption,figure,footer,form,frameset,h1,h2,h3,h4,h5,h6,header,hgroup,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,output,p,pre,section,script,table,tbody,td,tfoot,th,thead,tr,ul,video"); // Inline Elements - HTML 5 |
|
|
|
var inline = makeMap("abbr,acronym,applet,b,basefont,bdo,big,button,cite,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); // Elements that you can, intentionally, leave open |
|
// (and which close themselves) |
|
|
|
var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); // Attributes that have their values filled in disabled="disabled" |
|
|
|
var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); // Special Elements (can contain anything) |
|
|
|
var special = makeMap("wxxxcode-style,script,style,view,scroll-view,block"); |
|
|
|
function makeMap(str) { |
|
var obj = {}, |
|
items = str.split(","); |
|
|
|
for (var i = 0; i < items.length; i++) obj[items[i]] = true; |
|
|
|
return obj; |
|
} |
|
|
|
function q(v) { |
|
return '"' + v + '"'; |
|
} |
|
|
|
function removeDOCTYPE(html) { |
|
return html.replace(/<\?xml.*\?>\n/, '').replace(/<.*!doctype.*\>\n/, '').replace(/<.*!DOCTYPE.*\>\n/, ''); |
|
} |
|
|
|
function trimHtml(html) { |
|
return html.replace(/\r?\n+/g, '').replace(/<!--.*?-->/ig, '').replace(/\/\*.*?\*\//ig, '').replace(/[ ]+</ig, '<'); |
|
} |
|
|
|
function html2json(html, bindName) { |
|
//处理字符串 |
|
html = removeDOCTYPE(html); |
|
html = trimHtml(html); |
|
html = wxDiscode.strDiscode(html); //生成node节点 |
|
|
|
var bufArray = []; |
|
var results = { |
|
node: bindName, |
|
nodes: [], |
|
images: [], |
|
imageUrls: [] |
|
}; |
|
var index = 0; |
|
HTMLParser(html, { |
|
start: function (tag, attrs, unary) { |
|
//debug(tag, attrs, unary); |
|
// node for this element |
|
var node = { |
|
node: 'element', |
|
tag: tag |
|
}; |
|
|
|
if (bufArray.length === 0) { |
|
node.index = index.toString(); |
|
index += 1; |
|
} else { |
|
var parent = bufArray[0]; |
|
|
|
if (parent.nodes === undefined) { |
|
parent.nodes = []; |
|
} |
|
|
|
node.index = parent.index + '.' + parent.nodes.length; |
|
} |
|
|
|
if (block[tag]) { |
|
node.tagType = "block"; |
|
} else if (inline[tag]) { |
|
node.tagType = "inline"; |
|
} else if (closeSelf[tag]) { |
|
node.tagType = "closeSelf"; |
|
} |
|
|
|
if (attrs.length !== 0) { |
|
node.attr = attrs.reduce(function (pre, attr) { |
|
var name = attr.name; |
|
var value = attr.value; |
|
|
|
if (name == 'class') { |
|
console.dir(value); // value = value.join("") |
|
|
|
node.classStr = value; |
|
} // has multi attibutes |
|
// make it array of attribute |
|
|
|
|
|
if (name == 'style') { |
|
console.dir(value); // value = value.join("") |
|
|
|
node.styleStr = value; |
|
} |
|
|
|
if (value.match(/ /)) { |
|
value = value.split(' '); |
|
} // if attr already exists |
|
// merge it |
|
|
|
|
|
if (pre[name]) { |
|
if (Array.isArray(pre[name])) { |
|
// already array, push to last |
|
pre[name].push(value); |
|
} else { |
|
// single value, make it array |
|
pre[name] = [pre[name], value]; |
|
} |
|
} else { |
|
// not exist, put it |
|
pre[name] = value; |
|
} |
|
|
|
return pre; |
|
}, {}); |
|
} //对img添加额外数据 |
|
|
|
|
|
if (node.tag === 'img') { |
|
node.imgIndex = results.images.length; |
|
var imgUrl = node.attr.src; |
|
|
|
if (imgUrl[0] == '') { |
|
imgUrl.splice(0, 1); |
|
} |
|
|
|
imgUrl = wxDiscode.urlToHttpUrl(imgUrl, __placeImgeUrlHttps); |
|
node.attr.src = imgUrl; |
|
node.from = bindName; |
|
results.images.push(node); |
|
results.imageUrls.push(imgUrl); |
|
} // 处理font标签样式属性 |
|
|
|
|
|
if (node.tag === 'font') { |
|
var fontSize = ['x-small', 'small', 'medium', 'large', 'x-large', 'xx-large', '-webkit-xxx-large']; |
|
var styleAttrs = { |
|
'color': 'color', |
|
'face': 'font-family', |
|
'size': 'font-size' |
|
}; |
|
if (!node.attr.style) node.attr.style = []; |
|
if (!node.styleStr) node.styleStr = ''; |
|
|
|
for (var key in styleAttrs) { |
|
if (node.attr[key]) { |
|
var value = key === 'size' ? fontSize[node.attr[key] - 1] : node.attr[key]; |
|
node.attr.style.push(styleAttrs[key]); |
|
node.attr.style.push(value); |
|
node.styleStr += styleAttrs[key] + ': ' + value + ';'; |
|
} |
|
} |
|
} //临时记录source资源 |
|
|
|
|
|
if (node.tag === 'source') { |
|
results.source = node.attr.src; |
|
} |
|
|
|
if (unary) { |
|
// if this tag doesn't have end tag |
|
// like <img src="hoge.png"/> |
|
// add to parents |
|
var parent = bufArray[0] || results; |
|
|
|
if (parent.nodes === undefined) { |
|
parent.nodes = []; |
|
} |
|
|
|
parent.nodes.push(node); |
|
} else { |
|
bufArray.unshift(node); |
|
} |
|
}, |
|
end: function (tag) { |
|
//debug(tag); |
|
// merge into parent tag |
|
var node = bufArray.shift(); |
|
if (node.tag !== tag) console.error('invalid state: mismatch end tag'); //当有缓存source资源时于于video补上src资源 |
|
|
|
if (node.tag === 'video' && results.source) { |
|
node.attr.src = results.source; |
|
delete results.source; |
|
} |
|
|
|
if (bufArray.length === 0) { |
|
results.nodes.push(node); |
|
} else { |
|
var parent = bufArray[0]; |
|
|
|
if (parent.nodes === undefined) { |
|
parent.nodes = []; |
|
} |
|
|
|
parent.nodes.push(node); |
|
} |
|
}, |
|
chars: function (text) { |
|
//debug(text); |
|
var node = { |
|
node: 'text', |
|
text: text, |
|
textArray: transEmojiStr(text) |
|
}; |
|
|
|
if (bufArray.length === 0) { |
|
node.index = index.toString(); |
|
index += 1; |
|
results.nodes.push(node); |
|
} else { |
|
var parent = bufArray[0]; |
|
|
|
if (parent.nodes === undefined) { |
|
parent.nodes = []; |
|
} |
|
|
|
node.index = parent.index + '.' + parent.nodes.length; |
|
parent.nodes.push(node); |
|
} |
|
}, |
|
comment: function (text) {//debug(text); |
|
// var node = { |
|
// node: 'comment', |
|
// text: text, |
|
// }; |
|
// var parent = bufArray[0]; |
|
// if (parent.nodes === undefined) { |
|
// parent.nodes = []; |
|
// } |
|
// parent.nodes.push(node); |
|
} |
|
}); |
|
return results; |
|
} |
|
|
|
; |
|
|
|
function transEmojiStr(str) { |
|
// var eReg = new RegExp("["+__reg+' '+"]"); |
|
// str = str.replace(/\[([^\[\]]+)\]/g,':$1:') |
|
var emojiObjs = []; //如果正则表达式为空 |
|
|
|
if (__emojisReg.length == 0 || !__emojis) { |
|
var emojiObj = {}; |
|
emojiObj.node = "text"; |
|
emojiObj.text = str; |
|
array = [emojiObj]; |
|
return array; |
|
} //这个地方需要调整 |
|
|
|
|
|
str = str.replace(/\[([^\[\]]+)\]/g, ':$1:'); |
|
var eReg = new RegExp("[:]"); |
|
var array = str.split(eReg); |
|
|
|
for (var i = 0; i < array.length; i++) { |
|
var ele = array[i]; |
|
var emojiObj = {}; |
|
|
|
if (__emojis[ele]) { |
|
emojiObj.node = "element"; |
|
emojiObj.tag = "emoji"; |
|
emojiObj.text = __emojis[ele]; |
|
emojiObj.baseSrc = __emojisBaseSrc; |
|
} else { |
|
emojiObj.node = "text"; |
|
emojiObj.text = ele; |
|
} |
|
|
|
emojiObjs.push(emojiObj); |
|
} |
|
|
|
return emojiObjs; |
|
} |
|
|
|
function emojisInit(reg = '', baseSrc = "/wxParse/emojis/", emojis) { |
|
__emojisReg = reg; |
|
__emojisBaseSrc = baseSrc; |
|
__emojis = emojis; |
|
} |
|
|
|
module.exports = { |
|
html2json: html2json, |
|
emojisInit: emojisInit |
|
}; |