问题描述
TLDR
我想转换下面的代码
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
使用 Javascript(或 Hugo)转换为以下 JSON 格式
{"t": "root","d": 0,"v": "","c": [
{"t": "h","d": 1,"v": "<a href=\"#js\">JS</a>","c": [
{"t": "h","d": 2,"v": "<a href=\"#h3-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2\">H3-2</a>"}
]
},{"t": "h","d": 1,"v": "<a href=\"#python\">Python</a>","c": [
{"t": "h","d": 2,"v": "<a href=\"#h3-1-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}
上面只是一个例子,你提供的函数应该能够像上面那样将任何类似的结构转换成 JSON 字符串,没有任何问题(实际上,唯一的要求是不要使用硬编码完成)
<!DOCTYPE html>
<html>
<head>
<script>
// Example driver: passes the TOC markup (abbreviated here) to my_func and
// logs whatever it returns.
(()=>{
var input_text = `<nav id="TableOfContents">...`;
var output = my_func(input_text);
console.log(output); /* expected result: {"t": "root","c": [ ... */
}
)()
// Placeholder for the requested converter: it receives the TOC HTML as a
// string. The body is intentionally left unimplemented in this example.
function my_func(text) {
/* ... */
}
</script>
</head>
</html>
长话
我想做什么?
我想在 markmap 上使用思维导图 (markmap-github,Hugo) 并将其应用到文章 (single.html) 作为目录
Hugo 已经通过 TableOfContents 提供了目录功能。
例如,我的 .md 文件内容如下:
## JS
### H3-1
### H3-2
## Python
### H3-1
### H3-2
然后{{ .TableOfContents }}
会输出
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
但是,如果我使用 markmap 那么我必须向它提供一个 JSON 字符串,如下所示,
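{"t": "root","d": 0,"v": "","c": [
{"t": "h","d": 1,"v": "<a href=\"#js\">JS</a>","c": [
{"t": "h","d": 2,"v": "<a href=\"#h3-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2\">H3-2</a>"}
]
},{"t": "h","d": 1,"v": "<a href=\"#python\">Python</a>","c": [
{"t": "h","d": 2,"v": "<a href=\"#h3-1-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}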
我做了什么?
我试图只用 Hugo 的 functions 来做,但失败了 (逻辑上可行,但语法上难以实现)
所以我把希望寄托在 JavaScript 上。但一直以来,我写 JS 都只是找别人的代码稍作修改,而这个问题对我来说是全新的,我不知道该从何下手(坦率地说,我是 JS 门外汉)。
以下是我用Python实现的。
from bs4 import BeautifulSoup,Tag
from typing import Union
import os
from pathlib import Path
import json
def main():
    """Demo entry point.

    Parses a sample Hugo table of contents, converts it to the markmap
    JSON tree, prints that JSON, writes a standalone HTML page that
    renders it, and opens the page.

    Note: ``os.startfile`` is only available on Windows.
    """
    data = """<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>"""
    root_level = 0
    children = []
    # The root node carries an empty value; `children` is filled in place
    # by toc2json below.
    tree = {'t': 'root', 'd': root_level, 'v': '', 'c': children}

    soup = BeautifulSoup(data, "lxml")
    root_ul: Tag = soup.find('ul')
    toc2json(root_ul, root_level + 1, children)  # <-- core

    toc_json: str = json.dumps(tree)
    print(toc_json)

    output_file = Path('test.temp.html')
    create_markmap_html(toc_json, output_file)
    os.startfile(output_file)
def toc2json(tag: Tag, cur_level: int, sub_list):
    """Append one markmap node per ``<li>`` child of ``tag`` to ``sub_list``.

    :param tag: the ``<ul>`` element whose direct children are walked.
    :param cur_level: depth stored in the ``d`` key of every node created
        at this level; nested lists are processed with ``cur_level + 1``.
    :param sub_list: list that receives the node dicts (mutated in place).

    Each node has the keys ``t`` (always ``'h'``), ``d`` (depth) and ``v``
    (the link rendered as an HTML string), plus ``c`` (children) when the
    ``<li>`` contains a nested ``<ul>``.
    """
    for li in tag:
        # Iterating a Tag also yields whitespace/text children; skip them.
        if not isinstance(li, Tag):
            continue
        a = li.find('a')
        if a is None:
            # No link in this item: there is nothing to show, and str(None)
            # would otherwise emit the literal text "None".
            continue
        cur_obj = dict(t='h', d=cur_level, v=str(a))
        ul: Union[Tag, None] = li.find('ul')
        if ul:
            new_sub_list = []
            # Pass the depth explicitly: the signature is
            # (tag, cur_level, sub_list).
            toc2json(ul, cur_level + 1, new_sub_list)
            cur_obj['c'] = new_sub_list
        sub_list.append(cur_obj)
def create_markmap_html(json_string: str, output_file: Path):
    """Write a standalone HTML page that renders ``json_string`` with markmap.

    :param json_string: the markmap tree serialized as JSON; it is inserted
        verbatim into the page as a JavaScript object literal.
    :param output_file: path of the HTML file to create (UTF-8, overwritten
        if it already exists).
    """
    import jinja2
    # NOTE(review): the markmap-view script URL had been mangled by e-mail
    # obfuscation ("[email protected]") and could not load. It is restored here as
    # markmap-view@0.2.0, which presumably matches the
    # `Markmap.create(svg, null, data)` call used below - confirm the version pin.
    markmap_template = jinja2.Template("""
<!DOCTYPE html>
<html lang=en>
<head>
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
width: 100vw;
height: 100vh;
}
</style>
</head>
<body>
<main>
<svg id="mindmap-test" class="mindmap"></svg>
</main>
<script>
(
(e,json_data)=>{
const{Markmap:r}=e();
window.mm=r.create("svg#mindmap-test",null,json_data);
}
)(
()=>window.markmap,{{ input_json }}
);
</script>
</body>
</html>
""")
    html_string = markmap_template.render(dict(input_json=json_string))
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_string)
if __name__ == '__main__':
main()
为了这小小的努力,请帮帮我。
解决方法
如有必要,我很乐意继续改进代码:)
/**
 * Line-oriented converter from table-of-contents markup to a markmap tree.
 *
 * The input is split on newlines and every trimmed line is inspected
 * separately, so the markup must have each `<ul>`, `</ul>` and `<li>` on its
 * own line. Markup collapsed onto a single line is not supported.
 */
class Parser {
  /**
   * @param {string} htmlExpr - TOC markup, one tag group per line.
   */
  constructor(htmlExpr) {
    this.result = { t: 'root', d: 0, v: '', c: [] };
    this.data = htmlExpr.split('\n').map((row) => row.trim());
    this.open = /^<(?<main>[^\/>]*)>/; // opening tag at the start of the line
    this.close = /^<\/(?<main>[^>]*)>/; // closing tag at the start of the line
    this.target = /(?<main><a[^>]*>[^<]*<\/a>)/; // the link we want to keep
  }

  /**
   * Classify one line.
   *
   * @param {string} str - a trimmed line of markup.
   * @returns {Array<?string>} `[open, close, value]`: the leading opening tag
   *   (name plus attributes), the leading closing tag name, and the first
   *   `<a>…</a>` found; each entry is `null` when there is no match.
   */
  test(str) {
    const pick = (match) => (match ? match.groups.main : null);
    // All three results are returned: parse() needs the closing tag to know
    // when a list ends.
    return [
      pick(this.open.exec(str)),
      pick(this.close.exec(str)),
      pick(this.target.exec(str)),
    ];
  }

  /**
   * Build the tree from the lines given to the constructor.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} the root node;
   *   each heading node is `{t: 'h', d: depth, v: linkHtml}` plus `c` when a
   *   nested list follows it.
   */
  parse() {
    const parents = []; // stack of the child arrays of the currently open lists
    let level = 0;
    let lastNode = this.result;
    for (const line of this.data) {
      const [o, c, v] = this.test(line);
      if (o === 'ul') {
        // A new list belongs to the most recently created node.
        level += 1;
        lastNode.c = [];
        parents.push(lastNode.c);
      } else if (c === 'ul') {
        level -= 1;
        parents.pop();
      }
      // A link outside any list has no parent array to go into; skip it.
      if (v && level > 0) {
        lastNode = { t: 'h', d: level, v };
        parents[level - 1].push(lastNode);
      }
    }
    return this.result;
  }
}
// Sample table of contents, one tag group per line (the layout Parser expects).
let htmlExpr = [
  '<nav id="TableOfContents">',
  '<ul>',
  '<li><a href="#js">JS</a>',
  '<ul>',
  '<li><a href="#h3-1">H3-1</a></li>',
  '<li><a href="#h3-2">H3-2</a></li>',
  '</ul>',
  '</li>',
  '<li><a href="#python">Python</a>',
  '<ul>',
  '<li><a href="#h3-1-1">H3-1</a></li>',
  '<li><a href="#h3-2-1">H3-2</a></li>',
  '</ul>',
  '</li>',
  '</ul>',
  '</nav>',
].join('\n');

// Parse the sample and print the resulting tree as indented JSON.
const parser = new Parser(htmlExpr);
const test = parser.parse();
console.log(JSON.stringify(test, null, 2));
我发现我的问题描述可能不够准确,导致一些人使用了只针对特定输入的解决方案。
严格来说,这些方案没有问题,但不适用于我的其他一些示例(参见 id=toc-compress 和 id=toc-double_link)。
不过我自己找到了可以满足所有需求的答案,请看以下内容。
/**
 * Converts a table-of-contents `<nav>` DOM element into the tree object that
 * markmap renders, and can hand that tree to markmap.
 *
 * Works on DOM nodes rather than on text, so the layout of the markup
 * (pretty-printed or on a single line) does not matter.
 */
class Toc {
  /**
   * @param {Element} node_nav - element whose direct `<ul>` child holds the TOC.
   */
  constructor(node_nav) {
    this.data = node_nav;
  }

  /**
   * Render `dict_data` into the `<svg>` with the given id; the created
   * instance is stored on `window.mm`.
   *
   * @param {string} svg_id - id of the target `<svg>` element.
   * @param {object} dict_data - tree as returned by `convert2dict()`.
   */
  create_mind_map(svg_id, dict_data) {
    const { Markmap } = window.markmap;
    // The second argument is the options slot; the tree goes third.
    window.mm = Markmap.create('svg#' + svg_id, null, dict_data);
  }

  /**
   * Direct child nodes of `node` whose `nodeName` equals `name`.
   *
   * @param {Node} node
   * @param {string} name - upper-case node name, e.g. `'LI'`.
   * @returns {Node[]}
   */
  _child_nodes_named(node, name) {
    return Array.from(node.childNodes).filter((child) => child.nodeName === name);
  }

  /**
   * Append one node per `<li>` child of `ul_node` to `c`, recursing into
   * nested lists.
   *
   * @param {Node} ul_node - the `<ul>` to walk.
   * @param {Array} c - array receiving the nodes (mutated in place).
   * @param {number} cur_level - depth written to the `d` key of each node.
   */
  _get_element(ul_node, c, cur_level) {
    // Two links back to back inside one item (internal link + external link).
    const double_link = /<a[^>]*>[^<]*<\/a><a[^>]*>[^<]*<\/a>/;
    for (const li of this._child_nodes_named(ul_node, 'LI')) {
      const inner_a = li.firstElementChild;
      if (!inner_a) {
        continue; // empty item: nothing to display
      }
      // Work on a copy so the page's own link text is left untouched.
      const inner_a_copy = inner_a.cloneNode(true);
      // When the item holds two links, keep the first one (the internal link)
      // as the primary link but show the text of the second one.
      const outer_a = double_link.test(li.innerHTML)
        ? this._child_nodes_named(li, 'A')[1]
        : undefined;
      if (outer_a !== undefined) {
        inner_a_copy.innerText = outer_a.innerText;
      }

      const item = { t: 'h', d: cur_level, v: inner_a_copy.outerHTML };
      const [sub_ul] = this._child_nodes_named(li, 'UL');
      if (sub_ul !== undefined) {
        item.c = [];
        this._get_element(sub_ul, item.c, cur_level + 1);
      }
      c.push(item);
    }
  }

  /**
   * Build the markmap tree for the element given to the constructor.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} root node; `c` is
   *   empty when the element has no direct `<ul>` child.
   */
  convert2dict() {
    const sub_c = [];
    const result_dict = { t: 'root', d: 0, v: '', c: sub_c };
    const [root_ul] = this._child_nodes_named(this.data, 'UL');
    if (root_ul !== undefined) {
      this._get_element(root_ul, sub_c, 1);
    }
    return result_dict;
  }
}
/**
 * Create an SVG element and set the given attributes on it.
 * Adapted from https://stackoverflow.com/a/37411738/9935654
 *
 * @param {string} n - SVG tag name, e.g. "svg".
 * @param {Object} v - attribute map; every upper-case letter in a key is
 *   replaced by "-" plus its lower-case form (strokeWidth -> stroke-width).
 * @returns {Element} the newly created element.
 */
function getNode(n, v) {
  const element = document.createElementNS("http://www.w3.org/2000/svg", n);
  for (const key in v) {
    const attributeName = key.replace(/[A-Z]/g, (upper) => "-" + upper.toLowerCase());
    element.setAttributeNS(null, attributeName, v[key]);
  }
  return element;
}
// On load: for every <nav> on the page, build its tree, append a fresh
// <svg id="mindmapN"> to that <nav>, and render the tree into it.
(() => {
  const nav_list = Array.from(document.getElementsByTagName('nav'));
  nav_list.forEach((node_nav, idx) => {
    const toc = new Toc(node_nav);
    const dict_data = toc.convert2dict();
    const id_name = `mindmap${idx}`;
    const node_svg = getNode("svg", { id: id_name });
    node_nav.appendChild(node_svg);
    toc.create_mind_map(id_name, dict_data);
  });
})();
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
/*
width: 100vw;
height: 100vh;
*/
}
</style>
</head>
<body>
<nav id="toc-normal" class="mindmap">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
<nav id="toc-compress"><ul><li><a href="#js">JS</a><ul><li><a href="#h3-1">H3-1</a></li><li><a href="#h3-2">H3-2</a></li></ul></li></ul></nav>
<nav id="toc-double_link">
<ul>
<li><a href="#js"><a href="https://www.w3schools.com/js/DEFAULT.asp">JS</a></a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
</body>
</html>