javascript, Hugo:将无序列表转换为 JSON 我想做什么?我做了什么?

问题描述

TLDR

我想转换下面的代码

<nav id="TableOfContents">
  <ul>
    <li><a href="#js">JS</a>
      <ul>
        <li><a href="#h3-1">H3-1</a></li>
        <li><a href="#h3-2">H3-2</a></li>
      </ul>
    </li>
    <li><a href="#python">Python</a>
      <ul>
        <li><a href="#h3-1-1">H3-1</a></li>
        <li><a href="#h3-2-1">H3-2</a></li>
      </ul>
    </li>
  </ul>
</nav>

使用 Javascript(或 Hugo)转换为以下 JSON 格式

{"t": "root","d": 0,"v": "","c": [
    {"t": "h","d": 1,"v": "<a href=\"#js\">JS</a>","c": [
            {"t": "h","d": 2,"v": "<a href=\"#h3-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2\">H3-2</a>"}
        ]
    },{"t": "h","d": 1,"v": "<a href=\"#python\">Python</a>","c": [
            {"t": "h","d": 2,"v": "<a href=\"#h3-1-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2-1\">H3-2</a>"}
        ]
     }
]}

上面只是一个例子,你提供的函数应该能够像上面那样将任何类似的结构转换成 JSON 字符串,没有任何问题(实际上,唯一的要求是不要使用硬编码完成)

你可以像下面这样输出你的代码

<!DOCTYPE html>
<html>
<head>
  <script>
    (()=>{
      var input_text = `<nav id="TableOfContents">...`;
      var output = my_func(input_text);
      console.log(output); /* expected result: {"t": "root","c": [ ... */
    }
    )()

    function my_func(text) {
      /* ... */
    }
  </script>
</head>
</html>

长话

我想做什么?

我想在 markmap 上使用思维导图 (markmap-github,Hugo) 并将其应用到文章 (single.html) 作为目录

Hugo 已经通过 TableOfContents 提供了目录

即我的.md

## JS

### H3-1

### H3-2

## Python

### H3-1

### H3-2

然后{{ .TableOfContents }}输出

<nav id="TableOfContents">
  <ul>
    <li><a href="#js">JS</a>
      <ul>
        <li><a href="#h3-1">H3-1</a></li>
        <li><a href="#h3-2">H3-2</a></li>
      </ul>
    </li>
    <li><a href="#python">Python</a>
      <ul>
        <li><a href="#h3-1-1">H3-1</a></li>
        <li><a href="#h3-2-1">H3-2</a></li>
      </ul>
    </li>
  </ul>
</nav>

但是,如果我使用 markmap 那么我必须向它提供一个 JSON 字符串,如下所示,

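{"t": "root","d": 0,"v": "","c": [
    {"t": "h","d": 1,"v": "<a href=\"#js\">JS</a>","c": [
            {"t": "h","d": 2,"v": "<a href=\"#h3-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2\">H3-2</a>"}
        ]
    },{"t": "h","d": 1,"v": "<a href=\"#python\">Python</a>","c": [
            {"t": "h","d": 2,"v": "<a href=\"#h3-1-1\">H3-1</a>"},{"t": "h","d": 2,"v": "<a href=\"#h3-2-1\">H3-2</a>"}
        ]
     }
]}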

我做了什么?

我试图只用 Hugo 的 functions 来做,但失败了 (逻辑上可行,但语法上难以实现)

所以我把希望寄托在了 JavaScript 上。但一直以来,我写 JS 都只是找别人的代码修改一下,而这个例子对我来说是全新的,我无从下手(坦率地说,我是 JS 门外汉)。

以下是我用Python实现的。

from bs4 import BeautifulSoup,Tag
from typing import Union
import os
from pathlib import Path
import json


def main():
    """Convert a sample Hugo table of contents to markmap JSON and preview it.

    Parses the hard-coded ``<nav id="TableOfContents">`` sample, builds the
    nested ``t``/``d``/``v``/``c`` tree with ``toc2json``, prints the JSON,
    writes a preview page to ``test.temp.html`` and opens that page.
    """
    import webbrowser  # local import: only needed for the non-Windows fallback

    data = """<nav id="TableOfContents">
      <ul>
        <li><a href="#js">JS</a>
          <ul>
            <li><a href="#h3-1">H3-1</a></li>
            <li><a href="#h3-2">H3-2</a></li>
          </ul>
        </li>
        <li><a href="#python">Python</a>
          <ul>
            <li><a href="#h3-1-1">H3-1</a></li>
            <li><a href="#h3-2-1">H3-2</a></li>
          </ul>
        </li>
      </ul>
    </nav>"""
    soup = BeautifulSoup(data,"lxml")

    # The root node owns ``sub_list``; toc2json fills it in place.
    sub_list = []
    cur_level = 0
    dict_tree = dict(t='root',d=cur_level,v='',c=sub_list)
    root_ul: Tag = soup.find('ul')

    toc2json(root_ul,cur_level + 1,sub_list)  # <-- core

    toc_json: str = json.dumps(dict_tree)
    print(toc_json)
    output_file = Path('test.temp.html')
    create_markmap_html(toc_json,output_file)

    # os.startfile exists only on Windows; fall back to the default browser
    # elsewhere instead of raising AttributeError.
    if hasattr(os,'startfile'):
        os.startfile(output_file)
    else:
        webbrowser.open(output_file.resolve().as_uri())


def toc2json(tag: Tag,cur_level: int,sub_list):
    """Append one markmap node per ``<li>`` child of ``tag`` to ``sub_list``.

    Each node is a dict with ``t='h'``, the depth ``d``, and ``v`` holding the
    HTML of the first ``<a>`` found inside the ``<li>``. When the ``<li>``
    contains a nested ``<ul>``, its entries are converted recursively into the
    node's ``c`` list at depth ``cur_level + 1``.

    Args:
        tag: The ``<ul>`` element whose children are walked.
        cur_level: Depth recorded on the nodes created by this call.
        sub_list: List that receives the nodes; mutated in place.
    """
    for li in tag:
        if not isinstance(li,Tag):
            # Iterating a Tag also yields the text between elements.
            continue
        a = li.find('a')
        ul = li.find('ul')
        cur_obj = dict(t='h',d=cur_level,v=str(a))
        if ul is not None:
            new_sub_list = []
            # Pass all three arguments; the depth grows by one per nesting.
            toc2json(ul,cur_level + 1,new_sub_list)
            cur_obj['c'] = new_sub_list
        sub_list.append(cur_obj)


def create_markmap_html(json_string: str,output_file: Path):
    """Write a standalone HTML page that renders ``json_string`` with markmap.

    The JSON text is substituted verbatim into an inline script, where it is
    passed as the third argument of ``Markmap.create``.

    Args:
        json_string: Serialized node tree to render.
        output_file: Destination path; written as UTF-8 text.
    """
    import jinja2
    # NOTE(review): the markmap <script> URL had been mangled to
    # "[email protected]" (e-mail obfuscation of "name@version"). It is restored
    # as markmap-view@0.2.0; the exact version is an assumption - confirm.
    markmap_template = jinja2.Template("""
    <!DOCTYPE html>
    <html lang=en>
    <head>
      <script src="https://d3js.org/d3.v6.min.js"></script>
      <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
      <style>      
      .mindmap {
        width: 100vw;
        height: 100vh;
      }
      </style>
    </head>
    <body>
      <main>
        <svg id="mindmap-test" class="mindmap"></svg>
      </main>
      
      <script>
        (
          (e,json_data)=>{
            const{Markmap:r}=e();
            window.mm=r.create("svg#mindmap-test",null,json_data);
          }
        )(
          ()=>window.markmap,{{ input_json }}
         );
      </script>
    </body>
    </html>
    """)

    html_string = markmap_template.render(dict(input_json=json_string))

    with open(output_file,'w',encoding='utf-8') as f:
        f.write(html_string)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

为了这小小的努力,请帮帮我。

解决方法

如有必要,我很乐意继续改进代码:)

/**
 * Line-oriented converter from table-of-contents markup to a nested tree of
 * {t, d, v, c} nodes.
 *
 * The input is split on newlines and each trimmed line is inspected on its
 * own, so an opening or closing <ul> is only recognised when it starts a line.
 */
class Parser {
  /** @param {string} htmlExpr Markup with one tag (or one <li> entry) per line. */
  constructor(htmlExpr){
    this.result = {t: 'root',d: 0,v: "",c:[]};
    this.data = htmlExpr.split('\n').map(row => row.trim()) // one trimmed line per entry
    this.open = RegExp('^<(?<main>[^\/>]*)>')           // opening tag at line start
    this.close = RegExp('^<\/(?<main>[^>]*)>')          // closing tag at line start
    this.target = RegExp('(?<main><a[^>]*>[^<]*<\/a>)') // the anchor to collect
  }

  /**
   * Classify one line.
   * @param {string} str Trimmed line of markup.
   * @returns {Array} [opened, closed, anchor]: the text inside a leading
   *   opening tag, the name of a leading closing tag, and the first anchor's
   *   HTML; each is null when the line has no such match.
   */
  test(str){
    let [o,c,v] = [ // all matches
      this.open.exec(str),this.close.exec(str),this.target.exec(str)
    ];
    // reduce each match to its captured group
    if (o) o = o.groups.main
    if (c) c = c.groups.main
    if (v) v = v.groups.main
    // The closing-tag name must be returned too: parse() needs it to leave a list.
    return [o,c,v];
  }

  /**
   * Build the tree from the lines given to the constructor.
   * @returns {{t: string, d: number, v: string, c: Array}} The root node.
   */
  parse(){
    const parents = []; // parents[k] is the child list currently open at depth k + 1
    let level = 0;
    let lastNode = this.result;
    for (const line of this.data){
      const [o,c,v] = this.test(line)
      if (o === 'ul') {
        // A new list belongs to the most recently created node.
        level +=1;
        lastNode.c=[]
        parents.push(lastNode.c)
      } else if (c === 'ul') {
        level -=1;
        parents.pop()
      }
      // An anchor outside every <ul> has no list to join, so it is skipped.
      if (v && level > 0) {
        lastNode = {t: 'h',d: level,v}; // template
        parents[level - 1].push(lastNode) // insert to result
      }
    }
    return this.result;
  }
}

// Sample table-of-contents markup, one tag or <li> entry per line.
let htmlExpr = `<nav id="TableOfContents">
      <ul>
        <li><a href="#js">JS</a>
          <ul>
            <li><a href="#h3-1">H3-1</a></li>
            <li><a href="#h3-2">H3-2</a></li>
          </ul>
        </li>
        <li><a href="#python">Python</a>
          <ul>
            <li><a href="#h3-1-1">H3-1</a></li>
            <li><a href="#h3-2-1">H3-2</a></li>
          </ul>
        </li>
      </ul>
    </nav>`
// Parse the sample and print the resulting tree as JSON with 2-space indent.
const parser = new Parser(htmlExpr);
const test = parser.parse();
console.log(JSON.stringify(test,null,2))

,

我发现我的问题描述可能不够准确,

导致一些人使用特殊的解决方案来解决问题。

严格来说,它们没有问题,但不适用于我的其他一些示例(参见 id=toc-compress 和 id=toc-double_link),

而且我自己找到了答案,可以满足我的所有需求,

请看以下内容。

/**
 * Converts a table-of-contents <nav> element into a nested tree of
 * {t, d, v, c} nodes by walking the DOM, and renders that tree with markmap.
 */
class Toc {
  /** @param {Element} node_nav Element whose direct <ul> child holds the entries. */
  constructor(node_nav){
    this.data = node_nav;
  }

  /**
   * Render a node tree into an existing <svg>.
   * @param {string} svg_id id of the target <svg> element.
   * @param {Object} dict_data Root node as returned by convert2dict().
   */
  create_mind_map(svg_id,dict_data){
    const {Markmap} = window.markmap;
    // The tree goes in the third slot; the second slot is left null.
    window.mm = Markmap.create("svg#"+svg_id,null,dict_data)
  }

  /** Direct child nodes of `node` whose nodeName equals `name`. */
  _child_nodes_named(node,name){
    return Array.prototype.slice.call(node.childNodes).filter(child => child.nodeName === name)
  }

  /**
   * Append one node per direct <li> child of `ul_node` to `c`.
   * @param {Element} ul_node List element to walk.
   * @param {Array} c Child list that receives the nodes; mutated in place.
   * @param {number} cur_level Depth recorded on the created nodes.
   */
  _get_element(ul_node,c,cur_level){
    this._child_nodes_named(ul_node,'LI').forEach(li => {
      const inner_a = li.firstElementChild;
      if (!inner_a) return; // an <li> without any element has nothing to show

      // If the entry holds two adjacent links (an internal one followed by an
      // external one), keep the internal link but show the external link's text.
      const inner_a_copy = inner_a.cloneNode(true);  // avoid modifying the original innerText
      const has_two_links = RegExp('<a[^>]*>[^<]*<\/a><a[^>]*>[^<]*<\/a>').exec(li.innerHTML) != null;
      const outer_a = has_two_links ? this._child_nodes_named(li,'A')[1] : undefined;
      if (outer_a !== undefined) {
        inner_a_copy.innerText = outer_a.innerText
      }

      // Every node carries its depth, leaves included.
      const node = {t: 'h',d: cur_level,v: inner_a_copy.outerHTML};
      const ul = this._child_nodes_named(li,'UL')
      if (ul.length > 0){
        node.c = [];
        this._get_element(ul[0],node.c,cur_level+1)
      }
      c.push(node)
    });
  }

  /**
   * Build the node tree for the element given to the constructor.
   * @returns {{t: string, d: number, v: string, c: Array}} Root node; `c` is
   *   empty when the element has no direct <ul> child.
   */
  convert2dict(){
    const sub_c = []
    const result_dict = {t: 'root',d: 0,v: '',c:sub_c};
    const root_ul = this._child_nodes_named(this.data,'UL')[0]
    if (root_ul !== undefined) {
      this._get_element(root_ul,sub_c,1);
    }
    return result_dict
  }
};

/**
 * Create an SVG-namespace element and set attributes on it.
 * @param {string} n Tag name of the element to create.
 * @param {Object} v Attribute values keyed by camelCase name; each upper-case
 *   letter in a key is replaced by "-" plus its lower-case form.
 * @returns {Element} The new element (not attached to the document).
 */
function getNode(n,v) {
  /* https://stackoverflow.com/a/37411738/9935654 */
  const element = document.createElementNS("http://www.w3.org/2000/svg",n);
  const toKebab = key => key.replace(/[A-Z]/g,upper => "-" + upper.toLowerCase());
  for (const key in v) {
    element.setAttributeNS(null,toKebab(key),v[key]);
  }
  return element
};

// For every <nav> in the document: convert it to a node tree, append a fresh
// <svg id="mindmapN"> to it and render the tree into that <svg>.
(
  ()=>{
    const navs = document.getElementsByTagName('nav');
    for (let idx = 0; idx < navs.length; idx++){
      const node_nav = navs[idx];
      const svg_id = `mindmap${idx}`;
      const toc = new Toc(node_nav);
      const tree = toc.convert2dict();
      node_nav.appendChild(getNode("svg",{id: svg_id}))
      toc.create_mind_map(svg_id,tree)
    }
  }
)();
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <script src="https://d3js.org/d3.v6.min.js"></script>
  <!-- NOTE(review): this URL had been mangled to "[email protected]"; restored as
       markmap-view@0.2.0. The exact version is an assumption - confirm. -->
  <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
  <style>
    .mindmap {
      /*
      width: 100vw;
      height: 100vh;
      */
    }
  </style>
</head>
<body>
  <!-- Case 1: indented markup, one tag per line. -->
  <nav id="toc-normal" class="mindmap">
    <ul>
      <li><a href="#js">JS</a>
        <ul>
          <li><a href="#h3-1">H3-1</a></li>
          <li><a href="#h3-2">H3-2</a></li>
        </ul>
      </li>
      <li><a href="#python">Python</a>
        <ul>
          <li><a href="#h3-1-1">H3-1</a></li>
          <li><a href="#h3-2-1">H3-2</a></li>
        </ul>
      </li>
    </ul>
  </nav>

  <!-- Case 2: the same kind of list with all markup on a single line. -->
  <nav id="toc-compress"><ul><li><a href="#js">JS</a><ul><li><a href="#h3-1">H3-1</a></li><li><a href="#h3-2">H3-2</a></li></ul></li></ul></nav>

  <!-- Case 3: an entry whose internal link wraps an external link. -->
  <nav id="toc-double_link">
    <ul>
      <li><a href="#js"><a href="https://www.w3schools.com/js/DEFAULT.asp">JS</a></a>
        <ul>
          <li><a href="#h3-1">H3-1</a></li>
          <li><a href="#h3-2">H3-2</a></li>
        </ul>
      </li>
    </ul>
  </nav>
</body>
</html>