将路径数组转换为数据结构：清理与（重新）构建/映射、排序、路径片段的拆分与聚类、组装最终索引

问题描述

我有一系列这样的路径：

/doc/data/main.js
/doc/data/xl.js
/doc/data/dandu/sdasa.js
/mnt/data/la.js

我正在尝试构建以下结构：

{
  "directories": {
    "/doc/data": {
      "directories": {
        "dandu": {
          "files": {
            "sdasa.js": 1
          }
        }
      },"files": {
        "main.js": 1,"xl.js": 1
      }
    },"/mnt/data": {
      "directories": {},"files": {
        "la.js": 1
      }
    }
  },"files": {}
}

请忽略该示例中文件的值。将来，我将为此分配更复杂的数据。当前值为1。

从先前的一个问题中，我发现可以使用以下函数得到类似的结果：

/**
 * Builds a nested object tree with one level per path segment.
 *
 * Empty segments (leading, trailing or doubled slashes) are skipped. Every
 * segment, including the file name, becomes a key whose value is an object.
 *
 * @param {string[]} [inputPaths] - Paths to parse. When omitted, the outer
 *   `paths` variable is used, which is what the original zero-argument form did.
 * @returns {Object} The nested tree.
 */
var parsePathArray = function(inputPaths = paths) {
    const parsed = {};
    const hasOwn = Object.prototype.hasOwnProperty;

    for (const path of inputPaths) {
        let position = parsed;
        for (const segment of path.split('/')) {
            if (segment === "") {
                continue;
            }
            // Own-property check: a segment such as "constructor" or "toString"
            // must not resolve to something inherited from Object.prototype.
            if (!hasOwn.call(position, segment)) {
                position[segment] = {};
            }
            position = position[segment];
        }
    }
    return parsed;
};

该解决方案的主要问题是它会把每一级目录都拆开。但我并不想拆分每一级目录，而只想保留至少包含一个文件的目录。例如，在我的示例中，/doc下没有文件（只有一个目录/data），因此应当跳过它，继续向下合并。我尝试修改这个函数，但没有成功：

// Attempted variant from the question (reported there as not working).
// It accumulates path segments into `str` and descends into `this.files`
// once an entry for the accumulated path exists.
// NOTE(review): `split` and `this.files` are defined outside this fragment.
var str = '';
for (var j = 0; j < split.length; j++) {
    // Not the last segment and no entry yet for the path accumulated so far:
    // append the current segment and move on.
    if (j < split.length - 1 && typeof this.files[str] === 'undefined') {
        str += '/' + split[j];
        continue;
    }
    // Otherwise create the entry for `str` if it is missing and rebind
    // `this.files` to it.
    // NOTE(review): `str` is never reset after descending, and the last
    // segment is never appended to it.
    if (str !== '') {
        if (typeof this.files[str] === 'undefined')
            this.files[str] = {};
        this.files = this.files[str];
    }
}

将这些字符串转换为该数据结构的最佳方法是什么？

解决方法

这是我想出的解决方案。它的工作方式是每次处理一条路径，并将其与已有的数据结构进行比较。它也能处理位于根目录下的单独文件，因为您的原始帖子似乎暗示这是必要的。最后，我决定将其拆分为两个函数，因为这样可能更容易解释。

代码：

// Sample input: a mix of paths with and without a leading slash, at
// different depths, plus one file that has no directory at all.
const paths = [
    '/doc/data/main.js',
    'doc/data/xl.js',
    '/etc/further/owy.js',
    '/etc/further/abc.js',
    'etc/mma.js',
    '/mnt/data/it.js',
    '/mnt/data/path/is/long/la.js',
    'mnt/data/path/is/la.js',
    '/doc/data/dandu/sdasa.js',
    '/etc/i/j/k/l/thing.js',
    '/etc/i/j/areallylongname.js',
    'thing.js'
];

/**
 * Builds a `{ directories, files }` index from a list of file paths.
 *
 * Paths are given a leading slash if they lack one and are processed from
 * the fewest segments to the most, so shallower directories are registered
 * before deeper ones are nested beneath them. The input list is not modified.
 *
 * @param {string[]} paths - File paths, with or without a leading slash.
 * @returns {{directories: Object, files: Object}} The assembled index.
 */
function buildStructure(paths) {
    const structure = { directories: {}, files: {} };

    const segmentCount = (path) => path.split('/').length;
    const withLeadingSlash = (path) => (path.charAt(0) === '/' ? path : `/${path}`);

    const ordered = [...paths]
        .map(withLeadingSlash)
        .sort((a, b) => segmentCount(a) - segmentCount(b));

    for (const path of ordered) {
        // Drop the empty segment in front of the leading slash.
        const nodes = path.split('/').slice(1);
        const file = nodes.pop();

        // A bare file name has no directory nodes and lands on the top level.
        const start = nodes[0] ? structure.directories : structure;
        const target = findDirectory(start, '', [...nodes]);

        target.files = { ...(target.files || {}), [file]: 1 };
    }

    return structure;
}

/**
 * Walks (and, where needed, creates) the directory object for a node list.
 *
 * Nodes are consumed from the front of `nodes` and appended to a pending
 * path. When the pending path is already a key on the current object, the
 * walk moves into that entry and the pending path starts over from empty.
 * Whatever is still pending once the nodes run out becomes a new, empty
 * directory entry.
 *
 * @param {Object} pointer - Object to start searching in.
 * @param {string} subPath - Path accumulated so far ('' to start).
 * @param {string[]} nodes - Remaining directory names; emptied by this call.
 * @returns {Object} The directory object the file belongs in.
 */
function findDirectory(pointer,subPath,nodes) {
    let current = pointer;
    let pending = subPath;

    while (nodes.length > 0) {
        const candidate = `${pending}/${nodes.shift()}`;
        const existing = current[candidate];

        if (!existing) {
            // Unknown so far: keep extending the path.
            pending = candidate;
            continue;
        }

        current = existing;
        if (nodes.length >= 1) {
            // More nodes follow, so continue inside this entry's directories.
            current.directories = current.directories || {};
            current = current.directories;
        }
        pending = '';
    }

    if (pending) {
        current[pending] = {};
        current = current[pending];
    }
    return current;
}

// Build the index for the sample `paths` list and print it.
const structure = buildStructure(paths);
console.log(structure);

/* Gives `.as-console-wrapper` a minimum height of 100% and pins it to the top
   (presumably the snippet's console output pane - confirm). */
.as-console-wrapper { min-height: 100%!important; top: 0; }

说明：

这比我开始研究时想象的要复杂得多（并且有趣得多）。一旦开始连接目录，操作顺序就很重要。

从buildStructure开始，我们在路径数组上进行映射，以捕获没有前导斜线的所有条目。然后，根据它们引用的目录数对它们进行排序。这样可以确保我们从结构的顶部向底部进行工作。

将每个路径分成节点数组，然后弹出文件字符串。你剩下这样的东西了：

const nodes = ['doc','data'];
const file = 'main.js';

现在，我们必须把这些节点交给findDirectory，以查找或创建文件所在的位置。变量pointer用于跟踪我们在structure对象中的当前位置；由于pointer与structure引用的是同一批对象，通过pointer所做的任何修改都会直接体现在structure上。

findDirectory函数以递归方式逐个处理节点，逐步拼出完整的路径。每当拼出的路径已经存在于structure的目录中时，我们就进入该目录，并从空字符串重新开始拼接，以继续查找下一级目录。如果一直找不到，就说明这是一个全新的目录。目标是函数返回时pointer始终位于正确的目录中——必要时会在途中创建该目录。

为简化起见，假设我们只有两条记录路径：

const paths = [
  'doc/data/main.js','doc/data/dandu/sdasa.js'
];

对于第一个路径，findDirectory将进行三遍。这些是每次通过时都会赋予它的参数：

pointer = structure.directories > same > same

subPath = '' > '/doc' > '/doc/data'

nodes = ['doc','data'] > ['data'] > []

我们从未找到匹配项，因此函数退出时，它将在structure.directories上创建该目录。现在，第二条路径将经过四遍：

pointer = 
  structure.directories > 
  same > 
  structure.directories./doc/data.directories > 
  same

subPath = '' > '/doc' > '' > '/dandu' 

nodes = ['doc','data','dandu'] > ['data','dandu'] > ['dandu'] > []

如您所见，在第二遍中，我们拼出了字符串/doc/data，它确实已存在于structure.directories上。因此我们进入该目录；由于还有更多节点需要处理，我们在其中创建一个新的directories对象，然后也进入该对象。如果没有更多节点需要处理，就说明已经到达了正确的层级，也就不必再创建directories对象。从这里开始，只需再次拼接路径并重复同样的过程。

一旦我们在正确的目录中，我们可以将文件直接放在指针上，并将其注册在结构上。一旦我们移至下一条路径，指针将再次指向structure.directories。

如果没有要处理的节点（仅文件名），则将整个结构对象传递给findDirectory，文件将进入对象的顶层。

希望这可以很好地说明问题，并对您有用。我很享受解决这个问题的过程，也欢迎就如何改进它提出任何建议。

这个挑战确实并不容易。尽管如此，下面的方法是可行的：它把问题拆分成若干易于阅读、易于理解、因而也易于维护的子任务，从而达到OP的目标...

// Sample input for the map/sort/map/reduce pipeline: paths with and without
// leading slashes, shared prefixes, and files without any directory.
const pathList = [
  '/doc/data/main.js',
  '/doc/data/fame.js',
  '/doc/data/fame.es',
  '/doc/data/xl.js',
  '/mnt/data/la.js',
  '/mnt/la.es',
  'foo/bar/baz/biz/foo.js',
  'foo/bar/baz/biz/bar.js',
  '/foo/bar.js',
  '/foo/bar/baz/foo.js',
  'foo/bar/baz/bar.js',
  'foo/bar/baz/biz.js',
  '/foobar.js',
  'bazbiz.js',
  '/etc/i/j/areallylongname.js'
];


/**
 * Splits a raw path into its directory part and its file name.
 *
 * Leading slashes are removed first. A path without any remaining slash is
 * treated as a bare file name with an empty `pathName`.
 *
 * @param {string} path - Raw path, e.g. '/doc/data/main.js'.
 * @returns {{pathName: string, fileName: string}} The separated parts.
 */
function createSeparatedPathAndFileData(path) {
  const withoutLeadingSlashes = path.replace(/^\/+/, '');

  // Splitting on the last slash captures the trailing file name as item 1.
  const pieces = withoutLeadingSlashes.split(/\/([^/]*)$/);
  const isBareFileName = pieces.length === 1;

  const pathName = isBareFileName ? '' : pieces[0];
  const fileName = isBareFileName ? pieces[0] : pieces[1];

  return { pathName, fileName };
}

/**
 * Sort comparator for file data items: orders by `pathName`, then by the
 * file name without its extension, then by the extension.
 *
 * A file name without a dot is treated as having an empty extension, so
 * 'README' sorts next to 'README.md' instead of throwing.
 *
 * @param {{pathName: string, fileName: string}} a - Left item.
 * @param {{pathName: string, fileName: string}} b - Right item.
 * @returns {number} Negative, zero or positive, as `Array.prototype.sort` expects.
 */
function compareByPathAndFileNameAndExtension(a,b) {
  const regXSplit = (/\.([^.]*)$/); // split for filename and captured file extension.

  // Without a dot, `split` yields a single item; default the extension to ''.
  const [aName,aExtension = ''] = a.fileName.split(regXSplit);
  const [bName,bExtension = ''] = b.fileName.split(regXSplit);

  return (
       a.pathName.localeCompare(b.pathName)
    || aName.localeCompare(bName)
    || aExtension.localeCompare(bExtension)
  );
}


/**
 * Returns the part of `pathName` that follows the `root` directory prefix.
 *
 * The prefix must be followed by a slash, so 'foo' is a prefix of 'foo/bar'
 * but not of 'foobar'. Only the start of `pathName` is considered, so later
 * repetitions of the root (e.g. root 'a' in 'a/a/c') do not prevent a match.
 *
 * @param {string} root - Candidate ancestor path, without a trailing slash.
 * @param {string} pathName - Path to test.
 * @returns {string|null} The remainder after `root/`, or null if `pathName`
 *   does not start with `root/`.
 */
function getRightPathPartial(root,pathName) {
  const prefix = `${ root }/`;

  return pathName.startsWith(prefix)
    ? pathName.slice(prefix.length)
    : null; // null || string.
}

/**
 * Derives the path partials for `pathName` from the previous item's partials.
 *
 * Works on a copy of `previousPartials`, dropping trailing partials one by
 * one until the joined remainder is a prefix of `pathName`. If nothing is
 * left, `pathName` itself is the only partial.
 *
 * @param {string[]} previousPartials - Partials of the previous item.
 * @param {string} pathName - Directory path of the current item.
 * @returns {string[]} The partials for the current item.
 */
function getPathPartials(previousPartials,pathName) {
  const ancestors = Array.from(previousPartials);
  let remainder;

  while (!remainder) {
    const dropped = ancestors.pop();
    // Stop when nothing usable was dropped or no ancestor remains to test.
    if (!dropped || ancestors.length < 1) {
      break;
    }
    remainder = getRightPathPartial(ancestors.join('/'),pathName);
  }

  if (ancestors.length === 0) {
    return [pathName];
  }
  return remainder ? ancestors.concat(remainder) : ancestors;
}

/**
 * `Array.prototype.map` callback that attaches `pathPartials` to a file data
 * item, using the previous item in the list as the reference.
 *
 * - No previous item: the whole `pathName` is the single partial.
 * - Same `pathName` as the previous item: its partials are copied.
 * - Previous `pathName` is a direct prefix: the remainder is appended.
 * - Otherwise: `getPathPartials` walks back through the previous partials.
 *
 * The item is modified in place and returned.
 *
 * @param {{pathName: string, fileName: string}} fileData - Current item.
 * @param {number} idx - Index of the current item in `list`.
 * @param {Array<Object>} list - The list being mapped.
 * @returns {Object} `fileData`, now carrying a `pathPartials` array.
 */
function createPathPartialDataFromCurrentAndPreviousItem(fileData,idx,list) {
  const predecessor = list[idx - 1];

  if (!predecessor) {
    // initialize partials by adding path name.
    fileData.pathPartials = [fileData.pathName];
    return fileData;
  }

  const formerPath = predecessor.pathName;
  const presentPath = fileData.pathName;

  if (formerPath === presentPath) {
    // duplicate/copy path partials.
    fileData.pathPartials = [].concat(predecessor.pathPartials);
    return fileData;
  }

  // a) try an instant match first ...
  const remainder = getRightPathPartial(formerPath,presentPath);

  // ... before b) working back through the previous partials and looking
  //               for another partial match each time.
  fileData.pathPartials = remainder
    ? predecessor.pathPartials.concat(remainder)
    : getPathPartials(predecessor.pathPartials,presentPath);

  return fileData;
}


/**
 * Tells whether an index object has no own enumerable keys yet.
 *
 * @param {Object} index - Index object to inspect.
 * @returns {boolean} True when `index` has no own enumerable keys.
 */
function isUnassignedIndex(index) {
  const ownKeys = Object.keys(index);
  return ownKeys.length < 1;
}
/**
 * Gives an index object fresh, empty `directories` and `files` maps,
 * replacing any existing values under those two keys.
 *
 * @param {Object} index - Object to initialise (modified in place).
 * @returns {Object} The same `index` object.
 */
function assignInitialIndexProperties(index) {
  const emptyLevel = { directories: {}, files: {} };
  return Object.assign(index, emptyLevel);
}

/**
 * `Array.prototype.reduce` callback that registers one file in the index.
 *
 * Walks `fileData.pathPartials` from the root, creating a
 * `{ directories, files }` level for each partial that does not exist yet,
 * and records the file on the last level reached. The walk stops at the
 * first empty partial, which is how files without a directory (partials
 * `['']`) end up on the top level.
 *
 * `fileData.pathPartials` is only read, never consumed, so the item can be
 * logged or reused afterwards.
 *
 * @param {Object} index - Index being assembled; initialised on first use.
 * @param {{pathPartials: string[], fileName: string}} fileData - File item.
 * @returns {Object} The same `index` object.
 */
function assignFileDataToIndex(index,fileData) {
  if (isUnassignedIndex(index)) {
    assignInitialIndexProperties(index);
  }
  const { pathPartials,fileName } = fileData;
  const hasOwn = Object.prototype.hasOwnProperty;

  let subIndex = index;

  for (const path of pathPartials) {
    if (!path) {
      break;
    }
    const { directories } = subIndex;

    // Own-property check: `in` would also match inherited members such as
    // "toString" or "constructor" and descend into a function.
    // NOTE(review): a partial named "__proto__" is still not handled.
    if (!hasOwn.call(directories,path)) {
      directories[path] = assignInitialIndexProperties({});
    }
    subIndex = directories[path];
  }
  subIndex.files[fileName] = 1;

  return index;
}


// Demonstration of the pipeline, one stage at a time. Each call re-runs the
// chain from `pathList` and enables one more stage than the call before it;
// the commented-out lines show the stages that are still to come.

// Stage 0: the raw input list.
console.log(
  'input :: path list ...',pathList
  //.map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex,{})
);
// Stage 1: split every path into `pathName` and `fileName`.
console.log(
  '1st :: create separated path and file data from the original list ...',pathList
    .map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex,{})
);
// Stage 2: sort by path name, file name and extension.
console.log(
  '2nd :: sort previous data by comparing path- and file-names and its extensions ...',pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex,{})
);
// Stage 3: attach `pathPartials` to each item, based on its predecessor.
console.log(
  '3rd :: create partial path data from current/previous items of the sorted list ...',pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex,{})
);
// Stage 4: reduce the list into the final index.
console.log(
  '4th :: output :: assemble final index from before created list of partial path data ...',pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
    .reduce(assignFileDataToIndex,{})
);

/* Gives `.as-console-wrapper` a minimum height of 100% and pins it to the top
   (presumably the snippet's console output pane - confirm). */
.as-console-wrapper { min-height: 100%!important; top: 0; }

...从上面的日志中可以看到，这些任务是...

清理与（重新）构建/映射

通过删除可能的前导斜杠序列来对每个路径进行清理/标准化。
构建一个文件数据项列表，其中每个数据项都包含相应路径项的pathName和fileName（以后者的已清理/规范化形式显示）。

例如'/doc/data/dandu/sdasa.js'被映射到...

{
  "pathName": "doc/data/dandu","fileName": "sdasa.js"
}

排序

通过比较以下两个当前映射文件数据项的属性来完成排序...

比较pathName
按fileName比较，不带扩展名
按文件扩展名比较

因此原始文件列表看起来像这样...

[
  '/doc/data/main.js','/foobar.js'
]

...（经过清理/规范化的映射之后）会被排序成类似下面的内容...

[{
  "pathName": "","fileName": "foobar.js"
},{
  "pathName": "doc/data","fileName": "fame.es"
},{
  "pathName": "doc/data","fileName": "fame.js"
},{
  "pathName": "doc/data","fileName": "main.js"
},{
  "pathName": "doc/data/dandu","fileName": "sdasa.js"
},{
  "pathName": "foo","fileName": "bar.js"
},{
  "pathName": "foo/bar/baz","fileName": "biz.js"
},{
  "pathName": "foo/bar/baz/biz","fileName": "bar.js"
}]

排序至关重要，因为紧随其后的算法依赖于排列整齐的pathName。

路径部分的分割和聚类

为了让这一步尽量保持简单，它通过一次映射（map）完成；映射时不仅使用当前处理的项，还使用该项的前一个同级项（即前驱）。

通过将当前pathPartials与前一个分开，将建立另一个pathName列表。

例如'foo/bar/baz'将以前一项的路径'foo'为前缀进行拆分（通过字符串拆分）。拆出的'bar/baz'本身就是一个已聚类的路径片段；把它追加到前一个同级项的pathPartials列表（此时为['foo']）之后，就得到当前文件数据项的pathPartials列表，即['foo','bar/baz']。

'foo/bar/baz/biz'也会发生同样的情况，先前的路径名是'foo/bar/baz'，而先前的部分列表是['foo','bar/baz']。拆分结果将为'biz'，新的部分列表将为['foo','bar/baz','biz']。

从上方排序的文件数据列表然后映射到该新列表中...

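[{
  "pathName": "","fileName": "foobar.js","pathPartials": [
    ""
  ]
},{
  "pathName": "doc/data","fileName": "fame.es","pathPartials": [
    "doc/data"
  ]
},{
  "pathName": "doc/data","fileName": "fame.js","pathPartials": [
    "doc/data"
  ]
},{
  "pathName": "doc/data","fileName": "main.js","pathPartials": [
    "doc/data"
  ]
},{
  "pathName": "doc/data/dandu","fileName": "sdasa.js","pathPartials": [
    "doc/data","dandu"
  ]
},{
  "pathName": "foo","fileName": "bar.js","pathPartials": [
    "foo"
  ]
},{
  "pathName": "foo/bar/baz","fileName": "biz.js","pathPartials": [
    "foo","bar/baz"
  ]
},{
  "pathName": "foo/bar/baz/biz","fileName": "bar.js","pathPartials": [
    "foo","bar/baz","biz"
  ]
}]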

组装最终索引

最后一步是一个简单的列表精简任务，因为在这一点上，已经完成了正确分割和聚类每个路径部分的最困难的部分。

您可以使用某种递归函数来完成它。请记住，这只是一种可能的解决方案，可能不是最佳解决方案。

/**
 * Inserts one file path into a directory index and returns the index.
 *
 * The index maps a folder path to `{ files, directories }`, where
 * `directories` is a nested index of the same shape keyed by the remaining
 * folder path. The function looks for the shortest leading part of the
 * folder path that is already a key:
 * - none found, or the whole folder path is the key: the file is added to
 *   that entry's `files` (the entry is created if needed);
 * - only a leading part is a key: the rest of the path is inserted
 *   recursively into that entry's `directories`.
 *
 * Files without a folder are stored under the '' key.
 *
 * @param {string} path - File path, folders separated by "/".
 * @param {Object} [structure] - Index to extend; a new one is created when omitted.
 * @returns {Object} The extended index (the same object when one was passed).
 */
const workPath = (path,structure) => {
    const tree = structure || {};

    // Own-property check so folder names such as "constructor" are not
    // mistaken for existing entries inherited from Object.prototype.
    const hasEntry = (key) => Object.prototype.hasOwnProperty.call(tree,key);

    const folders = path.split("/");
    const file = folders.pop();

    // Check whether any leading part of the folder path is already available.
    let breakPoint = null;
    let tempPath = "";
    for (let i = 0; i < folders.length; i++) {
        const candidate = folders.slice(0,i + 1).join("/");

        if (hasEntry(candidate)) {
            breakPoint = i;
            tempPath = candidate;
            break;
        }
    }

    // Only a leading part of the path exists: descend with the remainder.
    // Compared against null explicitly, because index 0 is a valid match.
    if (breakPoint !== null && breakPoint < folders.length - 1) {
        const subPath = `${folders.slice(breakPoint + 1).join("/")}/${file}`;

        tree[tempPath]["directories"] = workPath(subPath,tree[tempPath]["directories"]);
        return tree;
    }

    // Either nothing matched or the whole folder path matched: add the file
    // to that entry, creating it only when it does not exist yet so that
    // earlier files (e.g. other root-level files) are kept.
    const foldersPath = folders.join("/");
    if (!hasEntry(foldersPath)) {
        tree[foldersPath] = {};
    }
    const entry = tree[foldersPath];
    entry["files"] = entry["files"] || {};
    entry["files"][file] = 1;

    return tree;
};

/**
 * Converts a list of file paths into a directory index by feeding every
 * path through `workPath`.
 *
 * Paths are processed from the fewest segments to the most, so a shallower
 * directory is always registered before anything nested beneath it. Without
 * that ordering the result would depend on the order of the input. The
 * input itself is not modified.
 *
 * @param {Iterable<string>} array - All file paths.
 * @returns {Object} The assembled index.
 */
const convert = array => {
    const segmentCount = (path) => path.split("/").length;
    const ordered = [...array].sort((a,b) => segmentCount(a) - segmentCount(b));

    let structure = {};
    for (const path of ordered) {
        structure = workPath(path,structure);
    }

    return structure;
};

convert函数需要传入一个包含所有路径的数组。

请记住，此解决方案不会考虑其中没有文件的条目。

algorithm arrays javascript

将路径数组转换为数据结构 消毒和重新构建/映射排序路径部分的分割和聚类组装最终索引