mount 系统调用在使用 MS_PRIVATE 标志时无法工作

问题描述

我们正在尝试学习如何在 Linux 上构建类似容器的应用程序(我使用的是 Ubuntu 20.04)。

我们使用以下命令获取一个 Ubuntu 文件系统:

sudo debootstrap --variant=minbase bionic /home/some/path/ubuntu_fs http://ftp.heanet.ie/pub/ubuntu/

这是我们目前正在做的事情:

#include <bits/stdc++.h>
#include <unistd.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/mount.h>

using namespace std;


// Size in bytes of the stack buffer handed to clone() for the child (1 MiB).
#define STACK_SIZE (1024 * 1024)
// Directory the child passes to chroot(); the question populates it with debootstrap.
#define CHROOT_PATH "/home/some/path/ubuntu_fs"

// Bundle handed from main() to the cloned child through clone()'s void* argument.
struct ChildArgs
{
    char **argv;  // command to execute followed by its arguments (main passes &argv[1])
};



// Report the failure of `what` via perror() and return the errno value that
// was captured before perror() had a chance to overwrite it.
static int report_failure(const char* what)
{
    int saved_errno = errno;
    perror(what);
    return saved_errno;
}

// Entry point of the cloned child; it runs in the new UTS, mount and PID
// namespaces requested by main().
//
// args: pointer to a ChildArgs whose argv holds the command to execute.
// Returns 0 on success, the errno of the failing setup call, or
// EXIT_FAILURE when fork() fails.
int child(void* args)
{
    ChildArgs* child_args = static_cast<ChildArgs*>(args);

    if(sethostname("conman",6))
        return report_failure("sethostname Failed");

    if(chroot(CHROOT_PATH))
        return report_failure("chroot Failed");

    if(chdir("/"))
        return report_failure("chdir Failed");

    // Removing the `if` block immediately below makes the program run fine
    // NOTE(review): this is the call the question is about and it is kept
    // as posted. It is reported to fail with "Invalid argument"; the answer
    // attributes that to the chroot directory not being a mount point and
    // performs this call before chroot() instead.
    if(mount("","/","",MS_PRIVATE | MS_REC,NULL))
        return report_failure("Mount Failed");

    // mount() takes five arguments: source, target, fstype, flags, data.
    if(mount("proc","/proc","proc",0,NULL))
        return report_failure("Mount Failed");

    pid_t fork_pid = fork();
    if(fork_pid < 0)
        return EXIT_FAILURE;

    else if(fork_pid == 0)
    {
        execvp(child_args->argv[0],child_args->argv);

        // Only reached when execvp() failed: leave immediately instead of
        // falling through into the parent's code path.
        perror("execvp Failed");
        _exit(127);
    }

    else
    {
        waitpid(fork_pid,NULL,0);
        umount("/proc");
        umount("/");
    }
    return 0;
}

// Clone a child into new UTS, mount and PID namespaces, let it run the
// command given on the command line, and wait for it to finish.
//
// Usage: <program> <command> [args...]
// Exits with EXIT_FAILURE when no command is given or when clone()/waitpid()
// fail; returns 0 otherwise.
int main(int argc,char *argv[])
{
    static char child_stack[STACK_SIZE];

    // Without a command, argv[1] is NULL and the child would exec nothing.
    if(argc < 2)
    {
        fprintf(stderr,"Usage: %s <command> [args...]\n",argc > 0 ? argv[0] : "prog");
        exit(EXIT_FAILURE);
    }

    int flags = CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID | SIGCHLD;

    ChildArgs child_args {&argv[1]};

    // The end of the buffer is passed as the stack pointer for the child.
    pid_t childpid = clone(child,child_stack + STACK_SIZE,flags,&child_args);
    if(childpid == -1)
    {
        perror("Error");
        exit(EXIT_FAILURE);
    }

    // waitpid() takes three arguments: pid, status pointer, options.
    if(waitpid(childpid,NULL,0) == -1)
    {
        perror("Error");
        exit(EXIT_FAILURE);
    }

    return 0;
}

我们要将 chroot 目录的挂载设为私有(private),原因是我们不希望子挂载命名空间中的挂载传播到任何其他命名空间。如果我们不这样做,新命名空间中的任何挂载都会从父挂载继承 MS_SHARED 传播类型,因此在父命名空间中可见。这个 SO 回答也建议这样做。

然而,执行我们的代码会返回以下错误

Mount Failed: Invalid argument

将 MS_PRIVATE | MS_REC 更改为 0 会返回以下结果:

Mount Failed: No such device

在 chroot 目录中挂载 /proc 和 /sys 工作正常,但这些挂载在父命名空间中可见(如预期)。

关于如何将挂载设为私有的任何建议都会非常有帮助。

解决方法

我猜您为其调用 mount() 的目录 CHROOT_PATH 不是挂载点。因此出现错误。

我用 C 语言重新编写了您的程序,实现几乎相同的效果。不同之处在于,mount(MS_PRIVATE | MS_REC) 操作是在 chroot() 之前(而不是之后)、针对新命名空间中的“/”挂载点执行的。该程序通过一些绑定挂载即时构建一个精简的文件系统,以便访问主机上的可执行文件和库。

#define _GNU_SOURCE
#include <unistd.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <sys/stat.h>

// Size in bytes of the stack buffer handed to clone() for the child (1 MiB).
#define STACK_SIZE (1024 * 1024)
// Directory that becomes the child's root via chroot(); also the prefix of every bind-mount target.
#define CHROOT_PATH "/tmp/try"

// Print a printf-style message to stderr, prefixed with the source line of the call site.
// `##__VA_ARGS__` is the GNU extension that drops the preceding comma when no extra arguments are passed.
#define ERR(msg,...) fprintf(stderr,"Line#%d - " msg,__LINE__,##__VA_ARGS__)

/*
 * Directories that child() creates under CHROOT_PATH before calling chroot().
 * The table ends with an entry whose source is NULL.
 */
struct bind_mount_points_t {
  const char *source;   /* host directory */
  const char *target;   /* directory created under CHROOT_PATH */
  int mount;            /* non-zero: bind-mount source onto target; 0: only create target */
} bind_mount_points[] = {

  { "/bin",   CHROOT_PATH "/bin",   1 },
  { "/lib",   CHROOT_PATH "/lib",   1 },
  { "/lib64", CHROOT_PATH "/lib64", 1 },
  { "/usr",   CHROOT_PATH "/usr",   1 },
  { "/etc",   CHROOT_PATH "/etc",   1 },
  { "/tmp",   CHROOT_PATH "/tmp",   0 },   /* private, empty /tmp: created but not bound */
  { 0, 0, 0 }                              /* sentinel */

};


/*
 * Entry point of the cloned child; it runs in the new UTS, mount and PID
 * namespaces requested by main().
 *
 * Steps: set the hostname, make all mounts private, build a minimal file
 * system under CHROOT_PATH from bind mounts, chroot() into it, mount /proc,
 * run the requested program in a forked process, then undo the mounts and
 * remove the directories that were created.
 *
 * args: the argument vector of the program to run (char **); args[0] is
 *       passed to execv(), so it must be a path to the executable.
 * Returns 0 on success, the errno of the failing call, or EXIT_FAILURE when
 * fork() fails.
 */
int child(void *args)
{
  char **av;
  int rc;
  int err;
  int i;

  rc = sethostname("conman",6);
  if (rc != 0) {
    err = errno;
    ERR("sethostname(): '%m' (%d)\n",err);
    return err;
  }

  av = (char **)args;

  // Make every mount of the new namespace private so that later mounts do not
  // propagate to other namespaces. This is done on "/" BEFORE chroot().
  // mount() takes five arguments: source, target, fstype, flags, data.
  if(mount("none","/",NULL,MS_PRIVATE | MS_REC,NULL)) {
    err = errno;
    ERR("mount(): '%m' (%d)\n",err);
    return err;
  }

  // Make the file system with some bind mounts to get access to executables
  i = 0;
  while(bind_mount_points[i].source) {

    rc = mkdir(bind_mount_points[i].target,0777);
    if (rc != 0) {
      err = errno;
      ERR("mkdir(%s): '%m' (%d)\n",bind_mount_points[i].target,err);
      return err;
    }

    if (bind_mount_points[i].mount) {
      if(mount(bind_mount_points[i].source,bind_mount_points[i].target,NULL,MS_BIND|MS_REC,NULL)) {
        err = errno;
        ERR("mount(%s): '%m' (%d)\n",bind_mount_points[i].source,err);
        return err;
      }
    }

    i ++;
  }

  // Change root
  rc = chroot(CHROOT_PATH);
  if (rc != 0) {
    err = errno;
    ERR("chroot(): '%m' (%d)\n",err);
    return err;
  }

  rc = chdir("/");
  if (rc != 0) {
    err = errno;
    ERR("chdir(/): '%m' (%d)\n",err);
    return err;
  }

  rc = mkdir("/proc",0777);
  if (rc != 0) {
    err = errno;
    ERR("mkdir(/proc): '%m' (%d)\n",err);
    return err;
  }

  if(mount("proc","/proc","proc",0,NULL)) {
    err = errno;
    ERR("mount(proc): '%m' (%d)\n",err);
    return err;
  }

  pid_t fork_pid = fork();
  if(fork_pid < 0) {
    err = errno;
    ERR("fork(): '%m' (%d)\n",err);
    return EXIT_FAILURE;
  } else if(fork_pid == 0) {
    execv(av[0],av);
    // Only reached when execv() failed
    _exit(2);
  } else {
    waitpid(fork_pid,NULL,0);

    // Cleanup
    umount("/proc");
    rmdir("/proc");

    // After chroot(), each `source` path ("/bin", ...) names the directory
    // that was created as CHROOT_PATH + source, i.e. the bind-mount target.
    i = 0;
    while(bind_mount_points[i].source) {

      if (bind_mount_points[i].mount) {
        if(umount(bind_mount_points[i].source)) {
          err = errno;
          ERR("umount(%s): '%m' (%d)\n",bind_mount_points[i].source,err);
          return err;
        }
      }

      rc = rmdir(bind_mount_points[i].source);
      if (rc != 0) {
        err = errno;
        ERR("rmdir(%s): '%m' (%d)\n",bind_mount_points[i].source,err);
        return err;
      }

      i ++;
    }

  }
  return 0;
}

/*
 * Clone a child into new UTS, mount and PID namespaces and wait for it.
 *
 * Usage: ctr <path-to-program> [args...]
 * argv[1..] is handed to child(), which runs it with execv(), so the program
 * must be given as a path.
 * Exits with EXIT_FAILURE when no program is given or when clone()/waitpid()
 * fail; returns 0 otherwise.
 */
int main(int argc,char *argv[])
{
  static char child_stack[STACK_SIZE];

  // Without a program, argv[1] is NULL and the child would have nothing to exec.
  if (argc < 2) {
    ERR("Usage: %s <path-to-program> [args...]\n",argc > 0 ? argv[0] : "ctr");
    exit(EXIT_FAILURE);
  }

  int flags = CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID | SIGCHLD;

  // The end of the buffer is passed as the stack pointer for the child.
  pid_t childpid = clone(child,child_stack + STACK_SIZE,flags,&(argv[1]));

  if (childpid < 0) {
    ERR("clone(): '%m' (%d)\n",errno);
    exit(EXIT_FAILURE);
  }

  // waitpid() takes three arguments: pid, status pointer, options.
  if(waitpid(childpid,NULL,0) == -1) {
    ERR("waitpid(): '%m' (%d)\n",errno);
    exit(EXIT_FAILURE);
  }

  return 0;
}

程序的构建:

$ gcc ctr.c -o ctr

创建 chroot() 要使用的目录:

$ mkdir /tmp/try

用shell运行程序:

$ sudo ./ctr /bin/bash
root@conman:/# hostname
conman
root@conman:/# pwd
/
root@conman:/# ps -ef
UID          PID    PPID  C STIME TTY          TIME CMD
root           1       0  0 07:20 ?        00:00:00 ./ctr /bin/bash
root           2       1  0 07:20 ?        00:00:00 /bin/bash
root           4       2  0 07:20 ?        00:00:00 ps -ef
root@conman:/# ls -l /
total 108
drwxr-xr-x   2 root root 69632 mars   25 06:09 bin
drwxr-xr-x 152 root root 12288 mars   24 08:07 etc
drwxr-xr-x 152 root root 12288 mars   25 06:09 lib
drwxr-xr-x   2 root root  4096 févr.  4 10:02 lib64
dr-xr-xr-x 393 root root     0 mars   25 07:20 proc
drwxr-xr-x   2 root root  4096 mars   25 07:20 tmp
drwxr-xr-x  16 root root  4096 nov.   22 20:02 usr
root@conman:/# exit
exit

我修改了之前的程序以使用通过debootstrap安装的Ubuntu文件系统:

#define _GNU_SOURCE
#include <unistd.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <sys/stat.h>

// Size in bytes of the stack buffer handed to clone() for the child (1 MiB).
#define STACK_SIZE (1024 * 1024)
// Directory that becomes the child's root via chroot() (the debootstrap install location).
#define CHROOT_PATH "/tmp/try"

// Print a printf-style message to stderr, prefixed with the source line of the call site.
// `##__VA_ARGS__` is the GNU extension that drops the preceding comma when no extra arguments are passed.
#define ERR(msg,...) fprintf(stderr,"Line#%d - " msg,__LINE__,##__VA_ARGS__)

/*
 * Entry point of the cloned child; it runs in the new UTS, mount and PID
 * namespaces requested by main().
 *
 * NOTE(review): the original listing was truncated; this body is rebuilt from
 * the surviving fragments and the previous program, minus the bind-mount
 * steps, since CHROOT_PATH already holds a full file system.
 *
 * Steps: set the hostname, make all mounts private, chroot() into
 * CHROOT_PATH, mount /proc, run the requested program in a forked process,
 * then unmount /proc.
 *
 * args: the argument vector of the program to run (char **); args[0] is
 *       passed to execv(), so it must be a path inside the new root.
 * Returns 0 on success, the errno of the failing call, or EXIT_FAILURE when
 * fork() fails.
 */
int child(void *args)
{
  char **av;
  int rc;
  int err;

  rc = sethostname("conman",6);
  if (rc != 0) {
    err = errno;
    ERR("sethostname(): '%m' (%d)\n",err);
    return err;
  }

  av = (char **)args;

  // Make every mount of the new namespace private, on "/" BEFORE chroot().
  // mount() takes five arguments: source, target, fstype, flags, data.
  if(mount("none","/",NULL,MS_PRIVATE | MS_REC,NULL)) {
    err = errno;
    ERR("mount(): '%m' (%d)\n",err);
    return err;
  }

  // Change root
  rc = chroot(CHROOT_PATH);
  if (rc != 0) {
    err = errno;
    ERR("chroot(): '%m' (%d)\n",err);
    return err;
  }

  rc = chdir("/");
  if (rc != 0) {
    err = errno;
    ERR("chdir(/): '%m' (%d)\n",err);
    return err;
  }

  if(mount("proc","/proc","proc",0,NULL)) {
    err = errno;
    ERR("mount(proc): '%m' (%d)\n",err);
    return err;
  }

  pid_t fork_pid = fork();
  if(fork_pid < 0) {
    err = errno;
    ERR("fork(): '%m' (%d)\n",err);
    return EXIT_FAILURE;
  } else if(fork_pid == 0) {
    execv(av[0],av);
    // Only reached when execv() failed
    _exit(2);
  } else {
    waitpid(fork_pid,NULL,0);

    // Cleanup
    umount("/proc");
  }
  return 0;
}

int main(int argc,0) == -1) {   
    ERR("Error");
    exit(EXIT_FAILURE);
  }

  return 0;
}

文件系统的安装(在/tmp/try中),程序的构建和执行:

$ sudo debootstrap --variant=minbase bionic /tmp/try http://ftp.heanet.ie/pub/ubuntu/
[...]
$ gcc ctr2.c -o ctr2
$ sudo ./ctr2 /bin/bash
root@conman:/# hostname       
conman
root@conman:/# pwd
/
root@conman:/# ls -la
total 80
drwxr-xr-x  21 root root 4096 Mar 25 06:40 .
drwxr-xr-x  21 root root 4096 Mar 25 06:40 ..
drwxr-xr-x   2 root root 4096 Mar 25 06:40 bin
drwxr-xr-x   2 root root 4096 Apr 24  2018 boot
drwxr-xr-x   4 root root 4096 Mar 25 06:40 dev
drwxr-xr-x  29 root root 4096 Mar 25 06:40 etc
drwxr-xr-x   2 root root 4096 Apr 24  2018 home
drwxr-xr-x   8 root root 4096 May 23  2017 lib
drwxr-xr-x   2 root root 4096 Mar 25 06:40 lib64
drwxr-xr-x   2 root root 4096 Mar 25 06:40 media
drwxr-xr-x   2 root root 4096 Mar 25 06:40 mnt
drwxr-xr-x   2 root root 4096 Mar 25 06:40 opt
dr-xr-xr-x 393 root root    0 Mar 25 06:46 proc
drwx------   2 root root 4096 Mar 25 06:40 root
drwxr-xr-x   4 root root 4096 Mar 25 06:40 run
drwxr-xr-x   2 root root 4096 Mar 25 06:40 sbin
drwxr-xr-x   2 root root 4096 Mar 25 06:40 srv
drwxr-xr-x   2 root root 4096 Apr 24  2018 sys
drwxrwxrwt   2 root root 4096 Mar 25 06:40 tmp
drwxr-xr-x  10 root root 4096 Mar 25 06:40 usr
drwxr-xr-x  11 root root 4096 Mar 25 06:40 var
root@conman:/# ps -ef
UID          PID    PPID  C STIME TTY          TIME CMD
root           1       0  0 06:46 ?        00:00:00 ./ctr2 /bin/bash
root           2       1  0 06:46 ?        00:00:00 /bin/bash
root           9       2  0 06:46 ?        00:00:00 ps -ef
root@conman:/# 

在这个“来宾”文件系统中挂载的 U 盘在主机端是不可见的。例如,在我的系统上,U 盘对应 /dev/sdd1(如果主机上有自动挂载,请先将其卸载):

$ lsblk
[...]
sdd      8:48   1   7,5G  0 disk 
`-sdd1   8:49   1   7,5G  0 part 

我在“来宾”文件系统中创建这些设备节点并挂载 U 盘:

root@conman:/# df        
df: no file systems processed
root@conman:/# mknod /dev/sdd b 8 48
root@conman:/# mknod /dev/sdd1 b 8 49
root@conman:/# mount -t ntfs /dev/sdd1 /media
root@conman:/# df
Filesystem     1K-blocks   Used Available Use% Mounted on
/dev/sdd1        7830524 357764   7472760   5% /media

在主机端,该 U 盘的挂载点不可见。