实现简单文件系统

文件系统注册与挂载

/*
 * Filesystem type descriptor passed to register_filesystem().
 * .name is the string matched by "mount -t simplefs"; .mount and .kill_sb
 * are the callbacks used when the filesystem is mounted and unmounted.
 */
static struct file_system_type simplefs_fs_type = {
    .name    = "simplefs",
    .owner   = THIS_MODULE,
    .kill_sb = simplefs_kill_sb,
    .mount   = simplefs_mount,
};

/* Module entry point: registers the simplefs filesystem type. */
static int __init init_simplefs(void)
{
    int err = register_filesystem(&simplefs_fs_type);

    return err;
}

  调用register_filesystem注册一个文件系统,传入参数为file_system_type结构体,用于描述文件系统类型。其中name为文件系统的名称,使用mount -t xxx指定文件系统类型时用的就是该名称;使用cat /proc/filesystems可以查询linux系统中所有已注册的文件系统类型。file_system_type中的mount和kill_sb分别对应mount和umount操作。

/*
 * Mount callback of simplefs_fs_type.
 * Delegates to mount_nodev(), which calls simplefs_fill_super() to populate
 * the super block. dev_name is not used by this function.
 */
static struct dentry *simplefs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
    struct dentry *root;

    root = mount_nodev(fs_type, flags, data, simplefs_fill_super);
    return root;
}

  在执行mount时会触发simplefs_mount调用,该函数中调用mount_nodev来挂载文件系统。mount_nodev表示该文件系统没有对应的磁盘设备,对应mount -t xxx none /xxx这样的命令,通常直接使用内存作为存储空间的文件系统会使用该函数,类似的还有tmpfs、devfs等。而如果文件系统的存储介质是磁盘,则需调用mount_bdev并指定具体的磁盘设备,对应的命令形如mount -t ext4 /dev/xxx /xxx。
  上面示例中,mount_nodev的一个重要参数是simplefs_fill_super,执行mount_nodev时会回调该函数。该函数的本意是填充超级块对象,同时创建根目录的inode,完成inode和dentry的初始化关联。在挂载文件系统时,fill_super完成了根目录inode的初始化,并填充了inode的操作函数集合,后续访问数据时就可以通过该inode的操作函数集合来访问文件。

/*
 * Populate a freshly allocated super block for simplefs.
 *
 * Sets the magic number, creates the root directory inode and attaches the
 * root dentry to sb->s_root. @data and @silent are unused.
 *
 * Returns 0 on success, -ENOMEM if the root inode or dentry cannot be
 * allocated.
 */
static int simplefs_fill_super(struct super_block *sb, void *data, int silent)
{
    struct inode *root;

    sb->s_magic = SIMPLEFS_MAGIC_NUMBER;

    root = simplefs_get_inode(sb, NULL, S_IFDIR | 0755);
    if (!root) {
        printk("get inode failed\n");
        return -ENOMEM;
    }

    /*
     * d_make_root() releases the inode itself when it fails, so calling
     * iput() again here would drop the reference twice.
     */
    sb->s_root = d_make_root(root);
    if (!sb->s_root) {
        printk("make root failed\n");
        return -ENOMEM;
    }

    return 0;
}

  调用simplefs_get_inode获取一个新的inode节点,然后调用d_make_root生成inode对应的dentry,再将dentry赋值给sb->s_root即可完成根目录的挂载。后续切换到该挂载目录后,相应的操作就会转为当前文件系统类型的操作。

/*
 * Allocate and initialise a new inode on @sb.
 *
 * @dir:  parent directory inode handed to inode_init_owner(); the root
 *        directory is created with NULL.
 * @mode: file type and permission bits.
 *
 * Every inode gets simplefs_dir_inode_ops as i_op; i_fop is chosen from the
 * file type in @mode (directory or regular file, otherwise left untouched).
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
static struct inode *simplefs_get_inode(struct super_block *sb,
        const struct inode *dir, umode_t mode)
{
    struct inode *node = new_inode(sb);

    if (!node)
        return NULL;

    node->i_ino = get_next_ino();
    node->i_sb = sb;
    inode_init_owner(&init_user_ns, node, dir, mode);
    node->i_op = &simplefs_dir_inode_ops;

    /* One timestamp sample shared by ctime, mtime and atime. */
    node->i_ctime = current_time(node);
    node->i_mtime = node->i_ctime;
    node->i_atime = node->i_mtime;

    if (S_ISDIR(mode))
        node->i_fop = &simplefs_dir_ops;
    else if (S_ISREG(mode))
        node->i_fop = &simplefs_file_ops;

    return node;
}

  上面是创建一个新的inode的示例,inode用于描述一个文件或目录。调用new_inode来分配一个新的inode,接着对inode进行基本的初始化,其中最重要的是填充inode->i_op和inode->i_fop。

/*
 * Inode operations installed on inodes by simplefs_get_inode():
 * name lookup, file creation and file removal.
 */
static struct inode_operations simplefs_dir_inode_ops = {
    .create = simplefs_create,
    .unlink = simplefs_unlink,
    .lookup = simplefs_lookup,
};

  inode->i_op是inode的操作方法集合。上面示例中,lookup用于在目录下按名字查找对应的dentry是否存在:dentry是对路径中某一级名字(文件或目录)的描述,其数据结构中关联了inode,因此可以通过dentry找到对应的inode。create对应普通文件的创建(目录的创建对应mkdir,本例未实现),unlink对应文件的删除。

/* File operations for directories: listing entries via simplefs_iterate(). */
static struct file_operations simplefs_dir_ops = {
    .iterate = simplefs_iterate,
    .owner   = THIS_MODULE,
};

/* File operations for regular files: plain read and write handlers. */
static struct file_operations simplefs_file_ops = {
    .write = simplefs_write_file,
    .read  = simplefs_read_file,
};

  inode->i_fop就是实际对文件的操作,与struct file->f_op相关联。这里区分目录和文件:如果inode是目录,那么目录中存储的是其下各个文件或目录的信息,所以重点的操作函数是遍历目录,对应上面的simplefs_iterate;如果inode是文件,主要的操作就是对文件内容的读或者写。

文件创建与删除

  在文件系统注册章节,执行mount操作后会回调填充超级块的函数,在该函数中创建了根目录,后续文件或目录的创建就可以基于这个根目录进行拓展。根目录对应一个inode,该inode的i_op中填充了操作函数,因此当我们在该目录下创建文件时,就会调用其中的create函数。

/*
 * inode_operations.create: create a regular file named by @dentry in @dir.
 *
 * Allocates a directory record (struct simplefs_file), a new inode and a
 * free i_block slot, stores the record in the slot and binds the inode to
 * @dentry. @ns and @excl are unused.
 *
 * Returns 0 on success, -ENAMETOOLONG if the name does not fit in the
 * record, -ENOMEM if the record cannot be allocated, -ENOSPC if no inode
 * or no free slot is available.
 */
static int simplefs_create (struct user_namespace *ns,
        struct inode *dir,struct dentry *dentry,
               umode_t mode, bool excl)
{
    struct inode *inode;
    struct simplefs_file *s_file;
    int block;

    /* ">=": filename[] must also hold the terminating NUL byte. */
    if (dentry->d_name.len >= sizeof(s_file->filename))
        return -ENAMETOOLONG;

    /* Zeroed so the unused tail of filename[] never holds stale heap data. */
    s_file = kzalloc(sizeof(*s_file), GFP_KERNEL);
    if (!s_file)
        return -ENOMEM;

    /* Allocate a new inode. */
    inode = simplefs_get_inode(dir->i_sb, dir, mode);
    if (!inode) {
        printk("get new inode faild\n");
        kfree(s_file);
        return -ENOSPC;
    }

    block = simplefs_get_block(i_block);
    if (block < 0) {
        /* Undo the allocations above instead of leaking them. */
        iput(inode);
        kfree(s_file);
        return -ENOSPC;
    }

    s_file->inode = inode->i_ino;
    s_file->mode = mode;
    strscpy(s_file->filename, dentry->d_name.name, sizeof(s_file->filename));

    i_block[block].data = s_file;

    dir->i_mtime = dir->i_ctime = current_time(dir);

    /* Attach the newly allocated inode to the directory entry. */
    d_instantiate(dentry, inode);

    return 0;
}

  create函数的入参中,dir为父目录的inode,dentry为新文件对应的dentry,此时它还没有关联inode。因此create操作的主要工作就是新创建一个inode,然后调用d_instantiate将这个inode与dentry进行关联。dentry本身是由上层(VFS)在调用create之前就已经创建好的。

目录遍历

  当我们执行ll或ls命令的时候,会列出当前目录下有哪些文件或目录,这个操作就会调用file_operations中的iterate成员函数。

/*
 * file_operations.iterate: emit the directory entries to @ctx.
 *
 * Positions 0 and 1 are "." and ".."; position i + 2 corresponds to slot i
 * of i_block. The scan resumes from the slot encoded in ctx->pos so that a
 * readdir split over several calls neither repeats nor skips entries.
 *
 * Always returns 0.
 */
static int simplefs_iterate (struct file *dir, struct dir_context *ctx)
{
    struct simplefs_file *f;
    int i;

    if (ctx->pos < 0 || ctx->pos >= SIMPLEFS_BLOCK_SIZE + 2)
        return 0;

    if (!dir_emit_dots(dir, ctx)) // . ..
        return 0;

    for (i = ctx->pos - 2; i < SIMPLEFS_BLOCK_SIZE; i++) {
        f = (struct simplefs_file *)i_block[i].data;

        if (i_block[i].use && f) {
            /*
             * Pass the real name length: handing over the whole buffer
             * would expose the bytes that follow the NUL terminator.
             * On failure ctx->pos stays on this slot, so the next call
             * retries it.
             */
            if (!dir_emit(ctx, f->filename,
                    strnlen(f->filename, sizeof(f->filename)),
                    f->inode, DT_UNKNOWN))
                break;
        }

        ctx->pos++;
    }

    return 0;
}

  上面示例中,iterate实现的关键是调用dir_emit,把每个目录项的信息(文件名、inode号等)填入ctx返回给用户空间的调用者(如ls),再由调用者显示到屏幕上。

文件读写

  文件的读写调用的是file_operations中read和write函数。

/*
 * file_operations.write: copy @len bytes from user space into the file's
 * in-memory buffer at offset *@ppos.
 *
 * The buffer lives in a d_block slot that is bound to the inode through
 * i_private on the first write. As in the original code, the file size
 * becomes the end of the last write.
 *
 * Returns the number of bytes written, 0 for an empty write, or a negative
 * errno: -EINVAL (NULL buffer or negative offset), -EFBIG (end offset does
 * not fit the 32-bit size field), -ENOSPC (no free slot), -ENOMEM,
 * -EFAULT.
 */
static ssize_t simplefs_write_file(struct file *f,
        const char __user *buf, size_t len, loff_t *ppos)
{
    struct inode *inode = file_inode(f);
    struct blks_desc *blk_desc = (struct blks_desc *)inode->i_private;
    loff_t pos = *ppos;
    char *newdata;
    int i;

    if (!buf || pos < 0)
        return -EINVAL;

    if (len == 0)
        return 0;

    /* blks_desc.size is 32 bits wide; reject anything that would wrap. */
    if (pos > U32_MAX || len > (size_t)(U32_MAX - pos))
        return -EFBIG;

    if (!blk_desc) {
        i = simplefs_get_block(d_block);
        if (i < 0)
            return -ENOSPC;

        blk_desc = inode->i_private = &d_block[i];
        /* A recycled slot may still carry a freed pointer and old size. */
        blk_desc->data = NULL;
        blk_desc->size = 0;
    }

    newdata = krealloc(blk_desc->data, pos + len, GFP_KERNEL);
    if (!newdata)
        return -ENOMEM;

    /*
     * Publish the pointer right away: krealloc() may have moved the data
     * and freed the old buffer, so the old pointer must not survive an
     * error return below.
     */
    blk_desc->data = newdata;

    /* Zero the gap when writing past the current end of file. */
    if (pos > blk_desc->size)
        memset(newdata + blk_desc->size, 0, pos - blk_desc->size);

    if (copy_from_user(newdata + pos, buf, len))
        return -EFAULT;

    *ppos = pos + len;
    blk_desc->size = *ppos;

    return len;
}

/*
 * file_operations.read: copy up to @len bytes of file data, starting at
 * *@ppos, to user space.
 *
 * Returns the number of bytes copied, 0 at end of file (including a file
 * that has never been written), -EINVAL for a negative offset or -EFAULT
 * if the user buffer cannot be written.
 */
static ssize_t simplefs_read_file(struct file *f,
        char __user *buf, size_t len, loff_t *ppos)
{
    struct inode *inode = file_inode(f);
    struct blks_desc *blk_desc = (struct blks_desc *)inode->i_private;
    size_t avail;

    /* No data block yet: the file is empty, which is EOF, not an error. */
    if (!blk_desc || !blk_desc->data)
        return 0;

    if (*ppos < 0)
        return -EINVAL;

    if (*ppos >= blk_desc->size)
        return 0;

    /* Clamp to the bytes left after *ppos, not to the whole file size. */
    avail = blk_desc->size - *ppos;
    len = min(avail, len);

    if (copy_to_user(buf, (char *)blk_desc->data + *ppos, len))
        return -EFAULT;

    *ppos += len;

    return len;
}

  读写函数就比较简单了,因为没有操作具体的磁盘文件,所以直接调用copy_to_user/copy_from_user在用户空间和内核空间之间搬运数据。实际的文件系统中,尤其是涉及磁盘的操作会复杂不少,会涉及到page cache相关的操作。本章节只是简单介绍概念,让读者有个整体的认识,后续章节我们会再具体介绍。

小结

  最后编译生成ko文件加载到内核中,就可以测试了,下面是测试命令。

# Load the module and mount the filesystem (no backing device).
insmod simplefs.ko
mkdir -p /mnt/simplefs
mount -t simplefs none /mnt/simplefs
cd /mnt/simplefs
# Create a file, write to it, read it back and list the directory.
touch a
echo 11111 >  a
cat a
# "ll" is a shell alias that is not defined everywhere; ls -l always works.
ls -l

以下是基于linux5.15完整的测试代码:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/namei.h>

/* Dimension of simplefs_file.filename and the limit checked on new names. */
#define SIMPLEFS_FILENAME_LEN 255
/* Number of slots in each of the i_block and d_block tables (a count, not a byte size). */
#define SIMPLEFS_BLOCK_SIZE 255

/*
 * Directory record describing one file; stored in the data pointer of an
 * i_block slot.
 */
struct simplefs_file {
    unsigned long inode;    /* inode number recorded when the file is created */
    umode_t mode;           /* mode the file was created with */
    /* +1 so a name of SIMPLEFS_FILENAME_LEN characters still has room for its NUL */
    char filename[SIMPLEFS_FILENAME_LEN + 1];
};

/* One slot of the i_block / d_block tables. */
struct blks_desc {
    void *data;     /* heap buffer owned by the slot: a simplefs_file record or file contents */
    uint32_t size;  /* number of valid bytes in data; maintained by the write path for d_block */
    uint8_t use;    /* non-zero while the slot is allocated */
};

/* Directory records: each used slot's data points to a struct simplefs_file. */
static struct blks_desc i_block[SIMPLEFS_BLOCK_SIZE];
/* File contents: a slot is bound to an inode through inode->i_private on first write. */
static struct blks_desc d_block[SIMPLEFS_BLOCK_SIZE];

/* Value stored in sb->s_magic by simplefs_fill_super(). */
#define SIMPLEFS_MAGIC_NUMBER 0x13131313

/*
 * NOTE(review): this symbol namespace presumably only exists on some vendor
 * kernel trees - confirm that it is needed for the kernel being built against.
 */
MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);

/*
 * Forward declarations: simplefs_get_inode() below takes the address of
 * these operation tables before they are initialised.
 */
static struct file_operations simplefs_dir_ops;

static struct file_operations simplefs_file_ops;

static struct inode_operations simplefs_dir_inode_ops;

/*
 * Claim the first free slot of @blks (a table of SIMPLEFS_BLOCK_SIZE slots).
 * The slot is marked as used before returning.
 *
 * Returns the slot index, or -1 when every slot is taken.
 */
static int simplefs_get_block(struct blks_desc *blks)
{
    struct blks_desc *slot;
    int idx = 0;

    while (idx < SIMPLEFS_BLOCK_SIZE) {
        slot = &blks[idx];
        if (slot->use == 0) {
            slot->use = 1;
            return idx;
        }
        idx++;
    }

    return -1;
}
/*
 * Allocate and initialise a new inode on @sb.
 *
 * @dir:  parent directory inode handed to inode_init_owner(); the root
 *        directory is created with NULL.
 * @mode: file type and permission bits.
 *
 * Every inode gets simplefs_dir_inode_ops as i_op; i_fop is chosen from the
 * file type in @mode (directory or regular file, otherwise left untouched).
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
static struct inode *simplefs_get_inode(struct super_block *sb,
        const struct inode *dir, umode_t mode)
{
    struct inode *node = new_inode(sb);

    if (!node)
        return NULL;

    node->i_ino = get_next_ino();
    node->i_sb = sb;
    inode_init_owner(&init_user_ns, node, dir, mode);
    node->i_op = &simplefs_dir_inode_ops;

    /* One timestamp sample shared by ctime, mtime and atime. */
    node->i_ctime = current_time(node);
    node->i_mtime = node->i_ctime;
    node->i_atime = node->i_mtime;

    if (S_ISDIR(mode))
        node->i_fop = &simplefs_dir_ops;
    else if (S_ISREG(mode))
        node->i_fop = &simplefs_file_ops;

    return node;
}
/*
 * inode_operations.create: create a regular file named by @dentry in @dir.
 *
 * Allocates a directory record (struct simplefs_file), a new inode and a
 * free i_block slot, stores the record in the slot and binds the inode to
 * @dentry. @ns and @excl are unused.
 *
 * Returns 0 on success, -ENAMETOOLONG if the name does not fit in the
 * record, -ENOMEM if the record cannot be allocated, -ENOSPC if no inode
 * or no free slot is available.
 */
static int simplefs_create (struct user_namespace *ns,
        struct inode *dir,struct dentry *dentry,
               umode_t mode, bool excl)
{
    struct inode *inode;
    struct simplefs_file *s_file;
    int block;

    /* ">=": filename[] must also hold the terminating NUL byte. */
    if (dentry->d_name.len >= sizeof(s_file->filename))
        return -ENAMETOOLONG;

    /* Zeroed so the unused tail of filename[] never holds stale heap data. */
    s_file = kzalloc(sizeof(*s_file), GFP_KERNEL);
    if (!s_file)
        return -ENOMEM;

    inode = simplefs_get_inode(dir->i_sb, dir, mode);
    if (!inode) {
        printk("get new inode faild\n");
        kfree(s_file);
        return -ENOSPC;
    }

    block = simplefs_get_block(i_block);
    if (block < 0) {
        /* Undo the allocations above instead of leaking them. */
        iput(inode);
        kfree(s_file);
        return -ENOSPC;
    }

    s_file->inode = inode->i_ino;
    s_file->mode = mode;
    strscpy(s_file->filename, dentry->d_name.name, sizeof(s_file->filename));

    i_block[block].data = s_file;

    dir->i_mtime = dir->i_ctime = current_time(dir);

    /* Bind the new inode to the dentry supplied by the caller. */
    d_instantiate(dentry, inode);

    return 0;
}
/*
 * inode_operations.unlink: remove the file named by @dentry.
 *
 * Releases the file's data slot (if one was ever bound to the inode) and
 * the directory record whose name matches, then drops the inode link count.
 * @dir is unused.
 *
 * Always returns 0, as the original did, even when no record matches.
 */
static int simplefs_unlink (struct inode *dir,struct dentry *dentry)
{
    int i;
    struct simplefs_file *s_file;
    struct inode *inode = dentry->d_inode;
    struct blks_desc *blk_desc = (struct blks_desc *)inode->i_private;

    if (blk_desc) {
        /*
         * Reset the slot completely: a pointer or size left behind would
         * be picked up by the next file that is handed this slot.
         */
        kfree(blk_desc->data);
        blk_desc->data = NULL;
        blk_desc->size = 0;
        blk_desc->use = 0;
        /* The inode must not keep pointing at a slot it no longer owns. */
        inode->i_private = NULL;
    }

    for (i = 0; i < SIMPLEFS_BLOCK_SIZE; i++) {
        if (!i_block[i].use)
            continue;

        s_file = (struct simplefs_file *) i_block[i].data;
        /* A slot can be marked used without a record attached. */
        if (!s_file || strcmp(s_file->filename, dentry->d_name.name))
            continue;

        kfree(s_file);
        i_block[i].data = NULL;
        i_block[i].use = 0;
        drop_nlink(inode);
        break;  /* one record per name: drop the link count only once */
    }
    return 0;
}
static struct dentry *simplefs_lookup (struct inode *parent_inode,
        struct dentry *child_dentry, unsigned int flags)
{
    int i;

    for (i = 0 ; i < SIMPLEFS_BLOCK_SIZE; i ++) {

        struct simplefs_file *f = (struct simplefs_file *)i_block[i].data;

        if (f && !strcmp(f->filename, child_dentry->d_name.name)) {

            struct inode *inode = simplefs_get_inode(parent_inode->i_sb,
                    parent_inode, f->mode);

            d_add(child_dentry, inode);

            return NULL;
        }
    }
    return NULL;
}

/*
 * file_operations.read: copy up to @len bytes of file data, starting at
 * *@ppos, to user space.
 *
 * Returns the number of bytes copied, 0 at end of file (including a file
 * that has never been written), -EINVAL for a negative offset or -EFAULT
 * if the user buffer cannot be written.
 */
static ssize_t simplefs_read_file(struct file *f,
        char __user *buf, size_t len, loff_t *ppos)
{
    struct inode *inode = file_inode(f);
    struct blks_desc *blk_desc = (struct blks_desc *)inode->i_private;
    size_t avail;

    /* No data block yet: the file is empty, which is EOF, not an error. */
    if (!blk_desc || !blk_desc->data)
        return 0;

    if (*ppos < 0)
        return -EINVAL;

    if (*ppos >= blk_desc->size)
        return 0;

    /* Clamp to the bytes left after *ppos, not to the whole file size. */
    avail = blk_desc->size - *ppos;
    len = min(avail, len);

    if (copy_to_user(buf, (char *)blk_desc->data + *ppos, len))
        return -EFAULT;

    *ppos += len;

    return len;
}
/*
 * file_operations.write: copy @len bytes from user space into the file's
 * in-memory buffer at offset *@ppos.
 *
 * The buffer lives in a d_block slot that is bound to the inode through
 * i_private on the first write. As in the original code, the file size
 * becomes the end of the last write.
 *
 * Returns the number of bytes written, 0 for an empty write, or a negative
 * errno: -EINVAL (NULL buffer or negative offset), -EFBIG (end offset does
 * not fit the 32-bit size field), -ENOSPC (no free slot), -ENOMEM,
 * -EFAULT.
 */
static ssize_t simplefs_write_file(struct file *f,
        const char __user *buf, size_t len, loff_t *ppos)
{
    struct inode *inode = file_inode(f);
    struct blks_desc *blk_desc = (struct blks_desc *)inode->i_private;
    loff_t pos = *ppos;
    char *newdata;
    int i;

    if (!buf || pos < 0)
        return -EINVAL;

    if (len == 0)
        return 0;

    /* blks_desc.size is 32 bits wide; reject anything that would wrap. */
    if (pos > U32_MAX || len > (size_t)(U32_MAX - pos))
        return -EFBIG;

    if (!blk_desc) {
        i = simplefs_get_block(d_block);
        if (i < 0)
            return -ENOSPC;

        blk_desc = inode->i_private = &d_block[i];
        /* A recycled slot may still carry a freed pointer and old size. */
        blk_desc->data = NULL;
        blk_desc->size = 0;
    }

    newdata = krealloc(blk_desc->data, pos + len, GFP_KERNEL);
    if (!newdata)
        return -ENOMEM;

    /*
     * Publish the pointer right away: krealloc() may have moved the data
     * and freed the old buffer, so the old pointer must not survive an
     * error return below.
     */
    blk_desc->data = newdata;

    /* Zero the gap when writing past the current end of file. */
    if (pos > blk_desc->size)
        memset(newdata + blk_desc->size, 0, pos - blk_desc->size);

    if (copy_from_user(newdata + pos, buf, len))
        return -EFAULT;

    *ppos = pos + len;
    blk_desc->size = *ppos;

    return len;
}
/*
 * file_operations.iterate: emit the directory entries to @ctx.
 *
 * Positions 0 and 1 are "." and ".."; position i + 2 corresponds to slot i
 * of i_block. The scan resumes from the slot encoded in ctx->pos so that a
 * readdir split over several calls neither repeats nor skips entries.
 *
 * Always returns 0.
 */
static int simplefs_iterate (struct file *dir, struct dir_context *ctx)
{
    struct simplefs_file *f;
    int i;

    if (ctx->pos < 0 || ctx->pos >= SIMPLEFS_BLOCK_SIZE + 2)
        return 0;

    if (!dir_emit_dots(dir, ctx)) // . ..
        return 0;

    for (i = ctx->pos - 2; i < SIMPLEFS_BLOCK_SIZE; i++) {
        f = (struct simplefs_file *)i_block[i].data;

        if (i_block[i].use && f) {
            /*
             * Pass the real name length: handing over the whole buffer
             * would expose the bytes that follow the NUL terminator.
             * On failure ctx->pos stays on this slot, so the next call
             * retries it.
             */
            if (!dir_emit(ctx, f->filename,
                    strnlen(f->filename, sizeof(f->filename)),
                    f->inode, DT_UNKNOWN))
                break;
        }

        ctx->pos++;
    }

    return 0;
}
/*
 * Populate a freshly allocated super block for simplefs.
 *
 * Sets the magic number, creates the root directory inode and attaches the
 * root dentry to sb->s_root. @data and @silent are unused.
 *
 * Returns 0 on success, -ENOMEM if the root inode or dentry cannot be
 * allocated.
 */
static int simplefs_fill_super(struct super_block *sb, void *data, int silent)
{
    struct inode *root;

    sb->s_magic = SIMPLEFS_MAGIC_NUMBER;

    root = simplefs_get_inode(sb, NULL, S_IFDIR | 0755);
    if (!root) {
        printk("get inode failed\n");
        return -ENOMEM;
    }

    /*
     * d_make_root() releases the inode itself when it fails, so calling
     * iput() again here would drop the reference twice.
     */
    sb->s_root = d_make_root(root);
    if (!sb->s_root) {
        printk("make root failed\n");
        return -ENOMEM;
    }

    return 0;
}

/*
 * Mount callback of simplefs_fs_type.
 * Delegates to mount_nodev(), which calls simplefs_fill_super() to populate
 * the super block. dev_name is not used by this function.
 */
static struct dentry *simplefs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
    struct dentry *root;

    root = mount_nodev(fs_type, flags, data, simplefs_fill_super);
    return root;
}

/*
 * Unmount callback of simplefs_fs_type: hands the super block to
 * kill_anon_super(). The i_block / d_block tables are not touched here.
 */
static void simplefs_kill_sb(struct super_block *sb)
{
    kill_anon_super(sb);
    return;
}

static struct file_operations simplefs_dir_ops = {
    .owner = THIS_MODULE,
    .iterate = simplefs_iterate,
};

static struct file_operations simplefs_file_ops = {
    .read = simplefs_read_file,
    .write = simplefs_write_file,
};
/*
 * Filesystem type descriptor passed to register_filesystem().
 * .name is the string matched by "mount -t simplefs"; .mount and .kill_sb
 * are the callbacks used when the filesystem is mounted and unmounted.
 */
static struct file_system_type simplefs_fs_type = {
    .name    = "simplefs",
    .owner   = THIS_MODULE,
    .kill_sb = simplefs_kill_sb,
    .mount   = simplefs_mount,
};

/* Module entry point: registers the simplefs filesystem type. */
static int __init init_simplefs(void)
{
    int err = register_filesystem(&simplefs_fs_type);

    return err;
}

/*
 * Module exit point: unregisters the filesystem type and frees every heap
 * buffer still held by the i_block / d_block tables, which would otherwise
 * leak when the module is unloaded.
 */
static void __exit exit_simplefs(void)
{
    int i;

    unregister_filesystem(&simplefs_fs_type);

    for (i = 0; i < SIMPLEFS_BLOCK_SIZE; i++) {
        /* Only slots still marked used are known to own their buffer. */
        if (i_block[i].use) {
            kfree(i_block[i].data);
            i_block[i].data = NULL;
            i_block[i].use = 0;
        }
        if (d_block[i].use) {
            kfree(d_block[i].data);
            d_block[i].data = NULL;
            d_block[i].size = 0;
            d_block[i].use = 0;
        }
    }
}

/* Hook the load / unload entry points into the module loader. */
module_init(init_simplefs);
module_exit(exit_simplefs);

/* Module metadata. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Laumy");
MODULE_DESCRIPTION("a simple file system");

  更完善的文件系统示例:https://github.com/sysprog21/simplefs/tree/master,一个简单的文件系统主要就是围绕四大对象进行填充描述,而超级块和inode是基础。

  • 分配超级块结构,填充超级块信息。
  • 定义具体文件系统inode,如struct ext4_inode。其中每个具体的文件系统inode会内嵌一个VFS inode,具体文件系统的inode在.alloc_inode中分配。
  • 实现具体文件系统inode的操作函数集合,包括创建目录/文件。
  • 实现file操作函数集合,包括目录遍历,文件的读写操作等。
  • 实现文件与磁盘的映射address space操作集合。