虚拟文件系统

  Linux系统支持多种不同的文件系统。为了使用户可以通过统一的文件操作界面对各种不同的文件系统进行操作,内核在具体的文件系统(ext2/ext4等)之上增加了一层抽象,向上提供归一化的文件操作接口,这个抽象层就称为虚拟文件系统(VFS)。
为了实现抽象层,Linux内核定义了4个重要的数据结构对象。

  • super block:管理文件系统的相关描述信息。
  • Inode:一个文件对应一个inode,包含文件的相关信息,包括文件大小、创建时间、块大小等。
  • Dentry:表示一个目录项。
  • File:进程打开的文件。

上面4个对象都有对应的函数操作方法

  • super_operations:文件系统的操作方法,如alloc_inode、write_inode。
  • inode_operations:文件的操作方法,如create、link。
  • dentry_operations:目录项的操作方法,如d_compare、d_delete。
  • file_operations:进程打开文件后的操作方法,如read、write。

四大对象数据结构

超级块对象

  超级块,用于描述设备上的文件系统的总体信息如块大小、文件大小上限、文件系统类型、挂载点信息等。在构建一个文件系统时,内核会从存储设备特定位置获取相关的控制信息来填充内存中的超级块对象,当构建完成一个文件系统时就会对应一个超级块对象。

struct super_block

include/linux/fs.h

/*
 * In-memory superblock: overall description of one filesystem on a device
 * (block size, maximum file size, filesystem type, root dentry, ...).
 * Excerpt only; fields not discussed in this note are omitted.
 */
struct super_block {
    struct list_head    s_list;     /* Keep this first */
    dev_t           s_dev;      /* search index; _not_ kdev_t */
    unsigned char       s_blocksize_bits;
    unsigned long       s_blocksize;
    loff_t          s_maxbytes; // upper limit on file size
    struct file_system_type *s_type; // filesystem type
    const struct super_operations   *s_op; // superblock methods
    const struct dquot_operations   *dq_op; // disk quota methods
    const struct quotactl_ops   *s_qcop;
    const struct export_operations *s_export_op;
    unsigned long       s_flags;
    unsigned long       s_iflags;   /* internal SB_I_* flags */
    unsigned long       s_magic;
    struct dentry       *s_root; // root directory dentry of the filesystem (its mount root)
    struct rw_semaphore s_umount;
    const struct dentry_operations *s_d_op; /* default d_op for dentries */
    /*
     * Backing block device. When the filesystem is mounted through
     * mount_bdev(), the matching bdev is looked up by device name and
     * stored here; once this descriptor is available, later read/write
     * operations can be issued to the block device.
     */
    struct block_device *s_bdev;
    /* ... remaining fields omitted ... */
};

struct super_operations

include/linux/fs.h

/*
 * Method table for a superblock; struct super_block points to it
 * through its s_op member.
 */
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb); 
// Create and initialize an inode under the given superblock; an inode corresponds to one directory or file instance.
    void (*destroy_inode)(struct inode *);
    void (*free_inode)(struct inode *);
    void (*dirty_inode) (struct inode *, int flags);
    int (*write_inode) (struct inode *, struct writeback_control *wbc); // write the given inode to disk
    int (*drop_inode) (struct inode *);
    void (*evict_inode) (struct inode *);
    void (*put_super) (struct super_block *);
    int (*sync_fs)(struct super_block *sb, int wait); // synchronize filesystem data with the disk
    int (*freeze_super) (struct super_block *);
    int (*freeze_fs) (struct super_block *);
    int (*thaw_super) (struct super_block *);
    int (*unfreeze_fs) (struct super_block *);
    int (*statfs) (struct dentry *, struct kstatfs *);
    int (*remount_fs) (struct super_block *, int *, char *);
    void (*umount_begin) (struct super_block *);
    int (*show_options)(struct seq_file *, struct dentry *);
    int (*show_devname)(struct seq_file *, struct dentry *);
    int (*show_path)(struct seq_file *, struct dentry *);
    int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
    ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
    ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
    struct dquot **(*get_dquots)(struct inode *);
#endif
    long (*nr_cached_objects)(struct super_block *,
                  struct shrink_control *);
    long (*free_cached_objects)(struct super_block *,
                    struct shrink_control *);
};

索引节点对象

  inode对象代表了一个实际的文件,文件被访问前需要先获取到该文件的inode。struct inode结构体包含了通用的属性和方法,如文件类型、文件大小、权限、创建时间等信息。

struct inode

include/linux/fs.h

struct inode {
    umode_t         i_mode;//访问权限
    unsigned short      i_opflags;
    kuid_t          i_uid;
    kgid_t          i_gid;
    unsigned int        i_flags; //文件系统标志

#ifdef CONFIG_FS_POSIX_ACL
    struct posix_acl    *i_acl;
    struct posix_acl    *i_default_acl;
#endif

    const struct inode_operations   *i_op; //索引节点的操作方法
    struct super_block  *i_sb; //所属超级块
struct address_space    *i_mapping; //文件缓存
......
    union {
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
        void (*free_inode)(struct inode *);
    };
    struct file_lock_context    *i_flctx;
    struct address_space    i_data;
    struct list_head    i_devices;
    ......
}

struct inode_operations

include/linux/fs.h

/*
 * Method table for an inode; struct inode points to it through its
 * i_op member.
 */
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
// Search for a directory entry in the given directory; to obtain an inode, its dentry has to be obtained first.
    const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
    int (*permission) (struct user_namespace *, struct inode *, int);
    struct posix_acl * (*get_acl)(struct inode *, int, bool);

    int (*readlink) (struct dentry *, char __user *,int);

    int (*create) (struct user_namespace *, struct inode *,struct dentry *,
               umode_t, bool);
    // Used by the creat/open system calls to create or open a file.
    int (*link) (struct dentry *,struct inode *,struct dentry *);
    int (*unlink) (struct inode *,struct dentry *);
    int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
            const char *);
    int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
              umode_t);// create a directory
    int (*rmdir) (struct inode *,struct dentry *);// remove a directory
    int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
              umode_t,dev_t);// create special files such as pipes and device nodes
    int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
            struct inode *, struct dentry *, unsigned int);
    int (*setattr) (struct user_namespace *, struct dentry *,
            struct iattr *);
    int (*getattr) (struct user_namespace *, const struct path *,
            struct kstat *, u32, unsigned int);
    ssize_t (*listxattr) (struct dentry *, char *, size_t);
    int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
              u64 len);
    int (*update_time)(struct inode *, struct timespec64 *, int);
    int (*atomic_open)(struct inode *, struct dentry *,
               struct file *, unsigned open_flag,
               umode_t create_mode);
    int (*tmpfile) (struct user_namespace *, struct inode *,
            struct dentry *, umode_t);
    int (*set_acl)(struct user_namespace *, struct inode *,
               struct posix_acl *, int);
    int (*fileattr_set)(struct user_namespace *mnt_userns,
                struct dentry *dentry, struct fileattr *fa);
    int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);

} ____cacheline_aligned;

  VFS层定义了通用的struct inode,具体的文件系统中通常还会定义属于自己的inode,如struct ext2_inode、struct ext4_inode。这些xxx_inode描述的是具体文件系统在磁盘上的信息(如文件的元数据);在分配struct inode时,会用xxx_inode中的值来填充struct inode。超级块、目录等其他对象也类似。

目录项对象

  dentry虽翻译为目录项,但和文件系统中的目录并不是同一个概念。dentry对应文件系统中的对象(包括目录、文件等),反映的是该对象在内核文件系统树中所处的位置。每个文件除了有inode,同时也会有一个dentry结构,记录了文件的名称、父目录、子目录等信息,形成我们看到的层级树状结构。与inode不同的是,dentry只存在于内存中,磁盘上并没有对应的实体,因此目录项不会涉及回写磁盘的操作。
  dentry的重要作用之一是在搜索文件时找出文件对应的inode。遍历目录是比较耗时的,为了加快遍历和查找,内核使用hash表来缓存dentry。
  一个路径的各个组成部分,不管是目录还是普通文件,都是一个dentry对象。例如在/home/test.c中,/、home、test.c各是一个目录项。为了提高搜索效率,这些目录项会缓存到hash表中。

struct dentry

include/linux/dcache.h

struct dentry {
    /* RCU lookup touched fields */
    unsigned int d_flags;       /* protected by d_lock */
    seqcount_spinlock_t d_seq;  /* per dentry seqlock */
    struct hlist_bl_node d_hash;  //用于目录项目查找的hash表
    struct dentry *d_parent;    //父目录项
    struct qstr d_name; //目录项目名称
    struct inode *d_inode;  //目录项关联的索引节点
    unsigned char d_iname[DNAME_INLINE_LEN];    /* small names */

    /* Ref lookup also touches following */
    struct lockref d_lockref;   /* per-dentry lock and refcount */
    const struct dentry_operations *d_op;
    struct super_block *d_sb;   /* The root of the dentry tree */
    unsigned long d_time;       /* used by d_revalidate */
    void *d_fsdata;         /* fs-specific data */ 具体文件系统中内存目录项目。

    union {
        struct list_head d_lru;     /* LRU list */
        wait_queue_head_t *d_wait;  /* in-lookup ones only */
    };
    struct list_head d_child;   /* child of parent list */
    struct list_head d_subdirs; /* our children */
    /*
     * d_alias and d_rcu can share memory
     */
    union {
        struct hlist_node d_alias;  /* inode alias list */
        struct hlist_bl_node d_in_lookup_hash;  /* only for in-lookup ones */
        struct rcu_head d_rcu;
    } d_u;

    ANDROID_KABI_RESERVE(1);
    ANDROID_KABI_RESERVE(2);
} __randomize_layout;

struct dentry_operations

include/linux/dcache.h

/*
 * Method table for a dentry; struct dentry points to it through its
 * d_op member, and struct super_block carries a default one in s_d_op.
 */
struct dentry_operations {
    int (*d_revalidate)(struct dentry *, unsigned int);
    int (*d_weak_revalidate)(struct dentry *, unsigned int);
    int (*d_hash)(const struct dentry *, struct qstr *); // generate the hash for a dentry
    int (*d_compare)(const struct dentry *,
            unsigned int, const char *, const struct qstr *); // compare two file names
    int (*d_delete)(const struct dentry *);
    int (*d_init)(struct dentry *);
    void (*d_release)(struct dentry *);
    void (*d_prune)(struct dentry *);
    void (*d_iput)(struct dentry *, struct inode *);
    char *(*d_dname)(struct dentry *, char *, int);
    struct vfsmount *(*d_automount)(struct path *);
    int (*d_manage)(const struct path *, bool);
    struct dentry *(*d_real)(struct dentry *, const struct inode *);
    void (*d_canonical_path)(const struct path *, struct path *);
    ANDROID_KABI_RESERVE(1);
    ANDROID_KABI_RESERVE(2);
    ANDROID_KABI_RESERVE(3);
    ANDROID_KABI_RESERVE(4);
} ____cacheline_aligned;

文件对象

  文件对象描述的是进程和文件之间的关系。对文件的操作都是由进程发起的,进程每打开一个文件,内核就创建一个文件对象;同一个文件可以被不同的进程打开,从而创建不同的文件对象。

struct file

include/linux/fs.h

/*
 * Open file object: created when a process opens a file; it records the
 * open mode, current position, reference count and the method table used
 * to operate on the file.
 */
struct file {
    union {
        struct llist_node   fu_llist;
        struct rcu_head     fu_rcuhead;
    } f_u;
    struct path     f_path;
    struct inode        *f_inode;   /* cached value */
    const struct file_operations    *f_op; // file operation methods

    /*
     * Protects f_ep, f_flags.
     * Must not be taken from IRQ context.
     */
    spinlock_t      f_lock;
    enum rw_hint        f_write_hint;
    atomic_long_t       f_count;
    unsigned int        f_flags;
    fmode_t         f_mode;
    struct mutex        f_pos_lock;
    loff_t          f_pos;
    struct fown_struct  f_owner;
    const struct cred   *f_cred;
    struct file_ra_state    f_ra;

    u64         f_version;
#ifdef CONFIG_SECURITY
    void            *f_security;
#endif
    /* needed for tty driver, and maybe others */
    void            *private_data;

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct hlist_head   *f_ep;
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping;
    errseq_t        f_wb_err;
    errseq_t        f_sb_err; /* for syncfs */

    ANDROID_KABI_RESERVE(1);
    ANDROID_KABI_RESERVE(2);
} __randomize_layout;

struct file_operations

include/linux/fs.h

/*
 * Method table for an open file; struct file points to it through f_op
 * and struct inode through i_fop.
 */
struct file_operations {
    struct module *owner;
    loff_t (*llseek) (struct file *, loff_t, int);
    ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
    ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
    ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
    ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
    int (*iopoll)(struct kiocb *kiocb, bool spin);
    int (*iterate) (struct file *, struct dir_context *); // read directory entries
    int (*iterate_shared) (struct file *, struct dir_context *);
    __poll_t (*poll) (struct file *, struct poll_table_struct *);
    long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
    long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
    int (*mmap) (struct file *, struct vm_area_struct *);
    unsigned long mmap_supported_flags;
    int (*open) (struct inode *, struct file *);
    int (*flush) (struct file *, fl_owner_t id);
    int (*release) (struct inode *, struct file *);
    int (*fsync) (struct file *, loff_t, loff_t, int datasync);
    int (*fasync) (int, struct file *, int);
    int (*lock) (struct file *, int, struct file_lock *);
    ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
    unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
    int (*check_flags)(int);
    int (*flock) (struct file *, int, struct file_lock *);
    ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
    ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
    int (*setlease)(struct file *, long, struct file_lock **, void **);
    long (*fallocate)(struct file *file, int mode, loff_t offset,
              loff_t len);
    void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
    unsigned (*mmap_capabilities)(struct file *);
#endif
    ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
            loff_t, size_t, unsigned int);
    loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
                   struct file *file_out, loff_t pos_out,
                   loff_t len, unsigned int remap_flags);
    int (*fadvise)(struct file *, loff_t, loff_t, int);

    ANDROID_KABI_RESERVE(1);
    ANDROID_KABI_RESERVE(2);
    ANDROID_KABI_RESERVE(3);
    ANDROID_KABI_RESERVE(4);
} __randomize_layout;

下面描述进程与文件操作联系

  每个进程打开一个文件后,都会得到一个文件描述符fd。进程的struct file *fd_array[]保存了该进程打开的所有文件,称为文件描述符表;表中的每一项都是一个指针,指向一个描述已打开文件的struct file对象,fd就是该表的下标。struct file对象描述了文件的打开模式等信息,当进程打开一个文件时,内核就会创建一个file对象。需要注意的是,file对象并不专属于某个进程,fd才专属于某个进程;不同的文件描述符可以指向同一个file对象,表示共享打开的文件。struct file中有一个引用计数(f_count),记录该对象被引用的次数,只有引用计数为0时,内核才会销毁file对象。

其他数据结构

文件系统类型

  Linux支持多种文件系统,内部用一个特殊的数据结构来描述每种文件系统的功能和行为。

include/linux/fs.h

/*
 * Describes one filesystem type: its name, FS_* flags and the callbacks
 * used to mount it and to tear down its superblocks.
 */
struct file_system_type {
    const char *name; // filesystem name
    int fs_flags;
#define FS_REQUIRES_DEV     1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE      4
#define FS_USERNS_MOUNT     8   /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16  /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
#define FS_THP_SUPPORT      8192    /* Remove once all fs converted */
#define FS_RENAME_DOES_D_MOVE   32768   /* FS will handle d_move() during rename() internally. */
    int (*init_fs_context)(struct fs_context *);
    const struct fs_parameter_spec *parameters;
    struct dentry *(*mount) (struct file_system_type *, int,
               const char *, void *);// mount the filesystem
    void (*kill_sb) (struct super_block *);
    struct module *owner;
    struct file_system_type * next;
    struct hlist_head fs_supers;

    struct lock_class_key s_lock_key;
    struct lock_class_key s_umount_key;
    struct lock_class_key s_vfs_rename_key;
    struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

    struct lock_class_key i_lock_key;
    struct lock_class_key i_mutex_key;
    struct lock_class_key invalidate_lock_key;
    struct lock_class_key i_mutex_dir_key;

};

文件系统挂载

  Linux文件系统只有被挂载上,才能进行访问,使用一个vfsmount来描述一个挂载点。

include/linux/mount.h

/*
 * Describes one mount: the root dentry of the mounted tree, the
 * superblock it belongs to, the mount flags and the user namespace
 * recorded for the mount.
 */
struct vfsmount {
    struct dentry *mnt_root;    /* root of the mounted tree */
    struct super_block *mnt_sb; /* pointer to superblock */
    int mnt_flags;
    struct user_namespace *mnt_userns;

    ANDROID_KABI_RESERVE(1);
    ANDROID_KABI_RESERVE(2);
    ANDROID_KABI_RESERVE(3);
    ANDROID_KABI_RESERVE(4);
} __randomize_layout;