Linux驱动开发进阶-文件系统

2025-03-16 约 8745 字预计阅读 18 分钟

https://bing.ee123.net/img/rand?artid=146301696

Linux驱动开发进阶 - 文件系统

1、前言

学习参考书籍：李文山《Linux驱动开发进阶》。
本文是我学习上述书籍时，为了避免囫囵吞枣而整理的笔记。日后实际遇到问题时，我仍然会回头查阅原书，所以本文不太具备教学功能。

2、学习目标

在Linux中，文件系统可以分为两大类：虚拟文件系统（如sysfs、procfs、devtmpfs）和实际物理存储设备的文件系统（如ext2、ext3、ext4、vfat、fat32）。那Linux如何管理这些文件系统呢？同时本文将在最后编写一个虚拟的文件系统驱动程序。

3、VFS虚拟文件系统

Linux内核的设计哲学非常注重抽象和模块化，这种设计使得系统更加灵活、可扩展且易于维护。Linux为了管理各种类型的文件系统（sysfs、procfs、ext2、ext3、ext4、fat32、…），抽象出了 VFS（Virtual File System Switch，虚拟文件系统切换层）。一个功能的诞生肯定是为了解决实际问题，那VFS虚拟文件系统的诞生也是为了管理众多不同的文件系统。在Linux的VFS虚拟文件系统中，有四个核心对象是理解和实现文件系统的关键，分别是：超级块（Super Block）、索引节点（Inode）、目录项（Dentry）和文件对象（File）。

3.1、超级块（Super Block）

在Linux文件系统中，许多文件系统本身就存在显示的超级块，如ext2、ext3、ext4。它们在分区的第一个块（或前几个块）中存储了超级块。超级块包含了文件系统的元数据，如总块数、总Inode数、块大小、文件系统状态等。但不是所有的文件系统都显示存在超级块，如Windows下常用的文件系统，如ntfs、fat32等，它们有自己的结构来存储文件系统的元数据和管理信息，这些结构在功能上类似于超级块，但名称和具体实现不同。在Linux下，无论该文件系统是否存在超级块，挂载时必须初始化超级块数据结构。 struct super_block { struct list_head s_list; /* Keep this first / dev_t s_dev; / search index; not kdev_t / unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; / Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; const struct quotactl_ops s_qcop; const struct export_operations s_export_op; unsigned long s_flags; unsigned long s_iflags; / internal SB_I_ flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; int s_count; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations s_cop; #ifdef GENKSYMS /

Android ABI CRC preservation due to commit 391cceee6d43 (“fscrypt:
stop using keyrings subsystem for fscrypt_master_key”) changing this
type. Size is the same, this is a private field. */ struct key s_master_keys; / master crypto keys in use */ #else struct fscrypt_keyring s_master_keys; / master crypto keys in use */ #endif #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif #ifdef CONFIG_UNICODE struct unicode_map s_encoding; __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; / alternate root dentries for NFS / struct list_head s_mounts; / list of mounts; not for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; struct mtd_info s_mtd; struct hlist_node s_instances; unsigned int s_quota_types; / Bitmask of supported quota types / struct quota_info s_dquot; / Diskquota specific options / struct sb_writers s_writers; /
Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
s_fsnotify_marks together for cache efficiency. They are frequently
accessed and rarely modified. */ void s_fs_info; / Filesystem private info / / Granularity of c/m/atime in ns (cannot be worse than a second) / u32 s_time_gran; / Time limits for c/m/atime in seconds */ time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; struct fsnotify_mark_connector __rcu s_fsnotify_marks; #endif char s_id[32]; / Informational name / uuid_t s_uuid; / UUID / unsigned int s_max_links; fmode_t s_mode; /
The next field is for VFS only. No filesystems have any business
even looking at it. You had been warned. / struct mutex s_vfs_rename_mutex; / Kludge / /
Filesystem subtype. If non-empty the filesystem type field
in /proc/mounts will be “type.subtype” */ const char *s_subtype; const struct dentry_operations s_d_op; / default d_op for dentries / /
Saved pool identifier for cleancache (-1 means none) / int cleancache_poolid; struct shrinker s_shrink; / per-sb shrinker handle / / Number of inodes with nlink == 0 but still referenced / atomic_long_t s_remove_count; / Pending fsnotify inode refs / atomic_long_t s_fsnotify_inode_refs; / Being remounted read-only / int s_readonly_remount; / per-sb errseq_t for reporting writeback errors via syncfs / errseq_t s_wb_err; / AIO completions deferred from interrupt context */ struct workqueue_struct s_dio_done_wq; struct hlist_head s_pins; /
Owning user namespace and default context in which to
interpret filesystem uids, gids, quotas, device nodes,
xattrs and security labels. */ struct user_namespace s_user_ns; /
The list_lru structure is essentially just a pointer to a table
of per-node lru lists, each of which has its own spinlock.
There is no need to put them into separate cachelines. / struct list_lru s_dentry_lru; struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; struct mutex s_sync_lock; / sync serialisation lock / /
Indicates how deep in a filesystem stack this SB is / int s_stack_depth; / s_inode_list_lock protects s_inodes / spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; / all inodes / spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; / writeback inodes */ ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } __randomize_layout;
s_list：链表结构体，包含prev和next指针，用来连接前驱和后继节点
s_dev：设备号（dev_t类型），标识文件系统所在的块设备，例如/dev/sda、/dev/nvme0n1所对应的设备号
s_blocksize_bits：块大小占用的位数，例如块大小为4096则该值为12
s_blocksize：数据块大小，单位为字节
s_maxbytes：文件的最大长度，单位字节
s_type：文件系统类型
s_op：超级块操作方法，指向具体的文件系统
dq_op：磁盘配额（disk quota）操作方法集合
s_qcop：磁盘配额控制（quotactl）操作方法集合
s_flags：文件系统的挂载标志（例如是否以只读方式挂载）
s_magic：文件系统魔数，每个文件系统都有各自的魔数
s_root：文件系统的根目录文件
s_umount：读写信号量，用于同步对超级块的挂载、卸载、重新挂载等操作
s_count：超级块的使用计数
s_active：超级块的引用计数
s_security：用于安全的私有指针
s_d_op：dentry操作方法集合其中超级块的操作函数结构体struct super_operations内容如下： struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_super) (struct super_block *); int (*freeze_fs) (struct super_block *); int (*thaw_super) (struct super_block *); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); struct dquot **(*get_dquots)(struct inode ); #endif int (bdev_try_to_free_page)(struct super_block, struct page, gfp_t); long (*nr_cached_objects)(struct super_block *, struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); }; 这些操作方法不用全部实现，下面对部分成员进行说明：
alloc_inode：分配一个inode
destroy_inode：释放alloc_inode为该inode分配的资源（与alloc_inode配对使用）
free_inode：真正释放内存中的inode，在RCU回调中被调用
dirty_inode：当inode被标记为“脏”时由VFS调用
write_inode：将inode的内容写回存储设备
drop_inode：当最后一个用户释放inode时调用该函数
put_super：释放超级块，卸载文件系统时调用此函数
sync_fs：同步文件系统，当有“脏页”时，更新数据
statfs：查看文件系统信息，例如魔数、文件名最长多少、页大小

3.2、dentry

dentry翻译过来叫“目录项”。在上面的struct super_block结构体里有一个成员s_root就是struct dentry类型： struct dentry { /* RCU lookup touched fields / unsigned int d_flags; / protected by d_lock / seqcount_spinlock_t d_seq; / per dentry seqlock / struct hlist_bl_node d_hash; / lookup hash list */ struct dentry d_parent; / parent directory */ struct qstr d_name; struct inode d_inode; / Where the name belongs to - NULL is

negative / unsigned char d_iname[DNAME_INLINE_LEN]; / small names / / Ref lookup also touches following / struct lockref d_lockref; / per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block d_sb; / The root of the dentry tree / unsigned long d_time; / used by d_revalidate */ void d_fsdata; / fs-specific data / union { struct list_head d_lru; / LRU list */ wait_queue_head_t d_wait; / in-lookup ones only / }; struct list_head d_child; / child of parent list / struct list_head d_subdirs; / our children / /
d_alias and d_rcu can share memory / union { struct hlist_node d_alias; / inode alias list / struct hlist_bl_node d_in_lookup_hash; / only for in-lookup ones */ struct rcu_head d_rcu; } d_u; ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); } __randomize_layout;
d_flags：目录项标志
d_hash：哈希表表项链表
d_parent：父目录
d_name：目录名称
d_inode：指向目录或文件的inode
d_iname：短文件名，当文件名小于DNAME_INLINE_LEN时，文件名存储在数组中
d_op：目录项操作方法集合
d_sb：指向该目录项的超级块指针
d_child：同级目录链表
d_subdirs：子目录项链表对于某些文件系统（如 ext2/ext3/ext4、XFS等），它们在磁盘上有一个物理的超级块。dentry并不存在于磁盘中，它是vfs虚拟文件系统抽象出来的一个对象，它只存在于内存中。目录项是文件系统中用于将文件名与inode号关联起来的数据结构，作用是快速定位文件路径，减少路径解析的时间。 struct dentry中的d_op操作方法集合如下： struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *); void (*d_canonical_path)(const struct path *, struct path *); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } ____cacheline_aligned;
d_revalidate：使一个目录项重新生效
d_hash：生成一个哈希值，用于VFS向哈希表中加入一个目录项
d_compare：比较两个目录项名称
d_delete：删除目录项
d_init：初始化目录项
d_release：释放目录项
d_iput：当目录项失去其关联的inode时（即目录项即将被释放之前）调用该函数
d_dname：动态生成目录项的路径名（主要用于伪文件系统）

上面有列举到，在struct dentry中有一个成员是d_inode，这里的d_inode就是我们将要介绍的第三个核心对象。

3.3、inode

inode描述了磁盘上的文件信息，将所有文件的索引拿出来组成一个表，即inode表（inode table）。下图展示inode和dentry的关系（图片来自作者李文山的《Linux驱动开发进阶》）：当文件系统需要访问一个文件时，以下步骤会发生：

路径解析：

从根目录开始，逐级解析路径中的每个目录项，找到目标文件的 dentry。
每个目录项在目录文件中存储了文件名和对应的 inode 号。

找到 inode：

通过 dentry 的 d_inode 字段，找到与该文件名关联的 inode 对象。
如果 inode 对象尚未加载到内存中，文件系统会从磁盘读取对应的 inode 数据，并将其加载到内存中的 VFS inode 结构中。

访问文件数据：

通过 inode 中的数据块指针，找到文件数据在磁盘上的实际存储位置。
文件系统通过这些数据块指针读取或写入文件的实际数据。我们通过struct inode来看看inode存储了什么： struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void i_security; #endif / Stat data, not accessed from path walking / unsigned long i_ino; /
Filesystems may only read i_nlink directly. They shall use the
following functions for modification:
(set|clear|inc|drop)_nlink
inode_(inc|dec)_link_count / union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; struct timespec64 i_ctime; spinlock_t i_lock; / i_blocks, i_bytes, maybe i_size / unsigned short i_bytes; u8 i_blkbits; u8 i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif / Misc / unsigned long i_state; struct rw_semaphore i_rwsem; unsigned long dirtied_when; / jiffies of first dirtying / unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; / backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback i_wb; / the associated cgroup wb / / foreign inode detection, see wbc_detach_inode() / int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; / inode LRU list / struct list_head i_sb_list; struct list_head i_wb_list; / backing dev writeback list / union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic64_t i_sequence; / see futex / atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; / struct files open RO */ #endif union { const struct file_operations i_fop; / former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; char i_link; unsigned i_dir_seq; }; __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; / all events this inode cares about */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void i_private; / fs or device private pointer */ ANDROID_KABI_RESERVE(1); 
ANDROID_KABI_RESERVE(2); } __randomize_layout;
i_mode：文件的访问权限
i_op：inode操作函数集合
i_sb：超级块指针，指向文件系统的超级块
i_mapping：地址映射描述
i_nlink：硬连接数目
i_rdev：设备号，在Linux中，所有的设备即是文件
i_size：文件大小，单位字节
i_atime：最后访问时间
i_mtime：最后修改时间
i_ctime：最后改变时间
i_blkbits：块大小表示位数，例如块为4096字节时，此时值为12
i_blocks：文件所占用的block数量
i_hash：哈希表
i_dentry：目录项链表
i_fop：文件操作方法集合
i_data：设备数据地址映射
i_devices：块设备链表
i_pipe：管道文件
i_cdev：字符设备文件
i_link：连接文件
i_private：私有指针，一般用来存放数据块的首地址其中成员i_op为inode的操作集合： struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec64 *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } ____cacheline_aligned;
lookup：在dentry下查找inode
create：在dentry下创建一个inode
link：为一个inode创建一个硬链接（即新增一个指向该inode的目录项）
unlink：删除一个目录项（硬链接）
mkdir：在dentry下创建一个目录inode
rmdir：在dentry下删除一个inode
mknod：创建设备节点
update_time：更新文件时间

3.4、file

file对象是与进程相关的文件描述符的内核表示。它是文件系统和进程之间交互的核心数据结构之一。file对象是Linux内核中用于表示打开文件的结构体。当进程通过系统调用（如 open()）打开文件时，内核会创建一个file对象，并将其添加到进程的文件描述符表中。当进程通过系统调用（如 close()）关闭文件时，内核会释放对应的file对象。file结构体如下： struct file { union { struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; struct inode f_inode; / cached value */ const struct file_operations f_op; /

Protects f_ep_links, f_flags.
Must not be taken from IRQ context. */ spinlock_t f_lock; enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; #ifdef CONFIG_SECURITY void f_security; #endif / needed for tty driver, and maybe others */ void private_data; #ifdef CONFIG_EPOLL / Used by fs/eventpoll.c to link all the hooks to this file / struct list_head f_ep_links; struct list_head f_tfile_llink; #endif / #ifdef CONFIG_EPOLL */ struct address_space f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; / for syncfs / ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_OEM_DATA(1); } __randomize_layout attribute((aligned(4))); / lest something weird decides that 2 is OK */
f_path：文件路径，包含了dentry和vfsmount
f_inode：指向inode的指针
f_op：文件操作方法集合
f_count：文件对象使用计数
f_flags：文件被打开时指定的标志，例如O_RDONLY，O_WRONLY
f_mode：文件读写权限
f_pos：文件当前的偏移量，即当前读写的位置相对于文件开始地址的偏移
f_owner：文件所有者
private_data：私有数据指针，较为常用
f_mapping：文件的页缓冲映射地址

4、文件系统的挂载

当一个磁盘上的分区被挂载时，Linux内核会读取该分区上的超级块和根目录的索引节点(inode)，然后创建struct mount结构体和根目录的dentry对象，并将超级块信息保存在struct super_block结构体中。其余的inode并不会在挂载时全部扫描，而是在访问到对应文件时才按需读入内存，以struct inode结构体的形式挂在超级块的inode链表（s_inodes）上。整个过程就建立了从struct mount到inode之间的关系。

5、文件系统的注册

内核维护一个全局链表 file_systems，用于存储所有已注册的文件系统类型。每个文件系统类型通过 file_system_type 结构体注册到这个链表中。

5.1、文件系统的注册过程

5.1.2、定义文件系统类型

文件系统开发者需要定义一个 file_system_type 结构体实例，并实现必要的操作函数（如挂载和卸载函数）。 static struct file_system_type myfs_type = { .owner = THIS_MODULE, .name = “myfs”, .mount = myfs_mount, .kill_sb = myfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, };

5.1.3、注册文件系统

使用 register_filesystem() 函数将文件系统类型注册到内核中。这通常在文件系统模块加载时完成。 static int __init myfs_init(void) { return register_filesystem(&myfs_type); } int register_filesystem(struct file_system_type * fs) { int res = 0; struct file_system_type ** p; if (fs->parameters && !fs_validate_description(fs->name, fs->parameters)) return -EINVAL; BUG_ON(strchr(fs->name, ‘.’)); if (fs->next) return -EBUSY; write_lock(&file_systems_lock); p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else *p = fs; write_unlock(&file_systems_lock); return res; }

5.1.4、注销文件系统

使用 unregister_filesystem() 函数将文件系统从内核中注销。这通常在文件系统模块卸载时完成。 static void __exit myfs_exit(void) { unregister_filesystem(&myfs_type); } int unregister_filesystem(struct file_system_type * fs) { struct file_system_type ** tmp; write_lock(&file_systems_lock); tmp = &file_systems; while (*tmp) { if (fs == *tmp) { *tmp = fs->next; fs->next = NULL; write_unlock(&file_systems_lock); synchronize_rcu(); return 0; } tmp = &(*tmp)->next; } write_unlock(&file_systems_lock); return -EINVAL; }

5.2、文件系统的挂载与注册的关系

挂载前的注册：在文件系统被挂载之前，它必须先注册到内核中。只有注册过的文件系统类型才能被挂载。
挂载时的识别：当用户尝试挂载一个文件系统时（如通过 mount 命令），内核会遍历 file_systems 链表，查找匹配的文件系统类型。
动态加载：某些文件系统（如通过模块加载的文件系统）可以在运行时动态注册和注销。例如，ntfs 文件系统可以通过加载 ntfs.ko 模块动态注册。

6、实现一个虚拟文件系统

注：程序源码同样来自李文山的《Linux驱动开发进阶》。但原书源码是基于6.1的kernel，而我的实验环境是5.x的kernel，部分接口函数会不一样，所以下面展示的源码略有修改。

6.1、定义文件系统结构

一般需要自定义开发文件系统时，可能才需要编写文件系统驱动程序。下图是本次要实现的虚拟文件系统结构（图片来自作者李文山的《Linux驱动开发进阶》）：

6.2、完整的驱动程序

#include #include #include #include #include #include #include #define MEMFS_INVALID 0xFFFFFFFF #define MEMFS_FILE_NAME_MAX 16 #define MEMFS_INODE_MAX 128 #define MEMFS_BLK_MAX 128 #define MEMFS_FILE_SIZE_MAX 1024 struct memfs_sb { uint32_t blk_size_bit; // 块大小占用的位数 uint32_t block_size; // 数据块大小 uint32_t magic; // 文件系统魔数 uint32_t private; // }; struct memfs_inode { char file_name[MEMFS_FILE_NAME_MAX]; // 文件名称 uint32_t mode; // 记录文件或者文件夹的读写权限 uint32_t idx; // 记录文件或者文件夹在inode bitmap的索引节点号 uint32_t child; // 记录当前文件夹下的目录或者文件的第一个(=MEMFS_INVALID: no child) uint32_t brother; // 记录当前文件或目录的同级目录或者文件(=MEMFS_INVALID: no brother) uint32_t file_size; // uint32_t data; // =MEMFS_INVALID: dir 0~127: file }; struct memfs_block { char data[1024]; }; struct memfs_sb g_mf_sb = { .blk_size_bit = 10, .block_size = 1024, .magic = 0x20221001, }; char g_inode_bitmap[16]={0}; // 128bit空间的一个inode bitmap, 每个bit都是一个开关量，标记着inode池中对应的inode是否已使用 char g_block_bitmap[16]={0}; // 128bit空间的一个block bitmap static struct memfs_inode *g_mf_inode; // 指向inode池 static struct memfs_block *g_mf_block; // 指向block池 static struct inode_operations memfs_inode_ops; void set_bitmap(char *bitmap, uint32_t index) { *(bitmap + (index»3)) |= (1« (index%8)); } void reset_bitmap(char bitmap, uint32_t index) { (bitmap + (index»3)) &= ~(1« (index%8)); } uint32_t get_idle_index(char bitmap) { uint8_t tmp; for(int i = 0; i < 16; i++) // 循环检查16个字节 { if(bitmap[i] != 0xFF) // 如果该字节(8个bit)存在空闲bit { tmp = bitmap[i]; for(int j = 0; j<8; j++) // 逐位检查8个bit { if((tmp & 0x1) == 0) // 找到空闲bit { set_bitmap(bitmap, i8 + j); // 设置改bit为非空闲 return i8 + j; // 返回下标 } else { tmp »= 1; } } } } return MEMFS_INVALID; } void put_used_index(char bitmap, uint32_t index) { reset_bitmap(bitmap, index); } int memfs_alloc_mem(void) { / 分配inode池，大小5120bytes / g_mf_inode = kzalloc(5120, GFP_KERNEL); if(!g_mf_inode) return -1; / 分配block池，大小128KB / g_mf_block = kzalloc(1281024, GFP_KERNEL); if(!g_mf_block) return -1; / 初始化inode的brother和child属性 */ for(int i 
= 0; i < MEMFS_INODE_MAX; i++) { g_mf_inode[i].brother = MEMFS_INVALID; g_mf_inode[i].child = MEMFS_INVALID; } return 0; } void memfs_free_mem(void) { kfree(g_mf_inode); kfree(g_mf_block); } static int memfs_readdir(struct file *filp, struct dir_context *ctx) { struct memfs_inode *mf_inode, *child_inode; if (ctx->pos) return 0; mf_inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; if (!S_ISDIR(mf_inode->mode)) { return -ENOTDIR; } if(mf_inode->child != MEMFS_INVALID) { child_inode = &g_mf_inode[mf_inode->child]; } else { return 0; } while(child_inode->idx != MEMFS_INVALID) { if (!dir_emit(ctx, child_inode->file_name, MEMFS_FILE_NAME_MAX, child_inode->idx, DT_UNKNOWN)) { return 0; } ctx->pos += sizeof(struct memfs_inode); if(child_inode->brother != MEMFS_INVALID) child_inode = &g_mf_inode[child_inode->brother]; else break; } return 0; } ssize_t memfs_read_file(struct file * filp, char __user * buf, size_t len, loff_t *ppos) { struct memfs_inode *inode; char *buffer; inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; // 获取实际要操作的inode if (ppos >= inode->file_size) return 0; buffer = (char)&g_mf_block[inode->data]; // 获取block池中对应位置的首地址 buffer += *ppos; // len = min((size_t)(inode->file_size - *ppos), len); if (copy_to_user(buf, buffer, len)) // 拷贝到用户态 { return -EFAULT; } *ppos += len; // 更新偏移 return len; } ssize_t memfs_write_file(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct memfs_inode *inode; char *buffer; inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; // 获取实际要操作的inode if (ppos + len > MEMFS_FILE_SIZE_MAX ) return 0; buffer = (char)&g_mf_block[inode->data]; // 获取block池中对应位置的首地址 buffer += *ppos; // if (copy_from_user(buffer, buf, len)) { return -EFAULT; } *ppos += len; // 更新偏移 inode->file_size = *ppos; // 更新文件大小 return len; } const struct file_operations memfs_file_operations = { .read = memfs_read_file, .write = memfs_write_file, }; const struct file_operations memfs_dir_operations = { .owner = THIS_MODULE, 
.iterate_shared = memfs_readdir, }; //dir: 当前目录的inode //dentry：要创建的文件的dentry //mode：要创建的文件的mode static int memfs_do_create(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; struct super_block *sb; struct memfs_inode *mf_inode, *p_mf_inode, tmp_mf_inode; uint32_t idx_inode; / 获取sb指针 / sb = dir->i_sb; / 判断是否是目录和常规文件，如果不是，返回错误 / if (!S_ISDIR(mode) && !S_ISREG(mode)) { return -EINVAL; } if (strlen(dentry->d_name.name) > MEMFS_FILE_NAME_MAX) { return -ENAMETOOLONG; } inode = new_inode(sb); if (!inode) { return -ENOMEM; } / 初始化现在要创建的inode的sb / idx_inode = get_idle_index(g_inode_bitmap); // 获取一个空闲的inode，用于保存当前创建的目录或者文件的inode信息 if (idx_inode == MEMFS_INVALID) { return -ENOSPC; } inode->i_sb = sb; inode->i_op = &memfs_inode_ops; // 初始化当前的inode的ops inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); // 初始化创建时间和修改时间为当前时间 inode->i_ino = idx_inode; mf_inode = &g_mf_inode[idx_inode]; // mf_inode->idx = idx_inode; // mf_inode->mode = mode; / 接下来都是inode的初始化 */ if (S_ISDIR(mode)) // 如果创建的是一个文件，则分配一个block，如果是一个目录则不用分配block { mf_inode->data = MEMFS_INVALID; inode->i_fop = &memfs_dir_operations; } else if (S_ISREG(mode)) { mf_inode->child = MEMFS_INVALID; mf_inode->file_size = 0; inode->i_fop = &memfs_file_operations; mf_inode->data = get_idle_index(g_block_bitmap); if(mf_inode->data == MEMFS_INVALID) { return -ENOSPC; } } p_mf_inode = &g_mf_inode[dir->i_ino]; // 获取当前新创建的父目录节点 if(p_mf_inode->child == MEMFS_INVALID) // 当前目录为空目录 { p_mf_inode->child = mf_inode->idx; // 父目录节点的child直接指向现在要创建的inode } else // 非空目录，找到最后一个child { tmp_mf_inode = &g_mf_inode[p_mf_inode->child]; // 第一个child while(tmp_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = &g_mf_inode[tmp_mf_inode->brother]; // 找到父目录最后一个child } tmp_mf_inode->brother = mf_inode->idx; // 最后一个child，并设置brother } strcpy(mf_inode->file_name, dentry->d_name.name); // 初始化内核的dentry名称 inode_init_owner(inode, dir, mode); // 添加inode到dir中 d_add(dentry, inode); // 绑定内核dentry与inode return 0; } static 
int memfs_inode_mkdir(struct inode *dir, struct dentry *direntry, umode_t mode) { return memfs_do_create(dir, direntry, S_IFDIR | mode); } static int memfs_inode_create(struct inode *dir, struct dentry *direntry, umode_t mode, bool excl) { return memfs_do_create(dir, direntry, mode); } //parent_inode: 父目录节点 //find_dentry: 要查找的dentry static struct dentry *memfs_inode_lookup(struct inode *parent_inode, struct dentry *find_dentry, unsigned int flags) { return NULL; } //删除空目录 //dentry: 待删除空目录的dentry int memfs_inode_rmdir(struct inode *dir, struct dentry *dentry) { uint32_t index = dentry->d_inode->i_ino; // 待删除的空目录的inode下标 struct memfs_inode *p_mf_inode, *child_mf_inode, *tmp_mf_inode; if(g_mf_inode[index].child != MEMFS_INVALID) // 如果是非空目录，返回错误 return -ENOTEMPTY; p_mf_inode = &g_mf_inode[dir->i_ino]; // 获取当前目录inode child_mf_inode = &g_mf_inode[p_mf_inode->child]; // 获取父目录inode的第一个child put_used_index(g_inode_bitmap, index); if(p_mf_inode->child == index) // 如果要删除的空目录是父目录的第一个child { if(child_mf_inode->brother == MEMFS_INVALID) // 如果当前node没有brother了 p_mf_inode->child = MEMFS_INVALID; // 那么父目录inode也不会有child else p_mf_inode->child = child_mf_inode->brother; // 否则父目录inode指向当前inode的brother } else // 如果要删除的空目录不是父目录的第一个child { while(child_mf_inode->idx != MEMFS_INVALID) { if(child_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = child_mf_inode; child_mf_inode = &g_mf_inode[child_mf_inode->brother]; // 获取brother if(child_mf_inode->idx == index) // 找到待删除的空目录了 { if(child_mf_inode->brother != MEMFS_INVALID) tmp_mf_inode->brother = child_mf_inode->brother; // 在链表关系中，移除了待删除的这个空目录 else tmp_mf_inode->brother = MEMFS_INVALID; break; } } } } g_mf_inode[index].idx = MEMFS_INVALID; g_mf_inode[index].brother = MEMFS_INVALID; return simple_unlink(dir, dentry); } //删除文件操作 int memfs_inode_unlink(struct inode *dir, struct dentry *dentry) { uint32_t index = dentry->d_inode->i_ino; struct memfs_inode *p_mf_inode, *child_mf_inode, tmp_mf_inode; p_mf_inode = &g_mf_inode[dir->i_ino]; 
//获取第一个child child_mf_inode = &g_mf_inode[p_mf_inode->child]; put_used_index(g_inode_bitmap, index); put_used_index(g_block_bitmap, g_mf_inode[index].data); if(p_mf_inode->child == index) { if(child_mf_inode->brother == MEMFS_INVALID) p_mf_inode->child = MEMFS_INVALID; else p_mf_inode->child = child_mf_inode->brother; } else { while(child_mf_inode->idx != MEMFS_INVALID) { if(child_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = child_mf_inode; child_mf_inode = &g_mf_inode[child_mf_inode->brother]; if(child_mf_inode->idx == index) { if(child_mf_inode->brother != MEMFS_INVALID) tmp_mf_inode->brother = child_mf_inode->brother; else tmp_mf_inode->brother = MEMFS_INVALID; break; } } } } g_mf_inode[index].idx = MEMFS_INVALID; g_mf_inode[index].brother = MEMFS_INVALID; return simple_unlink(dir, dentry); } static struct inode_operations memfs_inode_ops = { .create = memfs_inode_create, // 在dentry下创建一个inode .lookup = memfs_inode_lookup, // 在dentry下查找inode .mkdir = memfs_inode_mkdir, // 在dentry下创建一个目录inode .rmdir = memfs_inode_rmdir, // 在dentry下删除一个inode .unlink = memfs_inode_unlink, // 删除文件 }; / 每挂载一个块设备，都需要初始化相应的超级块，

该函数就是初始化超级块。 */ static int memfs_demo_fill_super(struct super_block *sb, void *data, int silent) { struct inode root_inode; int mode = S_IFDIR | 0755; root_inode = new_inode(sb); // 新建inode，用于保存根节点 root_inode->i_ino = 0; // 设置根节点的编号为0 root_inode->i_mode = mode; // 初始化根节点权限 root_inode->i_sb = sb; // 设置根节点的超级块 root_inode->i_op = &memfs_inode_ops; // 设置根节点的节点操作集合 root_inode->i_fop = &memfs_dir_operations; // 设置根节点目录操作集合 root_inode->i_atime = root_inode->i_mtime = root_inode->i_ctime = current_time(root_inode); //设置根节点的创建修改时间为当前时间 / 初始化inode池中的第0个inode，第0个inode就是根节点 / strcpy(g_mf_inode[0].file_name, “memfs”); g_mf_inode[0].mode = mode; g_mf_inode[0].idx = 0; g_mf_inode[0].child = MEMFS_INVALID; g_mf_inode[0].brother = MEMFS_INVALID; set_bitmap(g_inode_bitmap, g_mf_inode[0].idx); // 置inode bitmap的第0位为1 root_inode->i_private = &g_mf_inode[0]; // 将根节点保存到root_inode的私有数据中 / 初始化磁盘的描述信息(super block) */ sb->s_root = d_make_root(root_inode); // 设置上面分配的inode为根目录 sb->s_magic = g_mf_sb.magic; // 文件系统魔数 sb->s_blocksize_bits = g_mf_sb.blk_size_bit; // 页大小所占的位数为12 sb->s_blocksize = g_mf_sb.blk_size_bit; // 页大小为1024Bytes return 0; } static struct dentry *memfs_demo_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, memfs_demo_fill_super); } static void memfs_kill_sb(struct super_block sb) { kill_anon_super(sb); } / 文件系统类型 / static struct file_system_type memfs_type = { .owner = THIS_MODULE, .name = “memfs”, // 文件系统名字 .mount = memfs_demo_mount, // 挂载文件系统时，所执行的函数 .kill_sb = memfs_kill_sb, // 卸载文件系统时，所执行的函数 }; static int __init memfs_demo_init(void) { / 分配inode池和block池 / if(memfs_alloc_mem()) { printk(KERN_ERR “alloc memory failed\n”); return -ENOMEM; } / 注册文件系统 / return register_filesystem(&memfs_type); } static void __exit memfs_demo_exit(void) { / 释放inode池和block池 / memfs_free_mem(); / 卸载文件系统 */ unregister_filesystem(&memfs_type); } module_init(memfs_demo_init); module_exit(memfs_demo_exit); 
MODULE_LICENSE(“GPL”); MODULE_AUTHOR(“ ”); MODULE_DESCRIPTION(“memory fs demo”); MODULE_ALIAS(“fs:memfs”); MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); Makefile如下： KERN_DIR = /home/cohen/sdk/docker/rk356x-sdk/kernel/ all: make -C $(KERN_DIR) M=$(PWD) modules clean: make -C $(KERN_DIR) M=$(PWD) clean rm -f modules.order obj-m += my_memfs.o ccflags-y += -std=gnu99

6.3、测试

将编译好的驱动程序拷贝到板卡，安装驱动： insmod my_memfs.ko 此时，已经将一个名为memfs的虚拟文件系统注册进了内核。
挂载文件系统：

-t 表示文件系统的类型

none 表示没有IO设备

/mnt 为挂载点

mount -t memfs none /mnt 3. 进入/mnt，创建文件： 4. 创建目录： 5. 删除空目录： 6. 删除文件：