Linux驱动开发进阶-文件系统
Linux驱动开发进阶 - 文件系统
1、前言
- 学习参考书籍:
- 本文是为了学习上述书籍时,不能囫囵吞枣,才写的。等实际遇到问题了,我也只会去回看原书籍。所以本文不太具备教学功能。
2、学习目标
在Linux中,文件系统可以分为两大类:虚拟文件系统(如sysfs、procfs、devtmpfs)和实际物理存储设备的文件系统(如ext2、ext3、ext4、vfat、fat32)。那Linux如何管理这些文件系统呢?同时本文将在最后编写一个虚拟的文件系统驱动程序。
3、VFS虚拟文件系统
Linux内核的设计哲学非常注重抽象和模块化,这种设计使得系统更加灵活、可扩展且易于维护。Linux为了管理各种类型的文件系统(sysfs、procfs、ext2、ext3、ext4、fat32、…),抽象出了
VFS(Virtual File System Switch,虚拟文件系统切换层)。
一个功能的诞生肯定是为了解决实际问题,那VFS虚拟文件系统的诞生也是为了管理众多不同的文件系统。在Linux的VFS虚拟文件系统中,有四个核心对象是理解和实现文件系统的关键,分别是:超级块(Super
Block)、索引节点(Inode)、目录项(Dentry)和文件对象(File)。
3.1、超级块(Super Block)
在Linux文件系统中,许多文件系统本身就存在显示的超级块,如ext2、ext3、ext4。它们在分区的第一个块(或前几个块)中存储了超级块。超级块包含了文件系统的元数据,如总块数、总Inode数、块大小、文件系统状态等。 但不是所有的文件系统都显示存在超级块,如Windows下常用的文件系统,如ntfs、fat32等,它们有自己的结构来存储文件系统的元数据和管理信息,这些结构在功能上类似于超级块,但名称和具体实现不同。 在Linux下,无论该文件系统是否存在超级块,挂载时必须初始化超级块数据结构。 struct super_block { struct list_head s_list; /* Keep this first / dev_t s_dev; / search index; not kdev_t / unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; / Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; const struct quotactl_ops s_qcop; const struct export_operations s_export_op; unsigned long s_flags; unsigned long s_iflags; / internal SB_I_ flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; int s_count; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations s_cop; #ifdef GENKSYMS /
- Android ABI CRC preservation due to commit 391cceee6d43 (“fscrypt:
- stop using keyrings subsystem for fscrypt_master_key”) changing this
- type. Size is the same, this is a private field. */ struct key s_master_keys; / master crypto keys in use */ #else struct fscrypt_keyring s_master_keys; / master crypto keys in use */ #endif #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif #ifdef CONFIG_UNICODE struct unicode_map s_encoding; __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; / alternate root dentries for NFS / struct list_head s_mounts; / list of mounts; not for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; struct mtd_info s_mtd; struct hlist_node s_instances; unsigned int s_quota_types; / Bitmask of supported quota types / struct quota_info s_dquot; / Diskquota specific options / struct sb_writers s_writers; /
- Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
- s_fsnotify_marks together for cache efficiency. They are frequently
- accessed and rarely modified. */ void s_fs_info; / Filesystem private info / / Granularity of c/m/atime in ns (cannot be worse than a second) / u32 s_time_gran; / Time limits for c/m/atime in seconds */ time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; struct fsnotify_mark_connector __rcu s_fsnotify_marks; #endif char s_id[32]; / Informational name / uuid_t s_uuid; / UUID / unsigned int s_max_links; fmode_t s_mode; /
- The next field is for VFS only. No filesystems have any business
- even looking at it. You had been warned. / struct mutex s_vfs_rename_mutex; / Kludge / /
- Filesystem subtype. If non-empty the filesystem type field
- in /proc/mounts will be “type.subtype” */ const char *s_subtype; const struct dentry_operations s_d_op; / default d_op for dentries / /
- Saved pool identifier for cleancache (-1 means none) / int cleancache_poolid; struct shrinker s_shrink; / per-sb shrinker handle / / Number of inodes with nlink == 0 but still referenced / atomic_long_t s_remove_count; / Pending fsnotify inode refs / atomic_long_t s_fsnotify_inode_refs; / Being remounted read-only / int s_readonly_remount; / per-sb errseq_t for reporting writeback errors via syncfs / errseq_t s_wb_err; / AIO completions deferred from interrupt context */ struct workqueue_struct s_dio_done_wq; struct hlist_head s_pins; /
- Owning user namespace and default context in which to
- interpret filesystem uids, gids, quotas, device nodes,
- xattrs and security labels. */ struct user_namespace s_user_ns; /
- The list_lru structure is essentially just a pointer to a table
- of per-node lru lists, each of which has its own spinlock.
- There is no need to put them into separate cachelines. / struct list_lru s_dentry_lru; struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; struct mutex s_sync_lock; / sync serialisation lock / /
- Indicates how deep in a filesystem stack this SB is / int s_stack_depth; / s_inode_list_lock protects s_inodes / spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; / all inodes / spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; / writeback inodes */ ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } __randomize_layout;
- s_list:链表结构体,包含prev和next指针,用来连接前驱和后继节点
- s_dev:块设备标识符,例如/dev/sda、/dev/nvme
- s_blocksize_bits:块大小占用的位数,例如块大小为4096则该值为12
- s_blocksize:数据块大小,单位为字节
- s_maxbytes:文件的最大长度,单位字节
- s_type:文件系统类型
- s_op:超级块操作方法,指向具体的文件系统
- dq_op:和s_op一样,但用于磁盘配额(quota)的特定操作方法
- s_qcop:和s_op一样,但用于配置磁盘配额的特定操作方法
- s_flags:文件系统的挂载标志
- s_magic:文件系统魔数,每个文件系统都有各自的魔数
- s_root:文件系统的根目录文件
- s_umount:用于文件系统对文件进行读写同步
- s_count:超级块的使用计数
- s_active:超级块的引用计数
- s_security:用于安全的私有指针
- s_d_op
:dentry操作方法集合 其中超级块的操作函数结构体struct super_operations内容如下: struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_super) (struct super_block *); int (*freeze_fs) (struct super_block *); int (*thaw_super) (struct super_block *); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); struct dquot **(*get_dquots)(struct inode ); #endif int (bdev_try_to_free_page)(struct super_block, struct page, gfp_t); long (*nr_cached_objects)(struct super_block *, struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); }; 这些操作方法不用全部实现,下面对部分成员进行说明:alloc_inode
:分配一个inode
- destroy_inode:释放一个硬盘上的inode
- free_inode:释放内存中的inode
- dirty_inode:用于将“脏”块标记的方法
- write_inode:写入数据到inode中
- drop_inode:当最后一个用户释放inode时调用该函数
- put_super:释放超级块,卸载文件系统时调用此函数
- sync_fs:同步文件系统,当有“脏页”时,更新数据
- statfs:查看文件系统信息,例如魔数、文件名最长多少、页大小
3.2、dentry
dentry翻译过来叫“目录项”。在上面的struct super_block结构体里有一个成员s_root就是struct dentry类型: struct dentry { /* RCU lookup touched fields / unsigned int d_flags; / protected by d_lock / seqcount_spinlock_t d_seq; / per dentry seqlock / struct hlist_bl_node d_hash; / lookup hash list */ struct dentry d_parent; / parent directory */ struct qstr d_name; struct inode d_inode; / Where the name belongs to - NULL is
- negative / unsigned char d_iname[DNAME_INLINE_LEN]; / small names / / Ref lookup also touches following / struct lockref d_lockref; / per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block d_sb; / The root of the dentry tree / unsigned long d_time; / used by d_revalidate */ void d_fsdata; / fs-specific data / union { struct list_head d_lru; / LRU list */ wait_queue_head_t d_wait; / in-lookup ones only / }; struct list_head d_child; / child of parent list / struct list_head d_subdirs; / our children / /
- d_alias and d_rcu can share memory / union { struct hlist_node d_alias; / inode alias list / struct hlist_bl_node d_in_lookup_hash; / only for in-lookup ones */ struct rcu_head d_rcu; } d_u; ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); } __randomize_layout;
- d_flags:目录项标志
- d_hash:哈希表表项链表
- d_parent:父目录
- d_name:目录名称
- d_inode:指向目录或文件的inode
- d_iname:短文件名,当文件名小于DNAME_INLINE_LEN时,文件名存储在数组中
- d_op:目录项操作方法集合
- d_sb:指向该目录项的超级块指针
- d_child:同级目录链表
- d_subdirs
:子目录项链表 对于某些文件系统(如 ext2/ext3/ext4、XFS等),它们在磁盘上有一个物理的超级块。dentry并不存在于磁盘中,它是vfs虚拟文件系统抽象出来的一个对象,它只存在于内存中。目录项是文件系统中用于将文件名与inode号关联起来的数据结构,作用是快速定位文件路径,减少路径解析的时间。struct dentry中的d_op操作方法集合如下: struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *); void (*d_canonical_path)(const struct path *, struct path *); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } ____cacheline_aligned;
- d_revalidate:使一个目录项重新生效
- d_hash:生成一个哈希值,用于VFS向哈希表中加入一个目录项
- d_compare:比较两个目录项名称
- d_delete:删除目录项
- d_init:初始化目录项
- d_release:释放目录项
- d_iput:当目录项的inode为NULL时,此时会调用该函数
- d_dname:设置目录项名称 上面有例举到,在struct dentry中有一个成员是d_inode,这里d_inode就是我们将要介绍的第三个核心对象。
3.3、inode
inode描述了磁盘上的文件信息,将所有文件的索引拿出来组成一个表,即inode表(inode
table)。下图展示inode和dentry的关系(图片来自作者李文山的《Linux驱动开发进阶》):
当文件系统需要访问一个文件时,以下步骤会发生:
- 路径解析:
- 从根目录开始,逐级解析路径中的每个目录项,找到目标文件的 dentry。
- 每个目录项在目录文件中存储了文件名和对应的 inode 号。
- 找到 inode:
- 通过 dentry 的 d_inode 字段,找到与该文件名关联的 inode 对象。
- 如果 inode 对象尚未加载到内存中,文件系统会从磁盘读取对应的 inode 数据,并将其加载到内存中的 VFS inode 结构中。
- 访问文件数据:
- 通过 inode 中的数据块指针,找到文件数据在磁盘上的实际存储位置。
- 文件系统通过这些数据块指针读取或写入文件的实际数据。 我们通过struct inode来看看inode存储了什么: struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void i_security; #endif / Stat data, not accessed from path walking / unsigned long i_ino; /
- Filesystems may only read i_nlink directly. They shall use the
- following functions for modification:
- (set|clear|inc|drop)_nlink
- inode_(inc|dec)_link_count / union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; struct timespec64 i_ctime; spinlock_t i_lock; / i_blocks, i_bytes, maybe i_size / unsigned short i_bytes; u8 i_blkbits; u8 i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif / Misc / unsigned long i_state; struct rw_semaphore i_rwsem; unsigned long dirtied_when; / jiffies of first dirtying / unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; / backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback i_wb; / the associated cgroup wb / / foreign inode detection, see wbc_detach_inode() / int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; / inode LRU list / struct list_head i_sb_list; struct list_head i_wb_list; / backing dev writeback list / union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic64_t i_sequence; / see futex / atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; / struct files open RO */ #endif union { const struct file_operations i_fop; / former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; char i_link; unsigned i_dir_seq; }; __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; / all events this inode cares about */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void i_private; / fs or device private pointer */ ANDROID_KABI_RESERVE(1); 
ANDROID_KABI_RESERVE(2); } __randomize_layout;
- i_mode:文件的访问权限
- i_op:inode操作函数集合
- i_sb:超级块指针,指向文件系统的超级块
- i_mapping:地址映射描述
- i_nlink:硬链接数目
- i_rdev:设备号,在Linux中,所有的设备即是文件
- i_size:文件大小,单位字节
- i_atime:最后访问时间
- i_mtime:最后修改时间
- i_ctime:最后改变时间
- i_blkbits:块大小表示位数,例如块为4096字节时,此时值为12
- i_blocks:文件所占用的block数量
- i_hash:哈希表
- i_dentry:目录项链表
- i_fop:文件操作方法集合
- i_data:设备数据地址映射
- i_devices:块设备链表
- i_pipe:管道文件
- i_cdev:字符设备文件
- i_link:链接文件
- i_private
:私有指针,一般用来存放数据块的首地址 其中成员i_op为inode的操作集合: struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec64 *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); } ____cacheline_aligned;lookup
:在dentry下查找inode
- create:在dentry下创建一个inode
- link:为一个inode创建一个硬链接
- unlink:删除一个链接文件
- mkdir:在dentry下创建一个目录inode
- rmdir:在dentry下删除一个inode
- mknod:创建设备节点
- update_time:更新文件时间
3.4、file
file对象是与进程相关的文件描述符的内核表示。它是文件系统和进程之间交互的核心数据结构之一。file对象是Linux内核中用于表示打开文件的结构体。当进程通过系统调用(如 open())打开文件时,内核会创建一个file对象,并将其添加到进程的文件描述符表中。当进程通过系统调用(如 close())关闭文件时,内核会释放对应的file对象。file结构体如下: struct file { union { struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; struct inode f_inode; / cached value */ const struct file_operations f_op; /
- Protects f_ep_links, f_flags.
- Must not be taken from IRQ context. */ spinlock_t f_lock; enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; #ifdef CONFIG_SECURITY void f_security; #endif / needed for tty driver, and maybe others */ void private_data; #ifdef CONFIG_EPOLL / Used by fs/eventpoll.c to link all the hooks to this file / struct list_head f_ep_links; struct list_head f_tfile_llink; #endif / #ifdef CONFIG_EPOLL */ struct address_space f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; / for syncfs / ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_OEM_DATA(1); } __randomize_layout attribute((aligned(4))); / lest something weird decides that 2 is OK */
- f_path:文件路径,包含了dentry和vfsmount
- f_inode:指向inode的指针
- f_op:文件操作方法集合
- f_count:文件对象使用计数
- f_flags:文件被打开时指定的标志,例如O_RDONLY,O_WRONLY
- f_mode:文件读写权限
- f_pos:文件当前的偏移量,即当前读写的位置相对于文件开始地址的偏移
- f_owner:文件所有者
- private_data:私有数据指针,较为常用
- f_mapping:文件的页缓冲映射地址
4、文件系统的挂载
当一个磁盘上的分区被挂载时,此时Linux内核会扫描该磁盘上对应的分区的所有索引节点(inode),然后创建struct mount结构体和dentry对象,并将所有的超级块信息保存在struct super_block结构体中,并将所有的inode信息以链表的形式保存在struct inode结构体中,整个过程就建立了从struct mount到inode之间的关系。
5、文件系统的注册
内核维护一个全局链表 file_systems,用于存储所有已注册的文件系统类型。每个文件系统类型通过 file_system_type 结构体注册到这个链表中。
5.1、文件系统的注册过程
5.1.2、定义文件系统类型
文件系统开发者需要定义一个 file_system_type
结构体实例,并实现必要的操作函数(如挂载和卸载函数)。
/*
 * Filesystem type descriptor for "myfs"; this is the object passed to
 * register_filesystem() / unregister_filesystem().
 *
 * .owner    - the module providing this filesystem.
 * .name     - the type name (a plain ASCII string literal; the published
 *             listing had typographic quotes here, which do not compile).
 * .mount    - mount callback; myfs_mount is defined elsewhere (not shown).
 * .kill_sb  - superblock teardown callback; myfs_kill_sb is defined
 *             elsewhere (not shown).
 * .fs_flags - FS_REQUIRES_DEV: per the kernel's fs.h this marks a
 *             filesystem that must be mounted on a block device.
 */
static struct file_system_type myfs_type = {
    .owner = THIS_MODULE,
    .name = "myfs",
    .mount = myfs_mount,
    .kill_sb = myfs_kill_sb,
    .fs_flags = FS_REQUIRES_DEV,
};
5.1.3、注册文件系统
使用 register_filesystem()
函数将文件系统类型注册到内核中。这通常在文件系统模块加载时完成。
/*
 * Module entry point: hand myfs_type to register_filesystem() and pass its
 * result straight back to the module loader.
 */
static int __init myfs_init(void)
{
    int err = register_filesystem(&myfs_type);

    return err;
}
/*
 * register_filesystem - add a filesystem type to the global file_systems list.
 * @fs: descriptor to register.
 *
 * Returns 0 on success, -EINVAL when @fs->parameters is set but
 * fs_validate_description() rejects it, and -EBUSY when @fs->next is already
 * non-NULL or the slot returned by find_filesystem() is already occupied.
 *
 * A '.' in the name trips BUG_ON(). (The published listing had typographic
 * quotes around that character constant; it is restored to a plain '.'.)
 */
int register_filesystem(struct file_system_type * fs)
{
    int res = 0;
    struct file_system_type ** p;

    if (fs->parameters &&
        !fs_validate_description(fs->name, fs->parameters))
        return -EINVAL;

    BUG_ON(strchr(fs->name, '.'));
    /* A non-NULL ->next means this descriptor is already linked somewhere. */
    if (fs->next)
        return -EBUSY;

    write_lock(&file_systems_lock);
    /*
     * find_filesystem() is not shown here; presumably it yields the list
     * slot matching this name, or the terminating NULL slot - confirm.
     */
    p = find_filesystem(fs->name, strlen(fs->name));
    if (*p)
        res = -EBUSY;
    else
        *p = fs;
    write_unlock(&file_systems_lock);
    return res;
}
5.1.4、注销文件系统
使用 unregister_filesystem()
函数将文件系统从内核中注销。这通常在文件系统模块卸载时完成。
/*
 * Module exit point: remove myfs_type from the kernel's filesystem list.
 * The status returned by unregister_filesystem() is deliberately discarded,
 * since a void exit handler has nowhere to report it.
 */
static void __exit myfs_exit(void)
{
    (void)unregister_filesystem(&myfs_type);
}
/*
 * unregister_filesystem - unlink a filesystem type from the file_systems list.
 * @fs: descriptor to remove.
 *
 * Walks the list under file_systems_lock looking for @fs itself (pointer
 * identity, not name). On a match the entry is unlinked, its ->next is
 * cleared, and synchronize_rcu() is called after the lock is released.
 *
 * Returns 0 when @fs was found and removed, -EINVAL otherwise.
 */
int unregister_filesystem(struct file_system_type * fs)
{
    struct file_system_type **link;
    int ret = -EINVAL;

    write_lock(&file_systems_lock);
    for (link = &file_systems; *link; link = &(*link)->next) {
        if (*link != fs)
            continue;
        /* Splice @fs out of the chain and mark it as unlinked. */
        *link = fs->next;
        fs->next = NULL;
        ret = 0;
        break;
    }
    write_unlock(&file_systems_lock);

    /* Only a successful removal is followed by synchronize_rcu(). */
    if (ret == 0)
        synchronize_rcu();
    return ret;
}
5.2、文件系统的挂载与注册的关系
- 挂载前的注册:在文件系统被挂载之前,它必须先注册到内核中。只有注册过的文件系统类型才能被挂载。
- 挂载时的识别:当用户尝试挂载一个文件系统时(如通过 mount 命令),内核会遍历 file_systems 链表,查找匹配的文件系统类型。
- 动态加载:某些文件系统(如通过模块加载的文件系统)可以在运行时动态注册和注销。例如,ntfs 文件系统可以通过加载 ntfs.ko 模块动态注册。
6、实现一个虚拟文件系统
注:程序源码一样来自李文山的《Linux驱动开发进阶》: 但上述源码是基于6.1的kernel,我实验的环境是5.x的kernel,部分接口函数会不一样。所以下面展示的源码是略有修改的。
6.1、定义文件系统结构
一般需要自定义开发文件系统时,可能才需要编写文件系统驱动程序。下图是本次要实现的虚拟文件系统结构(图片来自作者李文山的《Linux驱动开发进阶》):
6.2、完整的驱动程序
#include #include #include #include #include #include #include #define MEMFS_INVALID 0xFFFFFFFF #define MEMFS_FILE_NAME_MAX 16 #define MEMFS_INODE_MAX 128 #define MEMFS_BLK_MAX 128 #define MEMFS_FILE_SIZE_MAX 1024 struct memfs_sb { uint32_t blk_size_bit; // 块大小占用的位数 uint32_t block_size; // 数据块大小 uint32_t magic; // 文件系统魔数 uint32_t private; // }; struct memfs_inode { char file_name[MEMFS_FILE_NAME_MAX]; // 文件名称 uint32_t mode; // 记录文件或者文件夹的读写权限 uint32_t idx; // 记录文件或者文件夹在inode bitmap的索引节点号 uint32_t child; // 记录当前文件夹下的目录或者文件的第一个(=MEMFS_INVALID: no child) uint32_t brother; // 记录当前文件或目录的同级目录或者文件(=MEMFS_INVALID: no brother) uint32_t file_size; // uint32_t data; // =MEMFS_INVALID: dir 0~127: file }; struct memfs_block { char data[1024]; }; struct memfs_sb g_mf_sb = { .blk_size_bit = 10, .block_size = 1024, .magic = 0x20221001, }; char g_inode_bitmap[16]={0}; // 128bit空间的一个inode bitmap, 每个bit都是一个开关量,标记着inode池中对应的inode是否已使用 char g_block_bitmap[16]={0}; // 128bit空间的一个block bitmap static struct memfs_inode *g_mf_inode; // 指向inode池 static struct memfs_block *g_mf_block; // 指向block池 static struct inode_operations memfs_inode_ops; void set_bitmap(char *bitmap, uint32_t index) { *(bitmap + (index»3)) |= (1« (index%8)); } void reset_bitmap(char bitmap, uint32_t index) { (bitmap + (index»3)) &= ~(1« (index%8)); } uint32_t get_idle_index(char bitmap) { uint8_t tmp; for(int i = 0; i < 16; i++) // 循环检查16个字节 { if(bitmap[i] != 0xFF) // 如果该字节(8个bit)存在空闲bit { tmp = bitmap[i]; for(int j = 0; j<8; j++) // 逐位检查8个bit { if((tmp & 0x1) == 0) // 找到空闲bit { set_bitmap(bitmap, i8 + j); // 设置改bit为非空闲 return i8 + j; // 返回下标 } else { tmp »= 1; } } } } return MEMFS_INVALID; } void put_used_index(char bitmap, uint32_t index) { reset_bitmap(bitmap, index); } int memfs_alloc_mem(void) { / 分配inode池,大小5120bytes / g_mf_inode = kzalloc(5120, GFP_KERNEL); if(!g_mf_inode) return -1; / 分配block池,大小128KB / g_mf_block = kzalloc(1281024, GFP_KERNEL); if(!g_mf_block) return -1; / 初始化inode的brother和child属性 */ for(int i 
= 0; i < MEMFS_INODE_MAX; i++) { g_mf_inode[i].brother = MEMFS_INVALID; g_mf_inode[i].child = MEMFS_INVALID; } return 0; } void memfs_free_mem(void) { kfree(g_mf_inode); kfree(g_mf_block); } static int memfs_readdir(struct file *filp, struct dir_context *ctx) { struct memfs_inode *mf_inode, *child_inode; if (ctx->pos) return 0; mf_inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; if (!S_ISDIR(mf_inode->mode)) { return -ENOTDIR; } if(mf_inode->child != MEMFS_INVALID) { child_inode = &g_mf_inode[mf_inode->child]; } else { return 0; } while(child_inode->idx != MEMFS_INVALID) { if (!dir_emit(ctx, child_inode->file_name, MEMFS_FILE_NAME_MAX, child_inode->idx, DT_UNKNOWN)) { return 0; } ctx->pos += sizeof(struct memfs_inode); if(child_inode->brother != MEMFS_INVALID) child_inode = &g_mf_inode[child_inode->brother]; else break; } return 0; } ssize_t memfs_read_file(struct file * filp, char __user * buf, size_t len, loff_t *ppos) { struct memfs_inode *inode; char *buffer; inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; // 获取实际要操作的inode if (ppos >= inode->file_size) return 0; buffer = (char)&g_mf_block[inode->data]; // 获取block池中对应位置的首地址 buffer += *ppos; // len = min((size_t)(inode->file_size - *ppos), len); if (copy_to_user(buf, buffer, len)) // 拷贝到用户态 { return -EFAULT; } *ppos += len; // 更新偏移 return len; } ssize_t memfs_write_file(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct memfs_inode *inode; char *buffer; inode = &g_mf_inode[filp->f_path.dentry->d_inode->i_ino]; // 获取实际要操作的inode if (ppos + len > MEMFS_FILE_SIZE_MAX ) return 0; buffer = (char)&g_mf_block[inode->data]; // 获取block池中对应位置的首地址 buffer += *ppos; // if (copy_from_user(buffer, buf, len)) { return -EFAULT; } *ppos += len; // 更新偏移 inode->file_size = *ppos; // 更新文件大小 return len; } const struct file_operations memfs_file_operations = { .read = memfs_read_file, .write = memfs_write_file, }; const struct file_operations memfs_dir_operations = { .owner = THIS_MODULE, 
.iterate_shared = memfs_readdir, }; //dir: 当前目录的inode //dentry:要创建的文件的dentry //mode:要创建的文件的mode static int memfs_do_create(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; struct super_block *sb; struct memfs_inode *mf_inode, *p_mf_inode, tmp_mf_inode; uint32_t idx_inode; / 获取sb指针 / sb = dir->i_sb; / 判断是否是目录和常规文件,如果不是,返回错误 / if (!S_ISDIR(mode) && !S_ISREG(mode)) { return -EINVAL; } if (strlen(dentry->d_name.name) > MEMFS_FILE_NAME_MAX) { return -ENAMETOOLONG; } inode = new_inode(sb); if (!inode) { return -ENOMEM; } / 初始化现在要创建的inode的sb / idx_inode = get_idle_index(g_inode_bitmap); // 获取一个空闲的inode,用于保存当前创建的目录或者文件的inode信息 if (idx_inode == MEMFS_INVALID) { return -ENOSPC; } inode->i_sb = sb; inode->i_op = &memfs_inode_ops; // 初始化当前的inode的ops inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); // 初始化创建时间和修改时间为当前时间 inode->i_ino = idx_inode; mf_inode = &g_mf_inode[idx_inode]; // mf_inode->idx = idx_inode; // mf_inode->mode = mode; / 接下来都是inode的初始化 */ if (S_ISDIR(mode)) // 如果创建的是一个文件,则分配一个block,如果是一个目录则不用分配block { mf_inode->data = MEMFS_INVALID; inode->i_fop = &memfs_dir_operations; } else if (S_ISREG(mode)) { mf_inode->child = MEMFS_INVALID; mf_inode->file_size = 0; inode->i_fop = &memfs_file_operations; mf_inode->data = get_idle_index(g_block_bitmap); if(mf_inode->data == MEMFS_INVALID) { return -ENOSPC; } } p_mf_inode = &g_mf_inode[dir->i_ino]; // 获取当前新创建的父目录节点 if(p_mf_inode->child == MEMFS_INVALID) // 当前目录为空目录 { p_mf_inode->child = mf_inode->idx; // 父目录节点的child直接指向现在要创建的inode } else // 非空目录,找到最后一个child { tmp_mf_inode = &g_mf_inode[p_mf_inode->child]; // 第一个child while(tmp_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = &g_mf_inode[tmp_mf_inode->brother]; // 找到父目录最后一个child } tmp_mf_inode->brother = mf_inode->idx; // 最后一个child,并设置brother } strcpy(mf_inode->file_name, dentry->d_name.name); // 初始化内核的dentry名称 inode_init_owner(inode, dir, mode); // 添加inode到dir中 d_add(dentry, inode); // 绑定内核dentry与inode return 0; } static 
int memfs_inode_mkdir(struct inode *dir, struct dentry *direntry, umode_t mode) { return memfs_do_create(dir, direntry, S_IFDIR | mode); } static int memfs_inode_create(struct inode *dir, struct dentry *direntry, umode_t mode, bool excl) { return memfs_do_create(dir, direntry, mode); } //parent_inode: 父目录节点 //find_dentry: 要查找的dentry static struct dentry *memfs_inode_lookup(struct inode *parent_inode, struct dentry *find_dentry, unsigned int flags) { return NULL; } //删除空目录 //dentry: 待删除空目录的dentry int memfs_inode_rmdir(struct inode *dir, struct dentry *dentry) { uint32_t index = dentry->d_inode->i_ino; // 待删除的空目录的inode下标 struct memfs_inode *p_mf_inode, *child_mf_inode, *tmp_mf_inode; if(g_mf_inode[index].child != MEMFS_INVALID) // 如果是非空目录,返回错误 return -ENOTEMPTY; p_mf_inode = &g_mf_inode[dir->i_ino]; // 获取当前目录inode child_mf_inode = &g_mf_inode[p_mf_inode->child]; // 获取父目录inode的第一个child put_used_index(g_inode_bitmap, index); if(p_mf_inode->child == index) // 如果要删除的空目录是父目录的第一个child { if(child_mf_inode->brother == MEMFS_INVALID) // 如果当前node没有brother了 p_mf_inode->child = MEMFS_INVALID; // 那么父目录inode也不会有child else p_mf_inode->child = child_mf_inode->brother; // 否则父目录inode指向当前inode的brother } else // 如果要删除的空目录不是父目录的第一个child { while(child_mf_inode->idx != MEMFS_INVALID) { if(child_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = child_mf_inode; child_mf_inode = &g_mf_inode[child_mf_inode->brother]; // 获取brother if(child_mf_inode->idx == index) // 找到待删除的空目录了 { if(child_mf_inode->brother != MEMFS_INVALID) tmp_mf_inode->brother = child_mf_inode->brother; // 在链表关系中,移除了待删除的这个空目录 else tmp_mf_inode->brother = MEMFS_INVALID; break; } } } } g_mf_inode[index].idx = MEMFS_INVALID; g_mf_inode[index].brother = MEMFS_INVALID; return simple_unlink(dir, dentry); } //删除文件操作 int memfs_inode_unlink(struct inode *dir, struct dentry *dentry) { uint32_t index = dentry->d_inode->i_ino; struct memfs_inode *p_mf_inode, *child_mf_inode, tmp_mf_inode; p_mf_inode = &g_mf_inode[dir->i_ino]; 
//获取第一个child child_mf_inode = &g_mf_inode[p_mf_inode->child]; put_used_index(g_inode_bitmap, index); put_used_index(g_block_bitmap, g_mf_inode[index].data); if(p_mf_inode->child == index) { if(child_mf_inode->brother == MEMFS_INVALID) p_mf_inode->child = MEMFS_INVALID; else p_mf_inode->child = child_mf_inode->brother; } else { while(child_mf_inode->idx != MEMFS_INVALID) { if(child_mf_inode->brother != MEMFS_INVALID) { tmp_mf_inode = child_mf_inode; child_mf_inode = &g_mf_inode[child_mf_inode->brother]; if(child_mf_inode->idx == index) { if(child_mf_inode->brother != MEMFS_INVALID) tmp_mf_inode->brother = child_mf_inode->brother; else tmp_mf_inode->brother = MEMFS_INVALID; break; } } } } g_mf_inode[index].idx = MEMFS_INVALID; g_mf_inode[index].brother = MEMFS_INVALID; return simple_unlink(dir, dentry); } static struct inode_operations memfs_inode_ops = { .create = memfs_inode_create, // 在dentry下创建一个inode .lookup = memfs_inode_lookup, // 在dentry下查找inode .mkdir = memfs_inode_mkdir, // 在dentry下创建一个目录inode .rmdir = memfs_inode_rmdir, // 在dentry下删除一个inode .unlink = memfs_inode_unlink, // 删除文件 }; / 每挂载一个块设备,都需要初始化相应的超级块,
- 该函数就是初始化超级块。 */ static int memfs_demo_fill_super(struct super_block *sb, void *data, int silent) { struct inode root_inode; int mode = S_IFDIR | 0755; root_inode = new_inode(sb); // 新建inode,用于保存根节点 root_inode->i_ino = 0; // 设置根节点的编号为0 root_inode->i_mode = mode; // 初始化根节点权限 root_inode->i_sb = sb; // 设置根节点的超级块 root_inode->i_op = &memfs_inode_ops; // 设置根节点的节点操作集合 root_inode->i_fop = &memfs_dir_operations; // 设置根节点目录操作集合 root_inode->i_atime = root_inode->i_mtime = root_inode->i_ctime = current_time(root_inode); //设置根节点的创建修改时间为当前时间 / 初始化inode池中的第0个inode,第0个inode就是根节点 / strcpy(g_mf_inode[0].file_name, “memfs”); g_mf_inode[0].mode = mode; g_mf_inode[0].idx = 0; g_mf_inode[0].child = MEMFS_INVALID; g_mf_inode[0].brother = MEMFS_INVALID; set_bitmap(g_inode_bitmap, g_mf_inode[0].idx); // 置inode bitmap的第0位为1 root_inode->i_private = &g_mf_inode[0]; // 将根节点保存到root_inode的私有数据中 / 初始化磁盘的描述信息(super block) */ sb->s_root = d_make_root(root_inode); // 设置上面分配的inode为根目录 sb->s_magic = g_mf_sb.magic; // 文件系统魔数 sb->s_blocksize_bits = g_mf_sb.blk_size_bit; // 页大小所占的位数为12 sb->s_blocksize = g_mf_sb.blk_size_bit; // 页大小为1024Bytes return 0; } static struct dentry *memfs_demo_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, memfs_demo_fill_super); } static void memfs_kill_sb(struct super_block sb) { kill_anon_super(sb); } / 文件系统类型 / static struct file_system_type memfs_type = { .owner = THIS_MODULE, .name = “memfs”, // 文件系统名字 .mount = memfs_demo_mount, // 挂载文件系统时,所执行的函数 .kill_sb = memfs_kill_sb, // 卸载文件系统时,所执行的函数 }; static int __init memfs_demo_init(void) { / 分配inode池和block池 / if(memfs_alloc_mem()) { printk(KERN_ERR “alloc memory failed\n”); return -ENOMEM; } / 注册文件系统 / return register_filesystem(&memfs_type); } static void __exit memfs_demo_exit(void) { / 释放inode池和block池 / memfs_free_mem(); / 卸载文件系统 */ unregister_filesystem(&memfs_type); } module_init(memfs_demo_init); module_exit(memfs_demo_exit); 
MODULE_LICENSE("GPL");
MODULE_AUTHOR(" ");
MODULE_DESCRIPTION("memory fs demo");
MODULE_ALIAS("fs:memfs");
MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);

Makefile如下:

KERN_DIR = /home/cohen/sdk/docker/rk356x-sdk/kernel/

all:
	make -C $(KERN_DIR) M=$(PWD) modules

clean:
	make -C $(KERN_DIR) M=$(PWD) clean
	rm -f modules.order

obj-m += my_memfs.o
ccflags-y += -std=gnu99
6.3、测试
1. 将编译好的驱动程序拷贝到板卡,安装驱动:
   insmod my_memfs.ko
   此时,已经将一个名为memfs的虚拟文件系统注册进了内核。
2. 挂载文件系统:
   # -t 表示文件系统的类型
   # none 表示没有IO设备
   # /mnt 为挂载点
   mount -t memfs none /mnt
3. 进入/mnt,创建文件:
4. 创建目录:
5. 删除空目录:
6. 删除文件: