filefrag 实现原理

Linux 文件碎片

据说 Windows 需要碎片整理,而 Linux 不需要碎片整理。

不过 Linux 文件系统真的没有碎片吗?

文件碎片还是有的,只不过没有 Windows 那么明显。

在 Linux 发行版 CentOS 中,e2fsprogs 包有个 filefrag 的命令,可以用来查看文件在磁盘中存储的每个块信息。

当持续写入数据,然后执行 filefrag 命令查看该文件后,如下所示:

$ filefrag -v ./test_file
Filesystem type is: ef53
File size of ./test_file is 23865744 (5827 blocks of 4096 bytes)
 ext:     logical_offset:        physical_offset: length:   expected: flags:
   0:        0..       0:      37944..     37944:      1:
   1:        1..       1:  111185506.. 111185506:      1:      37945:
   2:        2..       2:  111186961.. 111186961:      1:  111185507:
   3:        3..       3:   90212910..  90212910:      1:  111186962:
   4:        4..       4:  111185000.. 111185000:      1:   90212911:
   5:        5..       5:   90213533..  90213533:      1:  111185001:
   6:        6..       6:   90214427..  90214427:      1:   90213534:
   7:        7..       7:  111187017.. 111187017:      1:   90214428:
   8:        8..       8:  111189013.. 111189013:      1:  111187018:
   9:        9..       9:   90213546..  90213546:      1:  111189014:
  10:       10..      10:  111185543.. 111185543:      1:   90213547:
  11:       11..      11:  111188000.. 111188000:      1:  111185544:
  12:       12..      13:   90212927..  90212928:      2:  111188001:
  13:       14..      14:  111187492.. 111187492:      1:   90212929:
  14:       15..      15:  111187496.. 111187496:      1:  111187493:
  15:       16..      31:      53424..     53439:     16:  111187497:
  16:       32..      63:      65920..     65951:     32:      53440:
  17:       64..     127:      89664..     89727:     64:      65952:
  18:      128..     255:      68992..     69119:    128:      89728:
  19:      256..     511:     112384..    112639:    256:      69120:
  20:      512..    1023:     312320..    312831:    512:     112640:
  21:     1024..    2047:     471040..    472063:   1024:     312832:
  22:     2048..    3010:     858112..    859074:    963:     472064:
  23:     3011..    3072:     979798..    979859:     62:     859075:
  24:     3073..    4095:     922625..    923647:   1023:     979860:
  25:     4096..    4732:    1562624..   1563260:    637:     923648:
  26:     4733..    5826:    2016376..   2017469:   1094:    1563261: eof
./test_file: 27 extents found

该测试文件存放的文件系统是 ext4,持续写入数据后,我们观察到:

刚开始占据 1 个文件系统块,写满 16 个块后,就开始 16 个连续块开始分配,直到 1024 个连续块。

从结果得知,写入删除频繁就容易造成了文件块不连续,也就产生了文件碎片。

filefrag 原理

Windows 的碎片整理工具是需要读取文件系统来发现文件是否碎片分布,而 Linux 提供了一个 ioctl(FS_IOC_FIEMAP) 的接口来给用户空间的应用程序查询文件块的分布。

其中,iocode FS_IOC_FIEMAP 定义在 /usr/include/linux/fs.h,所需要的结构体定义在 /usr/include/linux/fiemap.h

有了前面这两个头文件,就可以实现一个 mini 版本的 filefrag。

filefrag 最小版本实现

C 语言版

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>


#define xstrerror()     strerror(errno)
#define errlog(...)     fprintf(stderr, __VA_ARGS__);


static void report_fiemap(int fd)
{
    __u64                 buf[2048];
    struct fiemap_extent  fm_last;
    int                   i, rc, last = 0;
    unsigned long         flags = 0;
    struct fiemap        *fiemap = (struct fiemap *)buf;
    struct fiemap_extent *fm_ext = &fiemap->fm_extents[0];
    int count = (sizeof(buf) - sizeof(*fiemap)) / sizeof(struct fiemap_extent);

    memset(fiemap, 0, sizeof(struct fiemap));
    memset(&fm_last, 0, sizeof(fm_last));

    do {
        fiemap->fm_length = ~0ULL;
        fiemap->fm_flags = flags;
        fiemap->fm_extent_count = count;

        rc = ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap);
        if (rc < 0) {
            errlog("ioctl fail, %s\n", xstrerror());
        }

        if (fiemap->fm_mapped_extents == 0) {
            break;
        }

        for (i = 0; i < fiemap->fm_mapped_extents; i++) {
            printf("logical: %lld, physical: %lld, length: %lld\n",
                fm_ext[i].fe_logical, fm_ext[i].fe_physical, fm_ext[i].fe_length);

            if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
                last = 1;
            }

            fm_last = fm_ext[i];
        }

        fiemap->fm_start = (fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length);

    } while (last == 0);
}


int main(int argc, char *argv[])
{
    int         fd;
    const char *filename;

    if (argc != 2) {
        printf("%s filepath \n", argv[0]);
        exit(EXIT_SUCCESS);
    }

    filename = argv[1];

    fd = open(filename, O_RDONLY);
    if (fd == -1) {
        errlog("open(%s) fail, %s\n", filename, strerror(errno));
    }

    report_fiemap(fd);

    close(fd);

    return 0;
}

执行输出如下:

# ./a.out ./foo
logical: 0, physical: 785387520, length: 49152
logical: 49152, physical: 785436672, length: 16384
logical: 65536, physical: 8323792896, length: 65536
logical: 131072, physical: 8323989504, length: 131072
logical: 262144, physical: 8324251648, length: 262144
logical: 524288, physical: 8324644864, length: 524288
logical: 1048576, physical: 8340840448, length: 1048576
logical: 2097152, physical: 8352956416, length: 2097152
logical: 4194304, physical: 8539602944, length: 49152
logical: 4243456, physical: 8539652096, length: 4145152
logical: 8388608, physical: 8506044416, length: 49152
logical: 8437760, physical: 8506093568, length: 2117632
logical: 10555392, physical: 8522547200, length: 1130496
logical: 11685888, physical: 8486912000, length: 1179648
logical: 12865536, physical: 8488718336, length: 1142784
logical: 14008320, physical: 8520151040, length: 1167360
logical: 15175680, physical: 8483827712, length: 1052672
logical: 16228352, physical: 8729436160, length: 548864
logical: 16777216, physical: 8787816448, length: 49152
logical: 16826368, physical: 8787865600, length: 880640

参考

https://www.kernel.org/doc/Documentation/filesystems/fiemap.txt

https://man7.org/linux/man-pages/man8/filefrag.8.html