ceph部署


1. 部署

1.1 修改host并配置互信(所有节点都需执行)

vim /etc/hosts
10.244.0.228 ubuntu-vm-2404-test-1
10.244.0.155 ubuntu-vm-2404-test-2
10.244.0.226 ubuntu-vm-2404-test-3

1.2 安装 Docker(所有节点都需执行)

sudo apt install docker.io

1.3 SSH 免密登录(所有节点都需执行)注意必须执行下一步的拷贝公钥才算完成免密

sudo su # 必须以root身份
ssh-keygen -t rsa
# 拷贝密钥
ssh-copy-id ubuntu-vm-2404-test-1
ssh-copy-id ubuntu-vm-2404-test-2
ssh-copy-id ubuntu-vm-2404-test-3

1.4 使用 cephadm 安装 ceph 集群(在一台上执行)

# 安装 cephadm
apt install cephadm -y
# 启用集群
cephadm bootstrap --mon-ip 10.244.0.228
# 安装 ceph-cli
apt install ceph-common -y
# 集群状态信息
ceph -s
# 查看节点信息
ceph orch host ls
# 拷贝公钥
ssh-copy-id -f -i /etc/ceph/ceph.pub ubuntu-vm-2404-test-2
ssh-copy-id -f -i /etc/ceph/ceph.pub ubuntu-vm-2404-test-3
# 添加节点
ceph orch host add ubuntu-vm-2404-test-2
ceph orch host add ubuntu-vm-2404-test-3
# 查看节点信息
ceph orch host ls
# 列出可用设备,有延迟,一般不准确
ceph orch device ls
# 添加所有可用磁盘到集群
# 注意,此命令的效果是持久的,会自动添加新的可用的磁盘到集群中。
# https://docs.ceph.com/en/latest/cephadm/services/osd/
# 如果需要取消,可以执行ceph orch apply osd --all-available-devices --unmanaged=true
ceph orch apply osd --all-available-devices
# 添加指定磁盘到集群,磁盘必须未格式化,可以选择某个特定分区
# 不建议使用raw方式,更推荐使用lvm方式:https://docs.ceph.com/en/latest/ceph-volume/intro/ 
# 不会对性能产生明显影响,而且易于管理(易于扩容等)
# bluestore是ceph管理的存储引擎。如果使用raw的话,ceph会在磁盘上做标记为bluestore,但是依赖于lvm方式的不会做标记。
# 无论使用raw还是lvm,都会使用bluestore存储引擎。bluestore替代旧版的filestore引擎,在稳定性与性能上均有提升。
# https://cloud.tencent.com/developer/article/2314578
sudo ceph orch daemon add osd ubuntu-vm-2404-test-2:/dev/sda
sudo ceph orch daemon add osd --method raw ubuntu-vm-2404-test-2:/dev/sda # 不推荐使用
# 查看 osd 状态
ceph osd tree

1.5 初始化(在一台上执行)

# 创建数据pool
ceph osd pool create test
ceph osd pool set test bulk true
# 创建元数据pool
ceph osd pool create cephfs_metadata 128
# 部署mds
ceph orch apply mds test --placement="1 ubuntu-vm-2404-test-1"
# 为数据pool启用cephfs应用
ceph osd pool application enable test cephfs
# 创建文件系统(其中cephfs_name为文件系统名字,cephfs_metadata为元数据pool,test为数据pool)
ceph fs new cephfs_name cephfs_metadata test
# 创建子卷
ceph fs subvolumegroup create cephfs_name csi
# 创建rgw
ceph orch apply rgw default-realm default-zone --placement="3 k10 k11 k12"
ceph orch apply rgw infra_rgw --placement='3 k10 k11 k12' --port=8000
# 创建对象存储
radosgw-admin user create --uid=s3 --display-name="object_storage" --system
# 记住你的access_key和secret_key
"keys": [
        {
            "user": "s3",
            "access_key": "ENL7QVDGNNYNNEX3X3VS",
            "secret_key": "vaUjPhUkR8yLAdqVD6FRnXGVNrxBNDs9bMWFb6Kb",
            "active": true,
            "create_date": "2025-03-10T02:55:13.039290Z"
        }
    ],
# 创建对象桶
radosgw-admin bucket create --bucket=<bucket-name> --user=<username>


# 创建自定义realm的rgw
# 删除默认zone和zonegroup
radosgw-admin zone list
radosgw-admin zone delete  --rgw-zone default
radosgw-admin zonegroup list
radosgw-admin zonegroup delete  --rgw-zonegroup default
# 创建realm、zone和zonegroup
radosgw-admin realm create --rgw-realm=default --default
radosgw-admin zonegroup create --rgw-zonegroup default --rgw-realm default --master --default
radosgw-admin zone create --rgw-zonegroup default --rgw-zone default --master --default
# 设定default(前面default是命令后面是名字)
radosgw-admin realm default default
radosgw-admin zonegroup default default
radosgw-admin zone default default
# 设定master
radosgw-admin zonegroup get > zonegroup.json
vim zonegroup.json # 将其中is_master字段改为true
radosgw-admin zonegroup set --infile zonegroup.json # 导入配置
# 同步period,要执行两次
radosgw-admin period update --commit
radosgw-admin period update --commit
# 部署rgw,必须指定realm和zone
ceph orch apply rgw default --realm=default --zone=default  --placement='3 k10 k11 k12'


# 部署rbd
## rbd只需要创建pool即可,不需要部署daemon
ceph osd pool create rbd_pool 64 64
ceph osd pool application enable rbd_pool rbd
## 创建具体的块设备
rbd create -p rbd_pool --image ceph-rbd-demo.img --size 10G
## 查看当前的块设备列表
rbd -p rbd_pool ls
## 查看详细信息
rbd -p rbd_pool info ceph-rbd-demo.img
## 关闭features
rbd -p rbd_pool --image ceph-rbd-demo.img feature disable deep-flatten
rbd -p rbd_pool --image ceph-rbd-demo.img feature disable fast-diff
rbd -p rbd_pool --image ceph-rbd-demo.img feature disable object-map
rbd -p rbd_pool --image ceph-rbd-demo.img feature disable exclusive-lock
## map到本地
rbd map -p rbd_pool --image ceph-rbd-demo.img
## 卸载设备
rbd device unmap -p rbd_pool --image ceph-rbd-demo.img

1.6 配置密钥(客户端执行)

# 找到某用户密钥
cat /etc/ceph/ceph.client.admin.keyring
# 复制密钥
echo "AQCpx8Zn2nTWMxAAvqX4K3Limi6qYmqh9XKTsw==" > secret

用户创建流程可以参考ceph(二)CephX认证授权、用户管理和keyring - areke - 博客园

ceph auth add client.test mon 'allow *' osd 'allow *' mds 'allow *'
ceph auth list
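
如果不希望客户端直接使用admin密钥,也可以只针对某个文件系统授权一个受限用户,下面是一个示例(client.cephfs-user为假设的用户名,cephfs_name为前文创建的文件系统,细节以官方文档为准):

# 创建仅能读写 cephfs_name 根目录的用户,并保存生成的keyring
ceph fs authorize cephfs_name client.cephfs-user / rw > /etc/ceph/ceph.client.cephfs-user.keyring
# 确认该用户的权限
ceph auth get client.cephfs-user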

1.7 挂载(客户端执行)

sudo mkdir /mnt/cephfs
sudo mount -t ceph ubuntu-vm-2404-test-1:6789:/ /mnt/cephfs -o name=admin,secretfile=secret

# df -h # 实际容量由于三备份会大约为1/3
Filesystem           Size  Used Avail Use% Mounted on
tmpfs                748M  2.2M  746M   1% /run
/dev/vda1             18G  4.8G   13G  28% /
tmpfs                3.7G   16K  3.7G   1% /dev/shm
tmpfs                5.0M     0  5.0M   0% /run/lock
/dev/vda16           881M   61M  758M   8% /boot
/dev/vda15           105M  6.1M   99M   6% /boot/efi
tmpfs                748M   12K  748M   1% /run/user/0
tmpfs                748M   12K  748M   1% /run/user/1000
10.244.0.228:6789:/   18G     0   18G   0% /mnt/cephfs
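
如果希望重启后自动挂载,可以在客户端的/etc/fstab中追加类似下面的一行(仅为示例,mon地址与secret文件路径需按实际环境替换):

10.244.0.228:6789:/  /mnt/cephfs  ceph  name=admin,secretfile=/etc/ceph/admin.secret,_netdev,noatime  0  0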

# sudo ceph -s
  cluster:
    id:     f020f9e9-f8da-11ef-9430-4eecb663651b
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum ubuntu-vm-2404-test-1 (age 4h)
    mgr: ubuntu-vm-2404-test-1.suvfjb(active, since 4h), standbys: ubuntu-vm-2404-test-2.ppaxtx
    mds: 1/1 daemons up
    osd: 6 osds: 6 up (since 59m), 6 in (since 59m)

  data:
    volumes: 1/1 healthy
    pools:   3 pools, 145 pgs
    objects: 24 objects, 585 KiB
    usage:   461 MiB used, 56 GiB / 57 GiB avail
    pgs:     145 active+clean

1.8 客户端

# 先安装客户端
sudo apt install ceph-common
# 复制密钥和配置,以下两条在服务器端执行
sudo scp /etc/ceph/ceph.conf ethereal@10.244.0.118:/home/ethereal/Downloads/ceph
sudo scp /etc/ceph/ceph.client.admin.keyring ethereal@10.244.0.118:/home/ethereal/Downloads/ceph
# 以下客户端执行
sudo cp ceph.conf /etc/ceph/
sudo cp ceph.client.admin.keyring /etc/ceph
echo "AQCpx8Zn2nTWMxAAvqX4K3Limi6qYmqh9XKTsw==" > secret # 密钥来自于ceph.client.admin.keyring
# 客户端挂载
sudo mount -t ceph :/ /mnt/cephfs -o name=admin # 优先挂载v2版本,推荐v2方式,在速度与安全性方面都有提升,https://www.bookstack.cn/read/ceph-en/de5b43971cfd01ae.md#msgr2-protocol

apt install s3cmd
s3cmd --configure
# 1. 使用终端完成配置
# Access Key:刚才创建的radosgw user的access_key
# Secret Key:刚才创建的radosgw user的secret_key
# Default Region:默认直接回车,使用US
# S3 Endpoint:IP地址:port,例如“192.168.64.128:80”
# DNS-style bucket+hostname:“bootstrap_host_ip:80/%(bucket)s”,如"192.168.64.128:80/%(bucket)s"
# Encryption password:默认直接回车,不需要密码
# Path to GPG program [/usr/bin/gpg]:默认直接回车
# Use HTTPS protocol [No]: no,不使用HTTPS
# HTTP Proxy server name: 默认直接回车
# Test access with supplied credentials? [Y/n] 默认直接回车
# 2. 最后保存设置,会生成/root/.s3cfg文件
# 3. 修改刚生成的/root/.s3cfg中的三处配置
# cloudfront_host = [serverIP](改成自己的服务端的IP)
# host_base = [serverIP]:[Port](改成自己的服务端的IP和端口)
# host_bucket = [serverIP]:[Port]/%(bucket)(改成自己的服务端的IP和端口)
s3cmd ls
s3cmd mb s3://default-bucket # 创建bucket
s3cmd mb s3://default-bucket -v # debug模式
s3cmd put values.yaml s3://default-bucket/values.yaml # 上传文件


# rbd使用
## map到本地,其中keyfile内容只有key,例如AQDgb+pnZNLsNhAA2J83AIzrzFDB1AlYGjCoAQ==
rbd map -p rbd_pool --image ceph-rbd-demo.img
rbd --id admin -m 10.144.96.10:3300,10.144.96.11:3300,10.144.96.12:3300 --keyfile=***stripped*** map rbd_pool/ceph-rbd-demo.img --device-type krbd --options noudev
## 卸载设备
rbd device unmap -p rbd_pool --image ceph-rbd-demo.img

1.9 设置时间

# (服务端,所有节点)
# 启用时间同步
timedatectl set-ntp true
# 设置时区 Asia/Shanghai
timedatectl set-timezone Asia/Shanghai
# 查看状态
timedatectl status


# (服务端,单个节点)
ceph config set mon mon_clock_drift_allowed 0.5
ceph config set mon mon_clock_drift_warn_backoff 10

1.10 下线

# 下线OSD
# 删除守护进程
ceph orch daemon rm osd.0 --force
# 删除crush图节点
ceph osd crush remove osd.0
# 剔除osd
ceph osd down osd.0
ceph osd out osd.0
ceph auth del osd.0
# 删除osd
ceph osd rm osd.0
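
# 在cephadm集群上也可以直接用orchestrator方式下线OSD(示例;--zap会同时擦除磁盘,旧版本可能不支持该参数)
ceph orch osd rm 0 --zap --force
# 查看下线进度
ceph orch osd rm status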

# 擦除数据,必须擦除数据后才可重新加入集群
wipefs -af /dev/sdb
ceph orch device zap k10 /dev/sdb --force


# 下线pool
# 删除所有相关mds
ceph orch rm mds.infra-meta
# 标记文件系统为fail
ceph fs fail cephfs-infra
# 查看当前fs状态
ceph fs status
# 删除pool
ceph config set mon mon_allow_pool_delete true
ceph osd pool rm infra-meta infra-meta --yes-i-really-really-mean-it

删除fs

ceph fs fail test_cephfs
ceph fs rm test_cephfs --yes-i-really-mean-it
ceph osd pool application disable cephfs_data_pool cephfs --yes-i-really-mean-it
ceph config set mon mon_allow_pool_delete true
ceph osd pool rm cephfs_data_pool cephfs_data_pool --yes-i-really-really-mean-it
ceph osd pool rm cephfs_meta_pool cephfs_meta_pool --yes-i-really-really-mean-it

1.11 分层缓存

缓存模式可以参考ceph 缓存分层 - 知乎

# 创建一个缓存层
ceph osd tier add cold-storage hot-storage
# 设置缓存模式
ceph osd tier cache-mode hot-storage writeback
# 将客户端的流量从存储池重定向到缓存池
ceph osd tier set-overlay cold-storage hot-storage

ceph osd pool set {cachepool} hit_set_type bloom
ceph osd pool set {cachepool} hit_set_count 1
ceph osd pool set {cachepool} hit_set_period 300 # 300s 后触发hitset
ceph osd pool set {cachepool} target_max_bytes 1000000000 # 1G
ceph osd pool set {cachepool} target_max_objects 100 # 100个objects后触发下刷
ceph osd pool set {cachepool} cache_min_flush_age 300 # 300s 后触发下刷
ceph osd pool set {cachepool} cache_min_evict_age 300 # 300s 后触发驱逐
ceph osd pool set {cachepool} cache_target_dirty_ratio 0.01
ceph osd pool set {cachepool} cache_target_full_ratio 0.02
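
如需撤销缓存层,大致流程如下(示例;cache-mode名称在不同版本可能为proxy或forward,请以对应版本文档为准):

# 停止新数据进入缓存池
ceph osd tier cache-mode hot-storage proxy
# 将缓存池中的脏对象刷回后端池并驱逐
rados -p hot-storage cache-flush-evict-all
# 移除overlay并解除分层关系
ceph osd tier remove-overlay cold-storage
ceph osd tier remove cold-storage hot-storage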

1.12 划分OSD

# 导出原本的osd map
ceph osd getcrushmap -o ./tmp/crushmap.ori
# 反编译osd map
crushtool -d crushmap.ori -o decrushmap.ori
# 定义bucket
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.488
item osd.1 weight 0.488
item osd.2 weight 0.488
}

# 桶层次:type 0 osd,type 1 host,type 2 chassis,type 3 rack,type 4 row,type 5 pdu,type 6 pod,type 7 room,type 8 datacenter,type 9 region,type 10 root

# 修改规则
rule ssd{
id 1
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type osd
step emit
}
rule hdd{
id 2
type replicated
min_size 1
max_size 10
step take hdd
step chooseleaf firstn 0 type osd # 这里是说从规则中选取osd
step emit
}
# 编译osd map
crushtool -c decrushmap.new -o crushmap.new
# 导入map
ceph osd setcrushmap -i ./crushmap.new
# 设定某个存储池的规则
ceph osd pool set ssd_pool crush_rule ssd
# 修改ceph.conf防止回滚,在global中加入如下字段
osd_crush_update_on_start=false


# pg手动分配
# 查看当前容量
ceph osd df
# 查看某个osd的占用
ceph pg ls-by-osd osd.3 | egrep ^1
# 移动pg,从3到2
ceph osd pg-upmap-items 1.77 3 2
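
除了手工编辑CRUSH map,也可以直接基于设备类(device class)创建规则,通常更简单,示例如下(规则名为假设,root为default,故障域为host):

# 查看当前的设备类
ceph osd crush class ls
# 分别为hdd和ssd设备类创建副本规则
ceph osd crush rule create-replicated hdd_rule default host hdd
ceph osd crush rule create-replicated ssd_rule default host ssd
# 将存储池绑定到对应规则
ceph osd pool set ssd_pool crush_rule ssd_rule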

1.13 崩溃恢复

Ceph集群显示XXX daemons have recently crashed警告-CSDN博客

# 列出崩溃信息
ceph crash ls-new
# 归档新的崩溃记录
ceph crash archive-all
# 集群信息存储位置,在所有节点上都存在
cd /var/lib/ceph/<cluster-id>

镜像008 Ceph集群数据同步 - 梦中泪 - 博客园

备份如何备份和恢复Ceph集群的配置和数据?请分别提供备份和恢复的步骤。 | 壹梵在线网络服务 一凡在线

备份集群配置文件:

# 备份Ceph配置文件
cp /etc/ceph/* {备份目录}
备份MON(监控节点)的数据:

# 备份MON的数据
ceph mon dump --cluster {集群名} --format json > {备份目录}/mon_dump.json
备份OSD(对象存储守护进程)的数据:

# 备份OSD的数据
ceph osd dump --cluster {集群名} --format json > {备份目录}/osd_dump.json
备份RGW(对象网关)的数据(如果有):

# 备份RGW的数据
radosgw-admin --cluster {集群名} backup export --file {备份目录}/rgw_backup.bin
备份MDS(元数据服务器)的数据(如果有):

# 备份MDS的数据
ceph fs dump --cluster {集群名} --format json > {备份目录}/fs_dump.json
ceph mds getmap -o {备份目录}/mdsmap.bin


恢复集群配置文件:

# 恢复Ceph配置文件
cp {备份目录}/* /etc/ceph/
如果之前的集群已被清空或者不可用,可以重新初始化集群:

# 重新初始化Ceph集群
ceph-deploy new {MON节点,多个节点以逗号分隔}
ceph-deploy install {MON节点,多个节点以逗号分隔}
ceph-deploy mon create-initial
恢复MON的数据:

# 恢复MON的数据
ceph-mon --cluster {集群名} --mkfs -i {MON节点} --keyring /etc/ceph/{集群名}.mon.{MON节点}.keyring
ceph-mon --cluster {集群名} -i {MON节点}
恢复OSD的数据:

# 恢复OSD的数据
ceph-osd --cluster {集群名} --mkfs -i {OSD节点} --osd-data /var/lib/ceph/osd/{集群名}-{OSD节点}
ceph-osd --cluster {集群名} -i {OSD节点}
恢复RGW的数据(如果有):

# 恢复RGW的数据
radosgw-admin --cluster {集群名} backup import --file {备份目录}/rgw_backup.bin
恢复MDS的数据(如果有):

# 恢复MDS的数据
ceph-mds --cluster {集群名} --mkfs -i {MDS节点} --keyring /etc/ceph/{集群名}.mds.{MDS节点}.keyring
ceph-mds --cluster {集群名} -i {MDS节点}
ceph fs new {文件系统名称} {MDS节点1} {MDS节点2}
ceph osd pool create cephfs_metadata 8
ceph osd pool create cephfs_data 8

恢复速度设置

https://docs.ceph.com/en/latest/rados/configuration/osd-config-ref/#recovery

ceph数据recovery配置策略(数据recovery流量控制) - 钟桂耀 - 博客园

# 业务优先
ceph tell osd.* injectargs '--osd-max-backfills 1 --osd-recovery-max-active 1 --osd-recovery-max-single-start 1'
ceph tell osd.* injectargs '--osd-recovery-sleep 1'

# 恢复优先
ceph tell osd.* injectargs '--osd-max-backfills 5 --osd-recovery-max-active 5 --osd-recovery-max-single-start 5'
ceph tell osd.* injectargs '--osd-recovery-sleep 0'

# 以上为临时设置,永久设置需要修改config
ceph config set osd osd_recovery_max_active 10
ceph config set osd osd_max_backfills 10
ceph config set osd osd_recovery_max_single_start 1

# 需要注意,新的osd加入时,会采用默认设置
ceph config get osd osd_recovery_max_active

# 查看当前运行时配置
ceph tell osd.0 config show | grep recovery


osd_max_backfills : 一个osd上最多能有多少个pg同时做backfill。其中osd出去的最大backfill数量为osd_max_backfills ,osd进来的最大backfill数量也是osd_max_backfills ,所以每个osd最大的backfill数量为osd_max_backfills * 2;
osd_recovery_sleep: 出队列后先Sleep一段时间,拉长两个Recovery的时间间隔;
osd_recovery_max_active: 每个OSD上同时进行的所有PG的恢复操作(active recovery)的最大数量;(注意是恢复操作,不是恢复PG数,因此会受到下面参数的影响)
osd_recovery_max_single_start: OSD在某个时刻会为一个PG启动恢复操作数;


osd_max_backfills:默认值10. 一个osd上承载了多个pg。可能很多pg都需要做第二种recovery,即backfill。 设定这个参数来指明在一个osd上最多能有多少个pg同时做backfill。
osd_recovery_max_active:默认值15. 一个osd上可以承载多个pg, 可能好几个pg都需要recovery,这个值限定该osd最多同时有多少pg做recovery。
osd_recovery_max_single_start:默认值5. 这个值限定了每个pg可以启动recovery操作的最大数。
osd_recovery_max_chunk: 默认值8388608. 设置恢复数据块的大小,以防网络阻塞
osd_recovery_op_priority: 默认值10. osd修复操作的优先级, 可小于该值
osd_recovery_sleep: 默认值0. recovery的间隔

模拟坏盘

# 查看当前所在位置
ll /sys/block/sdc # 输出包含host0
# 模拟删除
echo 1 > /sys/block/sdc/device/delete
# 如果集群有写入,对应的 OSD 就很快 down 掉了


# 恢复
echo '- - -' > /sys/class/scsi_host/host0/scan
# 磁盘编号会改变,因此必须删除数据后重新加入osd(参考上面下线过程)


# 查看pg状态
ceph pg dump | grep recover

1.14 修改rgw的存储池

RGW池放置和存储类(Octopus版本) - Varden - 博客园

# 获取zonegroup
radosgw-admin zonegroup get
# 获取zone
radosgw-admin zone get
# 向区域组default中添加placement temporary
radosgw-admin zonegroup placement add \
--rgw-zonegroup default \
--placement-id temporary
# 向区域中添加placement细节,引用它所属区域组中的placement temporary
radosgw-admin zone placement add \
--rgw-zone default \
--placement-id temporary \
--data-pool default.rgw.temporary.data \
--index-pool default.rgw.temporary.index \
--data-extra-pool default.rgw.temporary.non-ec
# 向区域组中default-placement的placement添加storage-class
radosgw-admin zonegroup placement add \
--rgw-zonegroup default \
--placement-id default-placement \
--storage-class COLD
# 向区域中添加storageclass细节,指定data-pool
radosgw-admin zone placement add \
--rgw-zone default \
--placement-id default-placement \
--storage-class COLD \
--data-pool default.rgw.cold.data \
--compression lz4
# 设定区域组 default默认放置目标
radosgw-admin zonegroup placement default \
--rgw-zonegroup default \
--placement-id new-placement


# 创建bucket时指定placement rule
# 利用--bucket-location覆盖用户的default_placement
s3cmd mb s3://second --bucket-location=":default-placement"

1.15 升级

Ceph Releases (index) — Ceph Documentation

使用 Cephadm 升级 CEPH - Varden - 博客园
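
cephadm升级的基本流程大致如下(示例;版本号需替换为目标版本,升级前请确认集群为HEALTH_OK):

# 查看当前版本与集群状态
ceph version
ceph -s
# 开始升级到指定版本(也可以用 --image 指定镜像)
ceph orch upgrade start --ceph-version 19.2.1
# 查看升级进度与日志
ceph orch upgrade status
ceph -W cephadm
# 如有问题可暂停或停止升级
ceph orch upgrade pause
ceph orch upgrade stop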

1.16 故障处理

2. 常见 OSD 故障处理 · Ceph 运维手册

apt remove ceph-osd
systemctl restart ceph.target
ceph orch daemon restart osd.5
# docker中磁盘映射关系:
docker inspect -f "{{.Mounts}}" 369a1376f78c
[{bind /sys /sys true rprivate} {bind /run/lock/lvm /run/lock/lvm true rprivate} {bind /var/log/ceph/e30eab96-fa62-11ef-8818-246e96a3ad74 /var/log/ceph z true rprivate} {bind /run/udev /run/udev true rprivate} {bind /dev /dev true rprivate} {bind /run/lvm /run/lvm true rprivate} {bind / /rootfs true rslave} {bind /var/lib/ceph/e30eab96-fa62-11ef-8818-246e96a3ad74/osd.0 /var/lib/ceph/osd/ceph-0 z true rprivate} {bind /var/lib/ceph/e30eab96-fa62-11ef-8818-246e96a3ad74/osd.0/config /etc/ceph/ceph.conf z true rprivate} {bind /var/run/ceph/e30eab96-fa62-11ef-8818-246e96a3ad74 /var/run/ceph z true rprivate} {bind /var/lib/ceph/e30eab96-fa62-11ef-8818-246e96a3ad74/crash /var/lib/ceph/crash z true rprivate}]

1.17 配额

# 设置用户配额
radosgw-admin quota set --quota-scope=user --uid=uat --max-objects=10 --max-size=1024

# 设置bucket配额
radosgw-admin quota set --uid=uat --quota-scope=bucket --max-objects=10 --max-size=1024

# 启用/禁用用户配额
radosgw-admin quota enable --quota-scope=user --uid=uat
radosgw-admin quota disable --quota-scope=user --uid=uat

# 启用/禁用bucket配额
radosgw-admin quota enable --quota-scope=bucket --uid=uat
radosgw-admin quota disable --quota-scope=bucket --uid=uat

# 获取配额信息
radosgw-admin user info --uid=uat

# 获取存储池配额
ceph osd pool get-quota test_map

# 设置存储池配额
ceph osd pool set-quota <poolname> max_bytes size

1.18 测试

写:必须先执行写并且添加--no-cleanup才可以执行读
rados bench -p rbd 10 write --no-cleanup

顺序读:
rados bench -p rbd 10 seq

随机读:
rados bench -p rbd 10 rand

删除rados bench命令创建的数据:
rados -p rbd cleanup

查看磁盘io:
`iotop -P`


cur 是current的缩写
cur MB/s 当前速度
avg MB/s 平均速度
Bandwidth (MB/sec): 吞吐量
Average IOPS: 平均iops
Stddev IOPS: 标准偏差
Average Latency(s): 平均延迟
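
除了rados bench,也可以用rbd bench直接对块设备镜像进行测试,示例如下(镜像沿用前文创建的ceph-rbd-demo.img,参数可按需调整):

# 4M顺序写,总共写入1G
rbd bench --io-type write --io-size 4M --io-threads 16 --io-total 1G --io-pattern seq rbd_pool/ceph-rbd-demo.img
# 随机读
rbd bench --io-type read --io-pattern rand --io-total 1G rbd_pool/ceph-rbd-demo.img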

测试结果:

# hdd write
Total time run: 10.3334
Total writes made: 559
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 216.387
Stddev Bandwidth: 10.8403
Max bandwidth (MB/sec): 232
Min bandwidth (MB/sec): 204
Average IOPS: 54
Stddev IOPS: 2.71006
Max IOPS: 58
Min IOPS: 51
Average Latency(s): 0.294018
Stddev Latency(s): 0.194231
Max latency(s): 0.892445
Min latency(s): 0.024855


# hdd read
Total time run: 2.664
Total reads made: 559
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 839.338
Average IOPS: 209
Stddev IOPS: 73.5391
Max IOPS: 283
Min IOPS: 179
Average Latency(s): 0.0730044
Max latency(s): 0.495011
Min latency(s): 0.00412251

# ssd write
Total time run: 10.0774
Total writes made: 1787
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 709.311
Stddev Bandwidth: 60.0577
Max bandwidth (MB/sec): 784
Min bandwidth (MB/sec): 608
Average IOPS: 177
Stddev IOPS: 15.0144
Max IOPS: 196
Min IOPS: 152
Average Latency(s): 0.0899146
Stddev Latency(s): 0.0832786
Max latency(s): 1.12338
Min latency(s): 0.0246071


# ssd read
Total time run: 5.86502
Total reads made: 1787
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 1218.75
Average IOPS: 304
Stddev IOPS: 27.335
Max IOPS: 312
Min IOPS: 244
Average Latency(s): 0.0513641
Max latency(s): 0.241155
Min latency(s): 0.00696541


# cache write
Total time run: 10.0538
Total writes made: 1939
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 771.446
Stddev Bandwidth: 61.3638
Max bandwidth (MB/sec): 836
Min bandwidth (MB/sec): 660
Average IOPS: 192
Stddev IOPS: 15.3409
Max IOPS: 209
Min IOPS: 165
Average Latency(s): 0.0827728
Stddev Latency(s): 0.0337953
Max latency(s): 0.290505
Min latency(s): 0.0306568


# cache read
Total time run: 6.85431
Total reads made: 1939
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 1131.55
Average IOPS: 282
Stddev IOPS: 40.6596
Max IOPS: 349
Min IOPS: 228
Average Latency(s): 0.0553511
Max latency(s): 0.252595
Min latency(s): 0.00646825

1.19 修改监控

查看当前地址

# ceph config get mgr
WHO     MASK  LEVEL     OPTION                                VALUE                                                     RO
mgr           advanced  container_image                       quay.io/ceph/ceph@sha256:41d3f5e46ff7de28544cc8869fdea13fca824dcef83936cb3288ed9de935e4de  *
mgr           advanced  mgr/cephadm/container_init            True                                                      *
mgr           advanced  mgr/cephadm/migration_current         7                                                         *
mgr           advanced  mgr/dashboard/ALERTMANAGER_API_HOST   http://cluster.svc:9093                                   *
mgr           advanced  mgr/dashboard/GRAFANA_API_SSL_VERIFY  false                                                     *
mgr           advanced  mgr/dashboard/GRAFANA_API_URL         https://cluster.svc:3000
mgr           advanced  mgr/dashboard/PROMETHEUS_API_HOST     http://cluster.svc:9095                                   *
mgr           advanced  mgr/dashboard/RGW_API_ACCESS_KEY      {"default": "HKCWLKQKXS1G1L2BV7A4"}                       *
mgr           advanced  mgr/dashboard/RGW_API_SECRET_KEY      {"default": "qGCwWDjirgdCLzoIOhdtJvMnWiPqOWwIaGoZofSw"}   *
mgr           advanced  mgr/dashboard/ssl_server_port         8443                                                      *
global        basic     mgr/orchestrator/orchestrator         cephadm

修改地址

ceph dashboard set-alertmanager-api-host https://cluster.svc:9093
ceph dashboard set-grafana-api-url https://cluster.svc:3000
ceph dashboard set-prometheus-api-host https://cluster.svc:9095
# 或者
ceph config set mgr mgr/dashboard/ALERTMANAGER_API_HOST https://cluster.svc:9093
ceph config set mgr mgr/dashboard/GRAFANA_API_URL https://cluster.svc:3000
ceph config set mgr mgr/dashboard/PROMETHEUS_API_HOST https://cluster.svc:9095

2. 部署到k8s

2.0 准备工作(服务端执行)

# 确保ceph正常运行
ceph -s
# 确保存在mon
ceph mon dump
# 获取key
ceph auth get client.admin
# 创建data池
ceph osd pool create test 8 8
# 创建元数据池
ceph osd pool create cephfs_metadata 8 8
# 关联元数据与data
ceph fs new cephfs cephfs_metadata test
# 创建子卷
ceph fs subvolumegroup create cephfs csi
# volume代表一个文件系统卷,subvolume可以理解成volume下的文件夹
# volumegroup与subvolumegroup可以对volume或subvolume进行方便的权限管理
# https://elrond.wang/2021/08/16/CephFS-subvolume/

子卷调整配额

管理CephFS:创建、删除及操作子卷、快照和子卷组-CSDN博客

4.3. Ceph 文件系统子卷 | Red Hat Product Documentation
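
子卷配额可以在创建时指定,也可以事后调整,示例如下(子卷名test-subvol为假设,大小单位为字节):

# 创建带配额的子卷(约1GiB)
ceph fs subvolume create cephfs test-subvol --size 1073741824 --group_name csi
# 调整子卷配额到约2GiB
ceph fs subvolume resize cephfs test-subvol 2147483648 --group_name csi
# 查看子卷信息(含配额与已用空间)
ceph fs subvolume info cephfs test-subvol --group_name csi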

2.1 下载csi

git clone git@github.com:ceph/ceph-csi.git --depth=1
cd ceph-csi/deploy/cephfs/kubernetes

2.2 修改配置文件

2.2.1 修改conf

其中,clusterID是集群ID,可以通过在服务器端命令ceph -s获得,mon信息可以通过命令ceph mon dump获得

# cat csi-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    [
      {
        "clusterID": "c7b4xxf7-c61e-4668-9xx0-82c9xx5e3696",
        "monitors": [
          "xxx.xxx.xxx.xxx:3300", # v2方式
          "xxx.xxx.xxx.xxx:6789"  # v1方式
        ]
      }
    ]
metadata:
  name: ceph-csi-config

# sudo ceph -s
  cluster:
    id:     f020f9e9-f8da-11ef-9430-4eecb663651b
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum ubuntu-vm-2404-test-1 (age 22h)
    mgr: ubuntu-vm-2404-test-1.suvfjb(active, since 22h), standbys: ubuntu-vm-2404-test-2.ppaxtx
    mds: 1/1 daemons up
    osd: 6 osds: 6 up (since 19h), 6 in (since 19h)

  data:
    volumes: 1/1 healthy
    pools:   3 pools, 145 pgs
    objects: 278 objects, 1001 MiB
    usage:   3.4 GiB used, 53 GiB / 57 GiB avail
    pgs:     145 active+clean

# sudo ceph mon dump
epoch 1
fsid f020f9e9-f8da-11ef-9430-4eecb663651b
last_changed 2025-03-04T09:28:10.173718+0000
created 2025-03-04T09:28:10.173718+0000
min_mon_release 19 (squid)
election_strategy: 1
0: [v2:10.244.0.228:3300/0,v1:10.244.0.228:6789/0] mon.ubuntu-vm-2404-test-1
dumped monmap epoch 1

2.2.2 修改secret

其中,userKey和adminKey都可通过命令ceph auth get client.admin获得

# cat secret.yaml
---
apiVersion: v1
kind: Secret
metadata:
  name: csi-cephfs-secret
  namespace: default
stringData:
  # Required for statically provisioned volumes
  userID: admin
  userKey: AQBg4llf+9CAGdsAds4tQzS+0O7dscB5ZTiTEQ==

  # Required for dynamically provisioned volumes
  adminID: admin
  adminKey: AQBg4llf+9CAGdsAds4tQzS+0O7dscB5ZTiTEQ==

# sudo ceph auth get client.admin
[client.admin]
key = AQCpx8Zn2nTWMxAAvqX4K3Limi6qYmqh9XKTsw==
caps mds = "allow *"
caps mgr = "allow *"
caps mon = "allow *"
caps osd = "allow *"

如果是rbd,那么类似

apiVersion: v1
kind: Secret
metadata:
  name: csi-rbd-secret
stringData:
  # Required for statically provisioned volumes
  userID: admin
  userKey: AQDDv9pnTzbFBhAAPal5qxNBNq3KFMRXbaWvMg==

  # Required for dynamically provisioned volumes
  adminID: admin
  adminKey: AQDDv9pnTzbFBhAAPal5qxNBNq3KFMRXbaWvMg==

2.2.3 创建剩余的配置文件

vim csi-config-map-kms.yaml
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    {}
metadata:
  name: ceph-csi-encryption-kms-config

2.2.4 创建sc

其中,fsName是文件系统名称,pool要对应到一个数据类型的池中,clusterID与前面配置一致,mountOptions要去掉,不然pod会挂载不上,reclaimPolicy表示当pod删除后对应的文件是否要删除。添加kernelMountOptions: ms_mode=prefer-crc后即可启用v2连接。

用法详见ceph-csi/examples/cephfs/storageclass.yaml at devel · ceph/ceph-csi · GitHub

# cat storageclass.yaml
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-cephfs-sc
provisioner: cephfs.csi.ceph.com
parameters:
  clusterID: c7b43ef7-c61e-4668-9970-82c9775e3696
  fsName: cephfs
  pool: test
  kernelMountOptions: ms_mode=prefer-crc
  csi.storage.k8s.io/provisioner-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/provisioner-secret-namespace: default
  csi.storage.k8s.io/controller-expand-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/controller-expand-secret-namespace: default
  csi.storage.k8s.io/node-stage-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/node-stage-secret-namespace: default
reclaimPolicy: Delete
allowVolumeExpansion: true
# mountOptions:
#   - discard

rbd创建的sc如下:

---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: csi-rbd-sc
provisioner: rbd.csi.ceph.com
# If topology based provisioning is desired, delayed provisioning of
# PV is required and is enabled using the following attribute
# For further information read TODO<doc>
# volumeBindingMode: WaitForFirstConsumer
parameters:
# (required) String representing a Ceph cluster to provision storage from.
# Should be unique across all Ceph clusters in use for provisioning,
# cannot be greater than 36 bytes in length, and should remain immutable for
# the lifetime of the StorageClass in use.
# Ensure to create an entry in the configmap named ceph-csi-config, based on
# csi-config-map-sample.yaml, to accompany the string chosen to
# represent the Ceph cluster in clusterID below
clusterID: 06be027c-04c2-11f0-ace2-246e96a3ad74

# (optional) If you want to use erasure coded pool with RBD, you need to
# create two pools. one erasure coded and one replicated.
# You need to specify the replicated pool here in the `pool` parameter, it is
# used for the metadata of the images.
# The erasure coded pool must be set as the `dataPool` parameter below.
# dataPool: <ec-data-pool>

# (required) Ceph pool into which the RBD image shall be created
# (optional) If the topologyConstrainedPools is provided
# eg: pool: rbdpool
pool: rbd_pool

# (optional) RBD image features, CSI creates image with image-format 2 CSI
# RBD currently supports `layering`, `journaling`, `exclusive-lock`,
# `object-map`, `fast-diff`, `deep-flatten` features.
# Refer https://docs.ceph.com/en/latest/rbd/rbd-config-ref/#image-features
# for image feature dependencies.
# imageFeatures: layering,journaling,exclusive-lock,object-map,fast-diff
imageFeatures: "layering"

# (optional) Options to pass to the `mkfs` command while creating the
# filesystem on the RBD device. Check the man-page for the `mkfs` command
# for the filesystem for more details. When `mkfsOptions` is set here, the
# defaults will not be used, consider including them in this parameter.
#
# The default options depend on the csi.storage.k8s.io/fstype setting:
# - ext4: "-m0 -Enodiscard,lazy_itable_init=1,lazy_journal_init=1"
# - xfs: "-K"
#
# mkfsOptions: "-m0 -Ediscard -i1024"

# (optional) Specifies whether to try other mounters in case if the current
# mounter fails to mount the rbd image for any reason. True means fallback
# to next mounter, default is set to false.
# Note: tryOtherMounters is currently useful to fallback from krbd to rbd-nbd
# in case if any of the specified imageFeatures is not supported by krbd
# driver on node scheduled for application pod launch, but in the future this
# should work with any mounter type.
# tryOtherMounters: false

# (optional) mapOptions is a comma-separated list of map options.
# For krbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
# Format:
# mapOptions: "<mounter>:op1,op2;<mounter>:op1,op2"
# An empty mounter field is treated as krbd type for compatibility.
# eg:
# mapOptions: "krbd:lock_on_read,queue_depth=1024;nbd:try-netlink"

# (optional) unmapOptions is a comma-separated list of unmap options.
# For krbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
# Format:
# unmapOptions: "<mounter>:op1,op2;<mounter>:op1,op2"
# An empty mounter field is treated as krbd type for compatibility.
# eg:
# unmapOptions: "krbd:force;nbd:force"

# The secrets have to contain Ceph credentials with required access
# to the 'pool'.
csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
csi.storage.k8s.io/provisioner-secret-namespace: ceph-storage
csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
csi.storage.k8s.io/controller-expand-secret-namespace: ceph-storage
csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
csi.storage.k8s.io/node-stage-secret-namespace: ceph-storage

# (optional) Specify the filesystem type of the volume. If not specified,
# csi-provisioner will set default as `ext4`.
csi.storage.k8s.io/fstype: ext4

# (optional) uncomment the following to use rbd-nbd as mounter
# on supported nodes
# mounter: rbd-nbd

# (optional) ceph client log location, eg: rbd-nbd
# By default host-path /var/log/ceph of node is bind-mounted into
# csi-rbdplugin pod at /var/log/ceph mount path. This is to configure
# target bindmount path used inside container for ceph clients logging.
# See docs/design/proposals/rbd-nbd.md for available configuration options.
# cephLogDir: /var/log/ceph

# (optional) ceph client log strategy
# By default, log file belonging to a particular volume will be deleted
# on unmap, but you can choose to just compress instead of deleting it
# or even preserve the log file in text format as it is.
# Available options `remove` or `compress` or `preserve`
# cephLogStrategy: remove

# (optional) Prefix to use for naming RBD images.
# If omitted, defaults to "csi-vol-".
# volumeNamePrefix: "foo-bar-"

# (optional) Instruct the plugin it has to encrypt the volume
# By default it is disabled. Valid values are "true" or "false".
# A string is expected here, i.e. "true", not true.
# encrypted: "true"

# (optional) Select the encryption type when encrypted: "true" above.
# Valid values are:
# "file": Enable file encryption on the mounted filesystem
# "block": Encrypt RBD block device
# When unspecified assume type "block". "file" and "block" are
# mutually exclusive.
# encryptionType: "block"

# (optional) Use external key management system for encryption passphrases by
# specifying a unique ID matching KMS ConfigMap. The ID is only used for
# correlation to configmap entry.
# encryptionKMSID: <kms-config-id>

# Add topology constrained pools configuration, if topology based pools
# are setup, and topology constrained provisioning is required.
# For further information read TODO<doc>
# topologyConstrainedPools: |
# [{"poolName":"pool0",
# "dataPool":"ec-pool0" # optional, erasure-coded pool for data
# "domainSegments":[
# {"domainLabel":"region","value":"east"},
# {"domainLabel":"zone","value":"zone1"}]},
# {"poolName":"pool1",
# "dataPool":"ec-pool1" # optional, erasure-coded pool for data
# "domainSegments":[
# {"domainLabel":"region","value":"east"},
# {"domainLabel":"zone","value":"zone2"}]},
# {"poolName":"pool2",
# "dataPool":"ec-pool2" # optional, erasure-coded pool for data
# "domainSegments":[
# {"domainLabel":"region","value":"west"},
# {"domainLabel":"zone","value":"zone1"}]}
# ]

# Image striping, Refer https://docs.ceph.com/en/latest/man/8/rbd/#striping
# For more details
# (optional) stripe unit in bytes.
# stripeUnit: <>
# (optional) objects to stripe over before looping.
# stripeCount: <>
# (optional) The object size in bytes.
# objectSize: <>

# rbd volume QoS.
# QoS provides settings for rbd volume read/write iops
# and read/write bandwidth. There are 4 base qos parameters
# among them, when users apply for a volume capacity equal
# to or less than BaseVolSizebytes, use base qos limit.
# For the portion of capacity exceeding BaseVolSizebytes,
# QoS will be increased in steps set per GiB. If the step
# size parameter per GiB is not provided, only base QoS limit
# will be used and not associated with capacity size.
#
# note: currently supports rbd-nbd mounter.
#
# For more details
# (optional) the base limit of read operations per second.
# BaseReadIops: <>
# (optional) the base limit of write operations per second.
# BaseWriteIops: <>
# (optional) the base limit of read bytes per second.
# BaseReadBytesPerSecond: <>
# (optional) the base limit of write bytes per second.
# BaseWriteBytesPerSecond: <>
# (optional) the limit of read operations per GiB.
# ReadIopsPerGiB: <>
# (optional) the limit of write operations per GiB.
# WriteIopsPerGiB: <>
# (optional) the limit of read bytes per GiB.
# ReadBpsPerGiB: <>
# (optional) the limit of write bytes per GiB.
# WriteBpsPerGiB: <>
# (optional) min size of volume what use to calc qos beased on capacity.
# BaseVolSizeBytes:<>
reclaimPolicy: Delete
allowVolumeExpansion: true

# If filesystem is xfs, nouuid will be automatically added to mountOptions
mountOptions:
- discard

2.2.5 创建pvc

# vim pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: csi-cephfs-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-cephfs-sc

2.2.6 应用

cd ceph-csi/deploy/cephfs/kubernetes
k apply -f ../../ceph-conf.yaml # 必须应用此文件
k apply -f ./ # 应用全部创建csi,包括deployment等
k apply -f csi-config-map.yaml
k apply -f csi-config-map-kms.yaml
k apply -f secret.yaml
k apply -f storageclass.yaml
k apply -f pvc.yaml

2.2.7 创建pod

# cat pod.yaml
---
apiVersion: v1
kind: Pod
metadata:
  name: csi-cephfs-demo-pod
spec:
  containers:
    - name: web-server
      image: nginx
      volumeMounts:
        - name: mypvc
          mountPath: /var/lib/www
  volumes:
    - name: mypvc
      persistentVolumeClaim:
        claimName: csi-cephfs-pvc
        readOnly: false

应用,即可看到挂载。对应的ceph路径为/volumes/csi/csi-vol-035561d6-1f49-4477-9c6d-794382609b66/9247952c-12bd-4ecb-8845-1e2ec3bf1066/

10.244.0.228:6789:/volumes/csi/csi-vol-035561d6-1f49-4477-9c6d-794382609b66/9247952c-12bd-4ecb-8845-1e2ec3bf1066  1.0G     0  1.0G   0% /var/lib/www
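
可以进一步在pod内验证挂载与读写是否正常(示例):

kubectl get pvc csi-cephfs-pvc
kubectl exec -it csi-cephfs-demo-pod -- df -h /var/lib/www
kubectl exec -it csi-cephfs-demo-pod -- sh -c "echo hello > /var/lib/www/test.txt"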

2.3 镜像列表

registry.k8s.io/sig-storage/csi-snapshotter:v8.2.0
registry.k8s.io/sig-storage/csi-resizer:v1.13.1
registry.k8s.io/sig-storage/csi-provisioner:v5.1.0
registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.13.0
quay.io/cephcsi/cephcsi:canary

2.4 负载均衡

2.4.1 保证每台节点均有rgw与mds服务
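
可以通过orchestrator把mds与rgw扩展到每台节点上,示例如下(主机名沿用前文的k10/k11/k12,服务名为假设,需按实际环境替换):

ceph orch apply mds cephfs --placement="3 k10 k11 k12"
ceph orch apply rgw default --realm=default --zone=default --port=8000 --placement="3 k10 k11 k12"
# 确认各节点上的daemon分布
ceph orch ps | grep -E 'mds|rgw'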

2.4.2 修改coredns

kubectl edit configmap coredns -n kube-system
# 加入如下中的hosts段:
Corefile: |
  .:53 {
      errors
      health {
         lameduck 5s
      }
      hosts {
         10.144.96.10 k10 postgres.service.com s3.service.com
         10.144.96.11 k11 postgres.service.com s3.service.com
         10.144.96.12 k12 postgres.service.com s3.service.com
         fallthrough
      }
      ready
      kubernetes cluster.local in-addr.arpa ip6.arpa {
         pods insecure
         fallthrough in-addr.arpa ip6.arpa
         ttl 30
      }

2.5 选型

2.5.1 Cephfs

  • 优点

    • 读取延迟低,I/O带宽表现良好,尤其是block size较大一些的文件

    • 灵活度高,支持k8s的所有接入模式

  • 缺点

    • 写入延迟相对较高且延迟时间不稳定
  • 适用场景

    • 适用于要求灵活度高(支持k8s多节点挂载特性),对I/O延迟不甚敏感的文件读写操作,以及非海量的小文件存储支持.例如作为常用的应用/中间件挂载存储后端.

2.5.2 Ceph RBD

  • 优点

    • I/O带宽表现良好

    • 读写延迟都很低

    • 支持镜像快照,镜像转储

  • 缺点

    • 不支持多节点挂载
  • 适用场景

    • 对I/O带宽和延迟要求都较高,且无多个节点同时读写数据需求的应用,例如数据库

2.5.3 测试

2.5.3.1 可用工具

sysbench

akopytov/sysbench: Scriptable database and system performance benchmark
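
sysbench的fileio模式可以用来对比两种挂载方式的随机读写表现,示例如下(在对应挂载目录下执行,文件大小与时长可按需调整):

sysbench fileio --file-total-size=1G --file-num=16 prepare
sysbench fileio --file-total-size=1G --file-num=16 --file-test-mode=rndrw --time=60 run
sysbench fileio --file-total-size=1G --file-num=16 cleanup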

2.5.3.2 写入一个大文件
  • rbd
# dd if=/dev/zero  of=test bs=1M count=2048
2048+0 records in
2048+0 records out
2147483648 bytes (2.1 GB, 2.0 GiB) copied, 1.8003 s, 1.2 GB/s
  • fs
# dd if=/dev/zero  of=test bs=1M count=2048
2048+0 records in
2048+0 records out
2147483648 bytes (2.1 GB, 2.0 GiB) copied, 1.86057 s, 1.2 GB/s
2.5.3.3 写入一万个小文件
  • rbd
# time seq 10000 | xargs -i dd if=/dev/zero of={}.dat bs=1024 count=1
real 0m12.695s
user 0m2.479s
sys 0m10.661s
  • fs
# time seq 10000 | xargs -i dd if=/dev/zero of={}.dat bs=1024 count=1
real 0m20.365s
user 0m3.552s
sys 0m10.236s

3. 其他命令

sudo ceph orch host ls
sudo ceph osd map test sys.txt
sudo ceph orch ls
sudo ceph mds metadata
sudo ceph health detail
sudo ceph -s
sudo ceph orch stop mds.test
sudo rados df
radosgw-admin user create --uid=s3 --display-name="object_storage" --system
ceph orch device ls
sgdisk --zap-all /dev/nvme0n1
fdisk

4. 参考

Ceph分布式存储系统的介绍及详细安装部署过程:详细实战版(保姆级)_ceph存储部署-CSDN博客

K8S使用ceph实现持久化存储 - hovin - 博客园

Ubuntu 22.04 安装 ceph 集群 | 小汪老师

Ubuntu CEPH快速安装-腾讯云开发者社区-腾讯云

Ubuntu22.04LTS基于cephadm快速部署Ceph Reef(18.2.X)集群 -阿里云开发者社区

Ceph集群详细部署配置图文讲解,只要看一遍就能上手(二)【与云原生的故事】-云社区-华为云

Ceph 服务管理之OSD服务 - Varden - 博客园

ceph(五)CephFS部署、使用和MDS高可用实现 - areke - 博客园

【ceph】ceph分布式存储MDS(各种状态、源码)_ceph mds-CSDN博客

为什么ceph没有在新节点上检测到ssd设备?-腾讯云开发者社区-腾讯云

ceph 运维操作-MDS - 简书

2.6. 使用 Ceph Orchestrator 管理 MDS 服务 | Red Hat Product Documentation

9.2. 使用命令行界面部署 MDS 服务 | Red Hat Product Documentation

在Ubuntu20.04下基于ceph-deploy部署ceph 16.2.10 - cyh00001 - 博客园

Linux | Ceph | Ubuntu 中部署 Ceph 集群 - 隔江千万里 - 博客园

二、Ceph的ceph-deploy部署 - yaowx - 博客园

Ubuntu部署ceph:安装ceph-deploy遇到的问题总汇_ceph-deploy ubuntu系统-CSDN博客

CEPH-1:ceph-deploy离线部署ceph集群及报错解决FAQ - 塔克拉玛攻城狮 - 博客园

Index of /debian-19.2.1/dists/bookworm/

Ceph集群添加MDS — Cloud Atlas: Discovery beta 文档

Ceph故障排除: 1 pool(s) do not have an application enabled-CSDN博客

PG 异常状态- active+undersized+degraded-CSDN博客

CephFS挂载 - OrcHome

HEALTH_WARN mds 0 is laggy的解决方法_mount error: no mds server is up or the cluster is-CSDN博客

Ceph保姆级安装教程(详细 )_ceph 安装-CSDN博客

详解cephfs几种挂载方式_cephfs挂载-CSDN博客

安装ceph (快速) 步骤三: Ceph 客户端 - 哈喽哈喽111111 - 博客园

ceph-create-keys – ceph keyring generate tool — Ceph Documentation

ceph(二)CephX认证授权、用户管理和keyring - areke - 博客园

ceph/ceph-csi: CSI driver for Ceph

Ceph块存储-1·Client客户端使用 - 简书

Ceph的客户端安装 - CL.TANG - 博客园

ceph客户端配置 - 山的那一边 - 博客园

Ceph集群搭建系列(四):CephFS client客户端使用CephFS_ceph查看client ip-CSDN博客

无法从DNS SRV获取服务名称:ceph的监视器信息。-腾讯云开发者社区-腾讯云

unable to get monitor info from DNS SRV with service name: ceph-mon · Issue #3595 · rook/rook

mount.ceph – mount a Ceph file system — Ceph Documentation

[SOLVED] - CEPH Mirroring : unable to get monitor info from DNS SRV with service name: ceph-mon | Proxmox Support Forum

Cannot open ceph.conf | Proxmox Support Forum

【原创】K8S使用ceph-csi持久化存储之CephFS - wsjhk - 博客园

ceph/ceph-csi at release-v3.13

5.5 ceph 集群状态说明 - 云起时。 - 博客园

[v3.11.0] pod with pvc failed to mount from ceph cluster. (stderr: unable to get monitor info from DNS SRV with service name: ceph-mon) · Issue #4771 · ceph/ceph-csi

K8S使用ceph-csi持久化存储之RBD - Chuyio - 博客园

【原创】K8S使用ceph-csi持久化存储之RBD - wsjhk - 博客园

No such file or directory: “subvolume group ‘csi’ does not exist” · Issue #4548 · ceph/ceph-csi

第33讲:K8S集群StorageClass使用Ceph CSI供应商与Cephfs文件系统集成-CSDN博客

容器镜像加速服务

Cephadm部署使用rgw对象网关(s3cmd和Java)_cephadm rgw-CSDN博客

Ceph对象存储 S3 - 李占勋 - 博客园

9.3. 使用 Ceph Orchestrator 删除 MDS 服务 | Red Hat Product Documentation

ceph mon时钟偏移问题 | 夏天的风的博客

Cephadm Operations — Ceph Documentation

实战篇:使用rook在k8s上搭建ceph集群 - 知乎

K8S中部署Ceph | 左老师的课堂笔记

kubernetes 部署 rook+ceph 存储系统-腾讯云开发者社区-腾讯云

Ceph分布式存储系列(七):对象存储RGW和S3cmd的安装配置及常用命令_对象存储常用命令-CSDN博客

s3cmd在配置后使用时提示ERROR: S3 error: 403 (InvalidAccessKeyId): The AWS Access Key Id you provided does not exist in our records. - Believer007 - 博客园

在k8s中通过CoreDNS进行域名解析的其中三种方法_coredns添加域名解析-CSDN博客

K8s 跨 namespace 访问服务_kubernetes 跨namespace svc 访问-CSDN博客

Kubernetes ExternalName类型的服务 - 人艰不拆_zmc - 博客园

kubernetes pod间通信,跨namespace互访_在 kubernetes 集群中 pod和pod之间的访问流程-CSDN博客

Linux中一个ip绑定多个域名的详细步骤_hosts文件一个ip对应多个域名-CSDN博客

linux中,如何在/etc/hosts中将一个域名解析为多个IP地址?工作原理是什么? - Zhai_David - 博客园

hosts文件的作用以及hosts中多个ip映射一个域名地址的解析顺序_hosts 多个ip对应一个域名-CSDN博客

Overview — Ceph Documentation

Ceph Internals - msgr2 protocol - 《Ceph v15.0 Document》 - 书栈网 · BookStack

Configuration - Messenger v2 protocol - 《Ceph v15.0 Document》 - 书栈网 · BookStack

6.3. 为什么 ceph-volume 替换 ceph-disk? | Red Hat Product Documentation

浅学lvm以及lvm在ceph中的应用_ceph osd 为什么采用lvm-CSDN博客

Ceph删除OSD和Host的正确方法 - iouwenbo - 博客园

6.10. 使用 Ceph Orchestrator 删除 OSD 守护进程 | Red Hat Product Documentation

ceph-volume 创建osd - 简书

ceph相关的命令_ceph orch命令-CSDN博客

6.11. 使用 Ceph Orchestrator 替换 OSD | Red Hat Product Documentation

Ceph后端存储引擎BlueStore — Cloud Atlas: Discovery beta 文档

BlueStore Migration — Ceph Documentation

ceph 删除和添加osd_ceph删除osd-CSDN博客

从Ceph集群中删除OSD节点 | Sirius’s Blog

Ceph OSD删除与磁盘释放教程-CSDN博客

如何将下线的OSD磁盘,重新初始化上线使用 - ST运维 - 博客园

解决重装系统后有磁盘被ceph占用问题_如何去除磁盘的sdb的ceph-CSDN博客

Linux格式化并重新加载磁盘_writing superblocks and filesystem-CSDN博客

7.4. 使用 ceph-volume准备 Ceph OSD | Red Hat Product Documentation

Ceph bluestore 和 ceph-volume - 代码杂货铺

Ceph:关于 Ceph 中 BlueStore 架构以及 OSD 创建的一些笔记-腾讯云开发者社区-腾讯云

[ ceph ] BlueStore 存储引擎介绍 - hukey - 博客园

OSD Service — Ceph Documentation

ceph 分层缓存 cache pool - 简书

Ceph 进阶系列(二):如何让某个 pool 使用特定的 OSD 设备 (1 of 2,手动版,早于 luminous 版本)_ceph osd pool 能指定硬盘吗-CSDN博客

ceph 指定OSD创建pool-腾讯云开发者社区-腾讯云

ceph中pool的管理 - 波神 - 博客园

Ceph篇之利用shell脚本实现批量创建bucket桶-CSDN博客

全局Ceph节点宕机处理-CSDN博客

ceph里面osd容量分布不均问题的处理办法 - 蓝枫居士 - 博客园

ceph rgw: zone/zone/group/realm - 简书

ceph挂载osd时出现permission denied问题_error einval: failed to connect to ceph2 (ceph2). -CSDN博客

2.3. 启动、停止和重启所有 Ceph 服务 | Red Hat Product Documentation

2. 常见 OSD 故障处理 · Ceph 运维手册

CRUSH Maps — Ceph Documentation

创建Ceph crush运行图实现基于HDD和SSD磁盘实现数据冷热数据分类存储 - PunchLinux - 博客园

3.4. 配置 Bucket 分片 | Red Hat Product Documentation

5.10. zone group 和 zone 配置设置 | Red Hat Product Documentation

ceph之crush map - 阳台 - 博客园

Ceph CRUSH 规则 - 简书

Docker查看容器挂载目录_docker 查看挂载目录-CSDN博客

ceph radosgw 对象存储 配额控制_radosgw-admin quota stats –quota-scope=user-CSDN博客

Ceph获取对应存储池配额及修改 - 知乎

ceph 对象网关多区部署_rgw zonegroup 和zone-CSDN博客

分布式存储ceph 对象存储配置zone同步_ceph修改endpoints-CSDN博客

Ceph RGW multi site 配置 | yanyx’s blog

012 Ceph多区域网关 - 梦中泪 - 博客园

创建,查看,删除pool,查看,修改pool参数命令总结 - sisimi_2017 - 博客园

k8s如何强制删除pod&pv&pvc和ns&namespace方法 - 记忆流年 - 博客园

ceph-csi/examples/cephfs/storageclass.yaml at devel · ceph/ceph-csi

ceph运维大宝剑之pg常见状态修复 | 奋斗的松鼠 - Blog

Setting a zone group - IBM Documentation

Multi-Site — Ceph Documentation

ceph部署与配置及部署过程遇到的问题解决_ceph dashboard creat-self-signed-cert 报错-CSDN博客

【ceph运维】修改ceph集群配置 - 苏格拉底的落泪 - 博客园

Configuring multiple realms in the same storage cluster - IBM Documentation

【ceph相关】ceph基准性能测试工具_rbd bench-CSDN博客

Ceph CrushMap及RGW Placement设置 - 简书

ceph 读写测试 rados bench - 简书

Ceph 创建 OSD 报错 ‘GPT headers found, they must be removed’ 的处理 - 简书

Ceph Placement rule(副本放置规则) - Varden - 博客园

https://docs.ceph.com/en/latest/rados/configuration/osd-config-ref/#recovery

ceph数据recovery配置策略(数据recovery流量控制) - 钟桂耀 - 博客园

Ceph 16 模拟坏盘和恢复 | GuKaifeng’s Blog

Linux 删除磁盘设备上的 LVM | GuKaifeng’s Blog

【ceph运维】PG相关命令 - 苏格拉底的落泪 - 博客园

ceph的pg的分布的快速查看 - 武汉-磨渣 - 博客园

Ceph recover的速度控制 - 多看多学多记多实践 - 博客园

ceph recovering速度控制-腾讯云开发者社区-腾讯云

Ceph PG状态及故障模拟_51CTO博客_ceph PG

osd max backfills_osd max backfills配置-CSDN博客

ceph 数据恢复和回填速度 重建osd 加快数据恢复_ceph tell osd.* injectargs ‘–osd-max-backfills 50-CSDN博客

Ceph RBD和QEMU块设备qos测试_ceph rbd qos 热生效-CSDN博客

Ceph 入门到实战之 RBD 块存储接口-腾讯云开发者社区-腾讯云

Cephfs & Ceph RBD 在k8s中的适用场景讨论及数据库性能压测_cephfs和ceph rbd的使用场景-CSDN博客

ceph-csi/examples/rbd/storageclass.yaml at devel · ceph/ceph-csi

3x performance degradation with cephfs vs rbd · Issue #1706 · rook/rook

第30讲:Ceph集群RBD块存储通过CSI客户端与K8S StorageClass集成_ceph-csi rbd-CSDN博客

远程使用ceph rbd块设备

ceph 开启rbd与远程挂载

Linux 远程挂载 Ceph RBD 磁盘-腾讯云开发者社区-腾讯云

【Ceph Block Device】块设备挂载使用-CSDN博客

客户端使用RBD-崔亮的博客

ceph-radosgw 手动安装教程以及安装问题&解决办法-CSDN博客

cephfs文件系统场景 - doublexi - 博客园

How to delete “CephFS”? | Proxmox Support Forum

Deploy Hyper-Converged Ceph Cluster

cephfs创建和删除pool_ceph删除pool-CSDN博客

  • Title: ceph部署
  • Author: Ethereal
  • Created at: 2025-03-04 21:58:41
  • Updated at: 2025-03-31 19:13:11
  • Link: https://ethereal-o.github.io/2025/03/04/ceph部署/
  • License: This work is licensed under CC BY-NC-SA 4.0.