1. 安装etcd客户端
# 2. Create a backup: take an etcd snapshot over the TLS endpoint
# using the kubeadm-generated etcd server certificates.
mkdir ./etcd-backup
ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key \
  snapshot save ./etcd-backup/etcdbackup.db
其他备份
mkdir ~/confirm
# Back up the source cluster's configuration files
cp -rf /etc/kubernetes/ ~/confirm/
ll ~/confirm/kubernetes/
# FIX: original was "mkdir confirm/data_etcd" (relative path) — the
# following cp targets ~/confirm/data_etcd, so create it there.
mkdir ~/confirm/data_etcd
# Back up the etcd data directory
cp -rf /var/lib/etcd/* ~/confirm/data_etcd
cp /usr/bin/kubeadm /usr/bin/kubeadm.bak
# 3. Verify the backup: print the snapshot's hash/revision/size table,
# then do a trial restore (writes into ./default.etcd by default).
ETCDCTL_API=3 etcdctl --write-out=table snapshot status ./etcd-backup/etcdbackup.db
ETCDCTL_API=3 etcdctl snapshot restore etcd-backup/etcdbackup.db
4. 恢复
解码备份
# Restore the snapshot into ./default.etcd (etcdctl's default data-dir)
ETCDCTL_API=3 etcdctl snapshot restore etcd-backup/etcdbackup.db
停止整个集群并保存原始文件
mkdir system_yaml_files
# Moving the static-pod manifests away stops the control plane;
# the Kubernetes API will become unreachable from this point on.
mv /etc/kubernetes/manifests/* system_yaml_files/
# Keep the old etcd data dir as a fallback
mv /var/lib/etcd/member/ /var/lib/etcd/member.bak
恢复数据
# Move the restored member/ directory (produced by "snapshot restore")
# into etcd's data directory.
cd default.etcd/
mv member/ /var/lib/etcd/
重启集群
# Put the static-pod manifests back and restart kubelet so it
# re-creates the control-plane pods from them.
systemctl stop kubelet
cp system_yaml_files/* /etc/kubernetes/manifests/
systemctl start kubelet
5. pod日志备份
5.1 日志目录
Pod 日志目录位于 /var/log 下面。
5.2 方案一:周期性备份
Dockerfile
# Minimal Alpine image bundling Huawei Cloud's obsutil CLI.
FROM alpine:3.18

RUN apk add --no-cache wget tar

# Download and unpack obsutil; the tarball extracts into a versioned
# directory (obsutil_linux_amd64_<version>/), hence the glob below.
RUN wget https://obs-community.obs.cn-north-1.myhuaweicloud.com/obsutil/current/obsutil_linux_amd64.tar.gz -O /tmp/obsutil.tar.gz && \
    tar -C /usr/local/bin -xzf /tmp/obsutil.tar.gz && \
    chmod +x /usr/local/bin/obsutil_linux_amd64_*/obsutil && \
    rm /tmp/obsutil.tar.gz

# Expose the binary on PATH regardless of the versioned directory name
RUN ln -s /usr/local/bin/obsutil_linux_amd64_*/obsutil /usr/local/bin/obsutil

CMD ["obsutil", "version"]
cronjob
apiVersion: batch/v1
kind: CronJob
metadata:
  name: node-var-log-backup-to-obs
spec:
  # Cron format: minute hour day-of-month month day-of-week.
  # "0 3 * * *" = every day at 03:00.
  schedule: "0 3 * * *"
  jobTemplate:
    spec:
      # Fail the Job if it has not completed within 10 minutes
      activeDeadlineSeconds: 600
      template:
        spec:
          # Pin the Pod to the node whose /var/log should be backed up.
          # Create one CronJob per node you want to back up, or use a
          # more advanced controller.
          nodeSelector:
            kubernetes.io/hostname: <target-node-name>  # <-- change to your target node name
          containers:
          - name: log-backup-uploader
            # Image built in step 3 above
            image: your-docker-repo/obsutil-alpine:latest  # <-- change to your image
            # Run as root so the container can read the node's /var/log
            securityContext:
              runAsUser: 0
            command: ["/bin/sh", "-c"]
            args:
            - |
              set -ex

              # --- 1. Configure obsutil ---
              # Credentials come from the Secret via env vars below.
              # OBS_ENDPOINT is the regional endpoint of your OBS bucket.
              OBS_BUCKET="k8s-log-backups"                     # <-- change to your bucket name
              OBS_ENDPOINT="obs.cn-north-4.myhuaweicloud.com"  # <-- change to your bucket endpoint
              echo "Configuring obsutil..."
              obsutil config -i=$HUAWEI_AK -k=$HUAWEI_SK -e=$OBS_ENDPOINT

              # --- 2. Archive the log files ---
              NODE_NAME=$(cat /etc/nodename)
              BACKUP_FILE="/tmp/log-backup-${NODE_NAME}-$(date +%Y-%m-%d-%H%M).tar.gz"
              echo "Creating backup archive: ${BACKUP_FILE}"
              # "-C /host/var/log ." archives relative to the mount point,
              # so the tarball contains no absolute paths.
              tar -czf ${BACKUP_FILE} -C /host/var/log .

              # --- 3. Upload to OBS ---
              S3_PATH="obs://${OBS_BUCKET}/node-logs/${NODE_NAME}/"
              echo "Uploading ${BACKUP_FILE} to ${S3_PATH}"
              obsutil cp ${BACKUP_FILE} ${S3_PATH}

              # --- 4. Remove the local temporary archive ---
              echo "Cleaning up local backup file..."
              rm ${BACKUP_FILE}

              echo "Backup to OBS successfully completed."
            env:
            # Inject the OBS access credentials from the Secret
            - name: HUAWEI_AK
              valueFrom:
                secretKeyRef:
                  name: huawei-obs-credentials
                  key: access_key_id
            - name: HUAWEI_SK
              valueFrom:
                secretKeyRef:
                  name: huawei-obs-credentials
                  key: secret_access_key
            volumeMounts:
            # The node's /var/log, mounted read-only at /host/var/log
            - name: node-var-log
              mountPath: /host/var/log
              readOnly: true
            # The node's hostname file, used to tag the archive name
            - name: node-name-file
              mountPath: /etc/nodename
              readOnly: true
          volumes:
          # hostPath volumes pointing at the node's real directories/files
          - name: node-var-log
            hostPath:
              path: /var/log
              type: Directory
          - name: node-name-file
            hostPath:
              path: /etc/hostname
          # Re-run the Pod only when it fails (the original comment said
          # "do not restart on failure", which contradicted this value)
          restartPolicy: OnFailure
5.3 方案二:实时备份
编写脚本到文件 /usr/local/bin/sync_script.sh
#!/bin/sh
# Real-time sync of Kubernetes pod logs from /var/log/pods to a local
# backup directory, driven by inotify events.
# FIX: original began with "# !/bin/sh" — the space makes it a plain
# comment instead of a shebang, so systemd/exec could not run it.
set -e

# Source and destination directories
SOURCE_DIR="/var/log/pods"
DEST_DIR="/opt/k8s-log"

# Make sure the destination exists
mkdir -p "$DEST_DIR"

echo "Starting initial sync from $SOURCE_DIR to $DEST_DIR"
# Do one full sync on startup
rsync -av --delete "$SOURCE_DIR" "$DEST_DIR"

echo "Watching for changes in $SOURCE_DIR..."
# Continuously watch for create/modify/delete/move events:
#   -m monitor forever, -r recursive, -q quiet
#   --format '%w%f' prints the changed file's path
inotifywait -m -r -q -e create,modify,delete,move --format '%w%f' "$SOURCE_DIR" | while read FILE
do
  echo "Change detected. Syncing..."
  # Re-run rsync on every event; --delete mirrors deletions too
  rsync -av --delete "$SOURCE_DIR" "$DEST_DIR"
  echo "Sync complete. Waiting for next change."
done
编写服务配置到文件/etc/systemd/system/k8s-log-sync.service
[Unit]
Description=Real-time Log Sync Service for K8s Pods
Documentation=https://your-documentation-link-here
# Start only after the network is up (optional)
After=network.target

[Service]
# Run as root so the script can read /var/log
User=root
Group=root
# The watcher script installed in the previous step
ExecStart=/usr/local/bin/sync_script.sh
# Always restart on failure, waiting 10s between attempts
Restart=always
RestartSec=10
# Send stdout/stderr to the journal (journald)
StandardOutput=journal
StandardError=journal

[Install]
# multi-user.target: start at boot for normal multi-user operation
WantedBy=multi-user.target
应用
# Make the script executable, reload systemd, then start and enable
# the sync service so it also comes up at boot.
sudo chmod +x /usr/local/bin/sync_script.sh
sudo systemctl daemon-reload
sudo systemctl start k8s-log-sync.service
sudo systemctl enable k8s-log-sync.service
可以使用华为云obs做备份
首先安装s3fs
apt-get install -y s3fs
# Store the access key / secret key pair for s3fs; the file must be
# readable only by its owner or s3fs refuses to use it.
echo "YOUR_AK:YOUR_SK" | sudo tee /etc/passwd-s3fs
sudo chmod 600 /etc/passwd-s3fs
挂载
1 2 3 4 5 6 7 # --- 请将下面的占位符替换为您的实际信息 --- # <your-bucket-name>: 您的 OBS 桶名称 # <your-mount-point>: 您创建的本地挂载点 # <obs-endpoint>: 您的桶所在区域的 Endpoint sudo s3fs <your-bucket-name> <your-mount-point> -o url=https://<obs-endpoint> -o use_path_request_style s3fs hskj-aimage-log-backups /opt/k8s-log -o url=https://obs.cn-south-1.myhuaweicloud.com -o use_path_request_style
添加开机自动挂载:
在文件/etc/fstab
末尾添加:
# FIX: original had "s3fs# hskj-..." — the space after "s3fs#" breaks the
# fstab device field; the legacy s3fs syntax is "s3fs#<bucket>" with no space.
s3fs#hskj-aimage-log-backups /opt/k8s-log fuse _netdev,allow_other,url=https://obs.cn-south-1.myhuaweicloud.com,use_path_request_style 0 0
也可以安装fluent-bit(ubuntu20版本过低无法使用)
# Official fluent-bit install script (piping remote scripts into sh is
# convenient but executes unreviewed code — inspect it first if possible)
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
6. 参考
实践:Kubernetes环境中Etcd集群的备份与恢复-腾讯云开发者社区-腾讯云
linux下上传华为云OBS—obsutil使用指南_obsutil命令参数-CSDN博客
以华为云OBS为例,记录s3fs挂载对象存储的避坑指南 - 维简网
1. 使用 fluent-bit 采集文件 - 程序阳YY - 博客园
Fluent-bit 配置参考 | ClickVisual
Fluent Bit配置与使用——基于版本V2.2.2-CSDN博客
轻量级日志收集转发 | fluent-bit配置详解(二)-腾讯云开发者社区-腾讯云
pod日志采集(Fluent Bit方案)-崔亮的博客
Ubuntu | Fluent Bit: Official Manual
Install Fluent Bit on Ubuntu 24.04 | Lindevs
如何在Debian使用inotifywait - 问答 - 亿速云