二、准备清单文件
- 从官方的地址获取最新的main分支,或者直接打包下载main
git clone https://github.com/prometheus-operator/kube-prometheus.git
git -C kube-prometheus checkout main
- 或者直接下载打包好的包
wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/heads/main.tar.gz
tar -xvf main.tar.gz
mv kube-prometheus-main kube-prometheus
默认下载下来的文件较多,建议把文件进行归类处理,将相关yaml文件移动到对应目录下
- 新建相关目录
cd kube-prometheus/manifests
mkdir -p adapter alertmanager blackbox grafana kube-control-plane kube-state-metrics node-exporter prometheus serviceMonitor
2.1、修改yaml,增加持久化存储
修改Prometheus部署文件
vim prometheus/prometheus-prometheus.yaml
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.32.1
name: k8s
namespace: monitoring
spec:
alerting:
alertmanagers:
- apiVersion: v2
name: alertmanager-main
namespace: monitoring
port: web
enableFeatures: []
externalLabels: {}
image: quay.io/prometheus/prometheus:v2.32.1
nodeSelector:
kubernetes.io/os: linux
podMetadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.32.1
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
probeNamespaceSelector: {}
probeSelector: {}
replicas: 2
resources:
requests:
memory: 400Mi
ruleNamespaceSelector: {}
ruleSelector: {}
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
version: 2.32.1
# 新增持久化存储,yaml 末尾添加
retention: 3d
storage:
volumeClaimTemplate:
spec:
storageClassName: managed-nfs-storage
resources:
requests:
storage: 50Gi
修改Grafana部署文件
vim grafana/grafana-deployment.yaml
volumes:
# - emptyDir: {} # 注释此两行,新增下三行
# name: grafana-storage
- name: grafana-storage
persistentVolumeClaim:
claimName: grafana-pvc
- name: grafana-datasources
secret:
secretName: grafana-datasources
2.2、准备 持久化配置(nfs-storage)
三、启动服务
[root@Master02 manifests]# kubectl apply -f setup/
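使用apply时会报错:
The CustomResourceDefinition "prometheuses.monitoring.coreos.com" is invalid: metadata.annotations: Too long: must have at most 262144 bytes
原因是客户端apply会把完整清单写入last-applied-configuration注解,超出了注解大小上限;最好使用kubectl create -f setup/,或者使用kubectl apply --server-side -f setup/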
创建PVC
kubectl create -f grafana/grafana-pvc.yaml
启动服务
先把已经创建过的PVC文件从grafana目录中移出来,避免下一步批量创建时因资源已存在(AlreadyExists)而报错
[root@Master02 manifests]# mv grafana/grafana-pvc.yaml ./
创建文件
[root@Master02 manifests]# kubectl create -f adapter/ -f alertmanager/ -f blackbox/ -f grafana/ -f kube-state-metrics/ -f node-exporter/ -f prometheus/ -f kube-control-plane/ -f serviceMonitor/
创建完成后有一个报错。
查看日志信息
err="opening storage failed: lock DB directory: resource temporarily unavailable"
前往NFS共享服务器目录删除lock文件
然后回到Master 删除报错的Pod后再次查看启动正常
启动完成如下图
配置Ingress-Nginx
持久化存储日志
---
apiVersion: v1
kind: Namespace
metadata:
name: ingress-nginx
labels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/instance: ingress-nginx
---
# 添加 ingress的 日志持久化 pvc
# add ingress logs volume-pvc
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: ingress-logs-pvc
namespace: ingress-nginx
spec:
# 指向我们创建的 nfs-storage
storageClassName: prometheus-nfs
accessModes:
- ReadWriteMany
resources:
requests:
storage: 10Gi
---
...
---
# Source: ingress-nginx/templates/controller-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
labels:
helm.sh/chart: ingress-nginx-3.30.0
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/instance: ingress-nginx
app.kubernetes.io/version: 0.46.0
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/component: controller
name: ingress-nginx-controller
namespace: ingress-nginx
data:
# 设置configmap 定义日志输出目录
access-log-path: "/var/log/nginx/access.log"
error-log-path: "/var/log/nginx/error.log"
...
args:
- /nginx-ingress-controller
- --election-id=ingress-controller-leader
- --ingress-class=nginx
- --configmap=$(POD_NAMESPACE)/ingress-nginx-controller
- --validating-webhook=:8443
- --validating-webhook-certificate=/usr/local/certificates/cert
- --validating-webhook-key=/usr/local/certificates/key
- --v=2
# 添加下面两行,日志输出路径
- --log_dir=/var/log/nginx/
- --logtostderr=false
...
volumeMounts:
- name: webhook-cert
mountPath: /usr/local/certificates/
readOnly: true
# 添加挂载目录
- name: ingress-logs
mountPath: /var/log/nginx/
resources:
requests:
cpu: 100m
memory: 90Mi
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: ingress-nginx
terminationGracePeriodSeconds: 300
volumes:
- name: webhook-cert
secret:
secretName: ingress-nginx-admission
#持久化日志
- name: ingress-logs
persistentVolumeClaim:
claimName: ingress-logs-pvc
...
编写ingress 代理规则
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: prom-ingress
namespace: monitoring
annotations:
kubernetes.io/ingress.class: "nginx"
prometheus.io/http_probe: "true"
spec:
rules:
- host: alert.nercoa.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: alertmanager-main
port:
number: 9093
- host: grafana.nercoa.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
number: 3000
- host: prom.nercoa.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: prometheus-k8s
port:
number: 9090
但是在执行时报错:Error from server (InternalError): error when creating "prom-ingress.yaml": Internal error occurred: failed calling webhook "validate.nginx.ingress.kubernetes.io": Post "https://ingress-nginx-controller-admission.ingress-nginx.svc:443/networking/v1/ingresses?timeout=10s": x509: certificate signed by unknown authority
删除指定资源后再创建正常:
kubectl delete -A validatingwebhookconfigurations.admissionregistration.k8s.io ingress-nginx-admission
监测WindowsNode
安装提供的Exporter组件即可。 下载地址
安装完成后需要在K8S环境中进行配置Secrets,参考链接
find . -name '*-serviceMon*' -exec mv {} ./serviceMonitor/ \;
增加Zabbix数据源
https://grafana.com/grafana/plugins/alexanderzobnin-zabbix-app/?tab=installation https://grafana.com/api/plugins/alexanderzobnin-zabbix-app/versions/4.2.4/download
[root@Master02 grafana-zabbix-plugins]# kubectl cp alexanderzobnin-zabbix-app-4.2.4.zip -n monitoring grafana-59d5b5c887-s2jcn:/var/lib/grafana/plugins/
最好是将这个Grafana的插件目录给挂载出来,这样新增插件时直接解压拷贝过去就好了。
[root@Master02 grafana]# kubectl exec -it -n monitoring grafana-59d5b5c887-s2jcn sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/usr/share/grafana $ pwd
/usr/share/grafana
/usr/share/grafana $ cd /var/lib/grafana/plugins/
/var/lib/grafana/plugins $ ls
alexanderzobnin-zabbix-app-4.2.4.zip
/var/lib/grafana/plugins $ unzip alexanderzobnin-zabbix-app-4.2.4.zip
/var/lib/grafana/plugins $ rm -rf alexanderzobnin-zabbix-app-4.2.4.zip
新增Grafana不支持的数据源
https://grafana.com/grafana/plugins/grafana-simple-json-datasource/?tab=installation
评论区