1 Star 1 Fork 0

安家/tidb-ansible

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
rolling_update_monitor.yml 17.06 KB
一键复制 编辑 原始数据 按行查看 历史
zhangjinpeng1987 提交于 2019-04-08 13:59 . TiKV 3.0 (#724)
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
---
# Copyright 2016 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
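# The rolling update playbook for the TiDB cluster's monitoring components
# (node_exporter, blackbox_exporter, alertmanager, pushgateway, prometheus,
# grafana and kafka_exporter).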
# Play: apply the check_config_static role on the control machine (localhost).
# Tagged `always`, so it runs no matter which component tag is selected;
# any failure aborts the whole run (any_errors_fatal).
- name: check config locally
  hosts: localhost
  tags:
    - always
  any_errors_fatal: true
  roles:
    - role: check_config_static
# Play: apply the check_config_dynamic role on every inventory host.
# Tagged `always`, so it runs no matter which component tag is selected;
# a failure on any host aborts the whole run (any_errors_fatal).
- name: gather all facts, and check dest
  hosts: all
  tags:
    - always
  any_errors_fatal: true
  roles:
    - role: check_config_dynamic
# Play: rolling update of node_exporter on the monitored_servers group.
# Sequence: stop the running exporter (only when its binary is already
# present), wait for the port to close, apply the node_exporter role, start
# the exporter again, then verify the TCP port and the /metrics endpoint.
- name: rolling update node_exporter
  hosts: monitored_servers
  any_errors_fatal: true
  tags:
    - node_exporter

  pre_tasks:
    # The stop tasks below are skipped when the binary has never been deployed.
    - name: check node_exporter existed
      stat:
        path: "{{ deploy_dir }}/bin/node_exporter"
      register: node_exporter_binary_file

    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop node_exporter by supervise
      shell: ./stop_node_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - process_supervision == 'supervise'
        - node_exporter_binary_file.stat.exists

    - name: stop node_exporter by systemd
      systemd:
        name: "node_exporter-{{ node_exporter_port }}.service"
        state: stopped
      become: true
      when:
        - process_supervision == 'systemd'
        - node_exporter_binary_file.stat.exists

    - name: wait until the node_exporter port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ node_exporter_port }}"
        state: stopped
        msg: "the node_exporter port {{ node_exporter_port }} is not down"

  roles:
    - node_exporter

  post_tasks:
    - name: start node_exporter by supervise
      shell: ./start_node_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: start node_exporter by systemd
      systemd:
        name: "node_exporter-{{ node_exporter_port }}.service"
        state: started
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the node_exporter port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ node_exporter_port }}"
        state: started
        msg: "the node_exporter port {{ node_exporter_port }} is not up"

    # Poll /metrics until it answers HTTP 200: up to 12 attempts, 5 s apart.
    - name: wait until the node_exporter metrics page is available
      uri:
        url: "http://{{ ansible_host }}:{{ node_exporter_port }}/metrics"
      register: node_exporter_http_result
      until: node_exporter_http_result.status == 200
      retries: 12
      delay: 5
# Play: rolling update of blackbox_exporter on the monitored_servers group.
# Sequence: stop the running exporter (only when its config file is already
# present), wait for the port to close, apply the blackbox_exporter role,
# start the exporter again, then verify the TCP port and /metrics endpoint.
- name: rolling update blackbox_exporter
  hosts: monitored_servers
  any_errors_fatal: true
  tags:
    - blackbox_exporter

  pre_tasks:
    # Presence of conf/blackbox.yml is used as the "already deployed" marker;
    # the stop tasks below are skipped when it is missing.
    - name: check blackbox_exporter existed
      stat:
        path: "{{ deploy_dir }}/conf/blackbox.yml"
      register: blackbox_exporter_configure_file

    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop blackbox_exporter by supervise
      shell: ./stop_blackbox_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - process_supervision == 'supervise'
        - blackbox_exporter_configure_file.stat.exists

    - name: stop blackbox_exporter by systemd
      systemd:
        name: "blackbox_exporter-{{ blackbox_exporter_port }}.service"
        state: stopped
      become: true
      when:
        - process_supervision == 'systemd'
        - blackbox_exporter_configure_file.stat.exists

    - name: wait until the blackbox_exporter port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ blackbox_exporter_port }}"
        state: stopped
        msg: "the blackbox_exporter port {{ blackbox_exporter_port }} is not down"

  roles:
    - blackbox_exporter

  post_tasks:
    - name: start blackbox_exporter by supervise
      shell: ./start_blackbox_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: start blackbox_exporter by systemd
      systemd:
        name: "blackbox_exporter-{{ blackbox_exporter_port }}.service"
        state: started
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the blackbox_exporter port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ blackbox_exporter_port }}"
        state: started
        msg: "the blackbox_exporter port {{ blackbox_exporter_port }} is not up"

    # Poll /metrics until it answers HTTP 200: up to 12 attempts, 5 s apart.
    - name: wait until the blackbox_exporter metrics page is available
      uri:
        url: "http://{{ ansible_host }}:{{ blackbox_exporter_port }}/metrics"
      register: blackbox_exporter_http_result
      until: blackbox_exporter_http_result.status == 200
      retries: 12
      delay: 5
# Play: rolling update of alertmanager on the alertmanager_servers group.
# Sequence: stop the running service (only when its config file is already
# present), wait for the port to close, apply the alertmanager role, start
# the service again and wait for the TCP port to open.
- name: rolling update alertmanager
  hosts: alertmanager_servers
  any_errors_fatal: true
  tags:
    - alertmanager

  pre_tasks:
    # Presence of conf/alertmanager.yml is used as the "already deployed"
    # marker; the stop tasks below are skipped when it is missing.
    - name: check alertmanager existed
      stat:
        path: "{{ deploy_dir }}/conf/alertmanager.yml"
      register: alertmanager_configure_file

    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop alertmanager by supervise
      shell: ./stop_alertmanager.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - process_supervision == 'supervise'
        - alertmanager_configure_file.stat.exists

    - name: stop alertmanager by systemd
      systemd:
        name: "alertmanager-{{ alertmanager_port }}.service"
        state: stopped
      become: true
      when:
        - process_supervision == 'systemd'
        - alertmanager_configure_file.stat.exists

    - name: wait until the alertmanager port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ alertmanager_port }}"
        state: stopped
        msg: "the alertmanager port {{ alertmanager_port }} is not down"

  roles:
    - alertmanager

  post_tasks:
    - name: start alertmanager by supervise
      shell: ./start_alertmanager.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: start alertmanager by systemd
      systemd:
        name: "alertmanager-{{ alertmanager_port }}.service"
        state: started
      become: true
      when: process_supervision == 'systemd'

    # Only the TCP port is checked here; this play performs no HTTP probe.
    - name: wait until the alertmanager port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ alertmanager_port }}"
        state: started
        msg: "the alertmanager port {{ alertmanager_port }} is not up"
# Play: rolling update of pushgateway on the monitoring_servers group.
# Sequence: stop the running service, wait for the port to close, apply the
# pushgateway role, start the service again, then verify the TCP port and
# the /metrics endpoint.
# Unlike the exporter plays there is no "already deployed" check: the stop
# tasks run unconditionally for the active supervision mode.
- name: rolling update pushgateway
  hosts: monitoring_servers
  any_errors_fatal: true
  tags:
    - pushgateway

  pre_tasks:
    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop pushgateway by supervise
      shell: ./stop_pushgateway.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: stop pushgateway by systemd
      systemd:
        name: "pushgateway-{{ pushgateway_port }}.service"
        state: stopped
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the pushgateway port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ pushgateway_port }}"
        state: stopped
        msg: "the pushgateway port {{ pushgateway_port }} is not down"

  roles:
    - pushgateway

  post_tasks:
    - name: start pushgateway by supervise
      shell: ./start_pushgateway.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    # enabled: false -> the unit is started now but not enabled at boot.
    - name: start pushgateway by systemd
      systemd:
        name: "pushgateway-{{ pushgateway_port }}.service"
        state: started
        enabled: false
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the pushgateway port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ pushgateway_port }}"
        state: started
        msg: "the pushgateway port {{ pushgateway_port }} is not up"

    # Poll /metrics until it answers HTTP 200: up to 12 attempts, 5 s apart.
    - name: wait until the pushgateway metrics page is available
      uri:
        url: "http://{{ ansible_host }}:{{ pushgateway_port }}/metrics"
      register: pushgateway_http_result
      until: pushgateway_http_result.status == 200
      retries: 12
      delay: 5
# Play: rolling update of prometheus on the monitoring_servers group.
# Sequence: stop the running service, wait for the port to close, apply the
# prometheus role, start the service again, then verify the TCP port and
# the /metrics endpoint.
# There is no "already deployed" check: the stop tasks run unconditionally
# for the active supervision mode.
- name: rolling update prometheus
  hosts: monitoring_servers
  any_errors_fatal: true
  tags:
    - prometheus

  pre_tasks:
    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop prometheus by supervise
      shell: ./stop_prometheus.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: stop prometheus by systemd
      systemd:
        name: "prometheus-{{ prometheus_port }}.service"
        state: stopped
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the prometheus port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ prometheus_port }}"
        state: stopped
        msg: "the prometheus port {{ prometheus_port }} is not down"

  roles:
    - prometheus

  post_tasks:
    - name: start prometheus by supervise
      shell: ./start_prometheus.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    # enabled: false -> the unit is started now but not enabled at boot.
    - name: start prometheus by systemd
      systemd:
        name: "prometheus-{{ prometheus_port }}.service"
        state: started
        enabled: false
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the prometheus port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ prometheus_port }}"
        state: started
        msg: "the prometheus port {{ prometheus_port }} is not up"

    # Poll /metrics until it answers HTTP 200: up to 12 attempts, 5 s apart.
    - name: wait until the prometheus metrics page is available
      uri:
        url: "http://{{ ansible_host }}:{{ prometheus_port }}/metrics"
      register: prometheus_http_result
      until: prometheus_http_result.status == 200
      retries: 12
      delay: 5
# Play: rolling update of grafana and grafana_collector on grafana_servers.
# Sequence:
#   1. stop grafana, then grafana_collector (the latter only when its config
#      file is already present), waiting for each port to close;
#   2. apply the grafana and grafana_collector roles;
#   3. start both services and wait for their ports (and grafana's /login);
#   4. create API keys, register the data source through the grafana HTTP
#      API, render scripts/dests.json on the control machine and run the
#      dashboard import script there.
- name: rolling update grafana
  hosts: grafana_servers
  any_errors_fatal: true
  tags:
    - grafana

  pre_tasks:
    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop grafana by supervise
      shell: ./stop_grafana.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: stop grafana by systemd
      systemd:
        name: "grafana-{{ grafana_port }}.service"
        state: stopped
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the grafana port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ grafana_port }}"
        state: stopped
        msg: "the grafana port {{ grafana_port }} is not down"

    # Presence of conf/grafana_collector.toml is used as the "already
    # deployed" marker; the collector stop tasks are skipped when it is missing.
    - name: check grafana_collector existed
      stat:
        path: "{{ deploy_dir }}/conf/grafana_collector.toml"
      register: grafana_collector_config_file

    - name: stop grafana_collector by supervise
      shell: ./stop_grafana_collector.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - process_supervision == 'supervise'
        - grafana_collector_config_file.stat.exists

    - name: stop grafana_collector by systemd
      systemd:
        name: "grafana_collector-{{ grafana_collector_port }}.service"
        state: stopped
      become: true
      when:
        - process_supervision == 'systemd'
        - grafana_collector_config_file.stat.exists

    - name: wait until the grafana_collector port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ grafana_collector_port }}"
        state: stopped
        msg: "the grafana_collector port {{ grafana_collector_port }} is not down"

  roles:
    - grafana
    - grafana_collector

  post_tasks:
    - name: start grafana by supervise
      shell: ./start_grafana.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    # enabled: false -> the unit is started now but not enabled at boot.
    - name: start grafana by systemd
      systemd:
        name: "grafana-{{ grafana_port }}.service"
        state: started
        enabled: false
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the grafana port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ grafana_port }}"
        state: started
        msg: "the grafana port {{ grafana_port }} is not up"

    # Poll /login until it answers HTTP 200: up to 12 attempts, 5 s apart.
    - name: wait until the grafana login page is available
      uri:
        url: "http://{{ ansible_host }}:{{ grafana_port }}/login"
      register: grafana_http_result
      until: grafana_http_result.status == 200
      retries: 12
      delay: 5

    - name: start grafana_collector by supervise
      shell: ./start_grafana_collector.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when: process_supervision == 'supervise'

    - name: start grafana_collector by systemd
      systemd:
        name: "grafana_collector-{{ grafana_collector_port }}.service"
        state: started
        enabled: false
      become: true
      when: process_supervision == 'systemd'

    - name: wait until the grafana_collector port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ grafana_collector_port }}"
        state: started
        msg: "the grafana_collector port {{ grafana_collector_port }} is not up"

    # grafana_host is consumed by the dests.json template variables below.
    - name: record the grafana host address
      set_fact:
        grafana_host: "{{ ansible_host }}"

    - name: create grafana api keys
      include_tasks: "common_tasks/create_grafana_api_keys.yml"

    # POSTs data_source.json (read from grafana_data_dir on the target host)
    # to the local grafana instance.
    # NOTE(review): the admin credentials are embedded in the URL and will
    # therefore appear in the task's command line / verbose output - consider
    # `no_log: true` or token-based auth.
    # NOTE(review): the `warn` argument is not accepted by recent ansible-core
    # releases; drop it when upgrading Ansible.
    - name: import grafana data source
      shell: >
        curl -q -X POST -d @data_source.json --header 'Content-Type: application/json'
        "http://{{ grafana_admin_user }}:{{ grafana_admin_password }}@127.0.0.1:{{ grafana_port }}/api/datasources"
      args:
        chdir: "{{ grafana_data_dir }}"
        warn: false

    # Rendered on the control machine: the task is delegated to localhost with
    # a local connection and without privilege escalation.
    - name: import grafana dashboards - prepare config
      delegate_to: localhost
      template:
        src: grafana.dest.json.j2
        dest: "{{ playbook_dir }}/scripts/dests.json"
      vars:
        ansible_become: false
        ansible_connection: local
        grafana_dest_config:
          name: "{{ cluster_name | title }}"
          url: "http://{{ grafana_host }}:{{ grafana_port }}/"
          report_url: "http://{{ grafana_host }}:{{ grafana_collector_port }}/"
          user: "{{ grafana_admin_user }}"
          password: "{{ grafana_admin_password }}"
          apikey: "{{ lookup('file', grafana_api_keys_dir + '/grafana_apikey.key') }}"
          datasource: "{{ cluster_name }}"
          titles:
            node: "{{ cluster_name | title }}-Node_exporter"
            pd: "{{ cluster_name | title }}-PD"
            tidb: "{{ cluster_name | title }}-TiDB"
            tikv_summary: "{{ cluster_name | title }}-TiKV-Summary"
            tikv_details: "{{ cluster_name | title }}-TiKV-Details"
            tikv_trouble_shot: "{{ cluster_name | title }}-TiKV-Trouble-Shooting"
            binlog: "{{ cluster_name | title }}-Binlog"
            overview: "{{ cluster_name | title }}-Overview"
            disk_performance: "{{ cluster_name | title }}-Disk-Performance"
            blackbox_exporter: "{{ cluster_name | title }}-Blackbox_exporter"
            kafka_overview: "{{ cluster_name | title }}-Kafka-Overview"
            lightning: "{{ cluster_name | title }}-Lightning"

    # Runs on the control machine from the playbook's scripts/ directory.
    - name: import grafana dashboards - run import script
      delegate_to: localhost
      shell: "python grafana-config-copy.py"
      args:
        chdir: "{{ playbook_dir }}/scripts"
      vars:
        ansible_become: false
        ansible_connection: local
# Play: rolling update of kafka_exporter on the kafka_exporter_servers group.
# Every task is gated on enable_binlog (default: false), so the whole play is
# a no-op unless binlog is enabled.
# Sequence: stop the running exporter (only when its binary is already
# present), wait for the port to close, apply the kafka_exporter role, start
# the exporter again and wait for the TCP port to open.
- name: rolling update kafka_exporter
  hosts: kafka_exporter_servers
  any_errors_fatal: true
  tags:
    - kafka_exporter

  pre_tasks:
    - name: check kafka_exporter existed
      stat:
        path: "{{ deploy_dir }}/bin/kafka_exporter"
      register: kafka_exporter_binary_file
      when: enable_binlog|default(false)

    # Condition order matters: when enable_binlog is false the stat task above
    # is skipped, so `.stat.exists` must only be evaluated after that check.
    # chdir is passed as a module argument (instead of `cd ... &&`) so that a
    # deploy_dir containing spaces or shell metacharacters is handled safely.
    - name: stop kafka_exporter by supervise
      shell: ./stop_kafka_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - enable_binlog|default(false)
        - process_supervision == 'supervise'
        - kafka_exporter_binary_file.stat.exists

    - name: stop kafka_exporter by systemd
      become: true
      systemd:
        name: "kafka_exporter-{{ kafka_exporter_port }}.service"
        state: stopped
        enabled: false
      when:
        - enable_binlog|default(false)
        - process_supervision == 'systemd'
        - kafka_exporter_binary_file.stat.exists

    - name: wait until the kafka_exporter port is down
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ kafka_exporter_port }}"
        state: stopped
        msg: "the kafka_exporter port {{ kafka_exporter_port }} is not down"
      when: enable_binlog|default(false)

  roles:
    # The role is applied only when binlog is enabled AND kafka_addrs is set.
    - role: kafka_exporter
      when: 'enable_binlog|default(false) and kafka_addrs|default("") != ""'

  # NOTE(review): the start tasks below are gated on enable_binlog only, not
  # on kafka_addrs like the role above. With binlog enabled and kafka_addrs
  # empty, they still run although the role was skipped - confirm this is
  # intended.
  post_tasks:
    - name: start kafka_exporter by supervise
      shell: ./start_kafka_exporter.sh
      args:
        chdir: "{{ deploy_dir }}/scripts"
      when:
        - enable_binlog|default(false)
        - process_supervision == 'supervise'

    # enabled: false -> the unit is started now but not enabled at boot.
    - name: start kafka_exporter by systemd
      become: true
      systemd:
        name: "kafka_exporter-{{ kafka_exporter_port }}.service"
        state: started
        enabled: false
      when:
        - enable_binlog|default(false)
        - process_supervision == 'systemd'

    - name: wait until the kafka_exporter port is up
      wait_for:
        host: "{{ ansible_host }}"
        port: "{{ kafka_exporter_port }}"
        state: started
        msg: "the kafka_exporter port {{ kafka_exporter_port }} is not up"
      when: enable_binlog|default(false)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/anjia/tidb-ansible.git
git@gitee.com:anjia/tidb-ansible.git
anjia
tidb-ansible
tidb-ansible
master

搜索帮助

23e8dbc6 1850385 7e0993f3 1850385