【原创】Doris Manager 的 Agent 报错:"Agent heartbeat failed too many times" 或 "Agent 多次心跳检查失败"
Tags: 原创, 故障处理, Apache Doris, Doris Manager
错误
```
端口:8972
路径:/soft/manager-agent
异常信息:Agent heartbeat failed too many times

端口:8972
路径:/usr/local/manager-agent
异常信息:Agent 多次心跳检查失败
```
另一套环境也是这个报错:
排查
```
[root@lhrdoris /]# netstat -tulnp | grep 8972
tcp6 0 0 :::8972 :::* LISTEN 2663/agent
[root@lhrdoris /]# ps -ef | grep 2663
root 2663 1 0 Aug12 ? 00:07:46 /soft/manager-agent/lib/agent --config.file /soft/manager-agent/conf/agent.yaml
root 209854 209679 0 08:57 pts/2 00:00:00 grep --color=auto 2663
[root@lhrdoris /]#
[root@lhrdoris /]# telnet 127.0.0.1 8972
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
HTTP/1.1 400 Bad Request
Content-Type: text/plain; charset=utf-8
Connection: close

400 Bad RequestConnection closed by foreign host.
[root@lhrdoris /]#
[root@lhrdoris /]# curl http://127.0.0.1:8972/health
{
    "code": 0,
    "data": "ok"
}
[root@lhrdoris /]# curl http://127.0.0.1:8972/heartbeat
404 page not found
[root@lhrdoris /]#
[root@lhrdoris /]# curl http://127.0.0.1:8972/metrics
# HELP be_api_available show if be api is available, 1 as 'available', 0 as 'unavailable'
# TYPE be_api_available gauge
be_api_available{api="metrics",ip="127.0.0.1",port="8040"} 1
# HELP be_api_response_code show http response code of be api, eg. 200
# TYPE be_api_response_code gauge
be_api_response_code{api="metrics",ip="127.0.0.1",port="8040"} 200
# HELP be_api_response_time_ms show response time of be api
# TYPE be_api_response_time_ms gauge
be_api_response_time_ms{api="metrics",ip="127.0.0.1",port="8040"} 4
# HELP cpu_freq_scaling_governor Show `cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor` result
# TYPE cpu_freq_scaling_governor counter
cpu_freq_scaling_governor{cpu="cpu0",governor="unknown"} 1
cpu_freq_scaling_governor{cpu="cpu1",governor="unknown"} 1
cpu_freq_scaling_governor{cpu="cpu10",governor="unknown"} 1
。。。。。
promhttp_metric_handler_requests_total{code="200"} 3281
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
# HELP sql_execute_result_status show sql execute status, 1 as 'success', 0 as 'fail'
# TYPE sql_execute_result_status gauge
sql_execute_result_status{ip="127.0.0.1",port="9030",sql="select * from __internal_schema.column_statistics limit 10"} 0
# HELP sql_execute_result_time_ms show execute time of sql
# TYPE sql_execute_result_time_ms gauge
sql_execute_result_time_ms{ip="127.0.0.1",port="9030",sql="select * from __internal_schema.column_statistics limit 10"} -1
```




