一、缘由
本脚本是阿里云资源利用率定期统计方案中的其中一个脚本。
本脚本可实现:先对每天95个平均值数据取平均值,再对15天的15个日均值取平均值,得到最终的15天内SLB的后端异常ECS实例个数和实例每秒最大并发连接数。
进而根据阈值进行判断,资源是否处于低利用率状态。
二、环境
Python3.7 + 阿里云云监控SDK + 阿里云SLB的SDK + pandas + numpy
三、代码实现
# -*- coding: utf-8 -*-
import time
import numpy
import json
import pandas as pd
from typing import List
from alibabacloud_slb20140515.client import Client as Slb20140515Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_slb20140515 import models as slb_20140515_models
from alibabacloud_cms20190101.client import Client as Cms20190101Client
from alibabacloud_cms20190101 import models as cms_20190101_models
# SLB instances in the cn-zhangjiakou region cannot be queried with the config
# used for the other regions; they need a zhangjiakou-specific config.
# Regions whose SLB instances are enumerated by this script.
REGION_ID = ['cn-beijing', 'cn-hangzhou', 'cn-hongkong']
# Page size passed to every DescribeLoadBalancers request.
PAGE_SIZE = 100
class GetMonitorData:
    """Thin wrapper around the CloudMonitor (CMS) ``DescribeMetricList`` call."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Cms20190101Client:
        """Build a CMS client authenticated with the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret used to sign requests.

        Returns:
            A client bound to the ``metrics.cn-hangzhou.aliyuncs.com`` endpoint.
        """
        # The credentials passed by the caller are honoured instead of being
        # silently replaced by values hard-coded in this method.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        config.endpoint = 'metrics.cn-hangzhou.aliyuncs.com'
        return Cms20190101Client(config)

    @staticmethod
    def main(
        args: List[str],
    ):
        """Fetch one page of monitoring datapoints for a single instance.

        Args:
            args: Positional request values, in this order:
                ``[namespace, metric_name, start_time, end_time,
                instance_id, next_token]``. ``next_token`` is forwarded
                as-is to the request.

        Returns:
            The body of the ``DescribeMetricList`` response.
        """
        # Placeholder credentials: replace with a real AccessKey pair (ideally
        # loaded from outside the source file) before running.
        client = GetMonitorData.create_client('xxxxxxxx', 'xxxxxxxxxxxx')
        describe_metric_list_request = cms_20190101_models.DescribeMetricListRequest(
            metric_name=args[1],
            namespace=args[0],
            period='900',
            start_time=args[2],
            end_time=args[3],
            length='100',
            # json.dumps quotes the instance ID, so the dimensions string is
            # valid JSON ({"instanceId": "<id>"}) rather than a bare token.
            dimensions=json.dumps({'instanceId': args[4]}),
            next_token=args[5],
        )
        res = client.describe_metric_list(describe_metric_list_request)
        return res.body
class GetInstanceIdName:
    """Thin wrapper around the SLB ``DescribeLoadBalancers`` call."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Slb20140515Client:
        """Build an SLB client authenticated with the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret used to sign requests.

        Returns:
            A client bound to the ``slb.aliyuncs.com`` endpoint.
        """
        # The credentials passed by the caller are honoured instead of being
        # silently replaced by values hard-coded in this method.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        # Endpoint (domain name) the client talks to.
        config.endpoint = 'slb.aliyuncs.com'
        return Slb20140515Client(config)

    @staticmethod
    def main(
        args: List,
    ):
        """Fetch one page of load balancers for a region.

        Args:
            args: ``[region_id, page_number]``; the page size is always the
                module-level ``PAGE_SIZE``.

        Returns:
            The body of the ``DescribeLoadBalancers`` response.
        """
        # Placeholder credentials: replace with a real AccessKey pair (ideally
        # loaded from outside the source file) before running.
        client = GetInstanceIdName.create_client('xxxxxxxxxxx', 'xxxxxxxxxxxxx')
        describe_load_balancers_request = slb_20140515_models.DescribeLoadBalancersRequest(
            region_id=args[0],
            page_size=PAGE_SIZE,
            page_number=args[1],
        )
        res = client.describe_load_balancers(describe_load_balancers_request)
        return res.body
def get_id_name_dict():
    """Collect every SLB instance in ``REGION_ID`` as ``{instance_id: name}``.

    Each region is paged through with ``GetInstanceIdName.main`` until the
    pages fetched so far cover the ``total_count`` reported by the response.
    The resulting mapping is printed and returned.

    Returns:
        dict mapping ``load_balancer_id`` to ``load_balancer_name``.
    """
    instance_dict = {}
    for region_id in REGION_ID:
        page_number = 1
        while True:
            result = GetInstanceIdName.main([region_id, page_number])
            for slb in result.load_balancers.load_balancer:
                instance_dict[slb.load_balancer_id] = slb.load_balancer_name
            # Stop as soon as the fetched pages cover the reported total. This
            # avoids requesting page 1 twice and avoids a trailing empty page
            # when the total is an exact multiple of PAGE_SIZE.
            if page_number * PAGE_SIZE >= result.total_count:
                break
            page_number += 1
    print('实例ID和名字的字典:', instance_dict)
    return instance_dict
def get_average_24h(instance_dict, pre_days, metric_name):
    """Return each instance's mean metric value for one past calendar day.

    The day is selected in local time: ``pre_days=1`` is yesterday,
    ``pre_days=2`` the day before, and so on. The query window runs from
    00:00:00 to 23:59:59 of that day and is sent as millisecond timestamps.

    Args:
        instance_dict: Mapping whose keys are the instance IDs to query.
        pre_days: How many days before today the queried day lies.
        metric_name: Metric to query in the ``acs_slb_dashboard`` namespace.

    Returns:
        dict mapping each instance ID to the mean of its datapoints'
        ``Average`` values (each rounded to 2 decimals before averaging),
        or 0.0 when the instance has no datapoints for that day.
    """
    # Local midnight at the start of today, as a Unix timestamp in seconds.
    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    today_time = time.mktime(time.strptime(today, '%Y-%m-%d'))
    start_time = str(round((today_time - 86400 * pre_days) * 1000))
    end_time = str(round((today_time - 86400 * (pre_days - 1) - 1) * 1000))
    namespace = 'acs_slb_dashboard'

    average_dict = {}
    for instance_id in instance_dict:
        average_list = []
        # No token on the first request; later requests pass the token
        # returned by the previous page.
        token = None
        while True:
            result = GetMonitorData.main(
                [namespace, metric_name, start_time, end_time, instance_id, token]
            )
            datapoints = json.loads(result.datapoints) if result.datapoints else []
            average_list.extend(round(point['Average'], 2) for point in datapoints)
            # Short pause after every request to stay gentle on the API.
            time.sleep(0.2)
            token = result.next_token
            if not token:
                break
        # Instances without monitoring data yield no datapoints. Fall back to
        # a single 0.0 so the mean is defined, and only do so after all pages
        # were read so an empty page cannot discard values already collected.
        average_dict[instance_id] = numpy.mean(average_list or [0.0])
    return average_dict
def get_average_15days(instance_dict, metric, days=15):
    """Average each instance's daily mean of ``metric`` over the past days.

    For every day from yesterday back to ``days`` days ago, the per-instance
    daily mean is obtained from ``get_average_24h``; the daily means are then
    averaged and rounded to 3 decimals. The result is printed and returned.

    Args:
        instance_dict: Mapping whose keys are the instance IDs to query.
        metric: Metric name forwarded to ``get_average_24h``.
        days: Number of past days to include (default 15).

    Returns:
        dict mapping each instance ID to its rounded multi-day average.

    Raises:
        ValueError: If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError('days must be at least 1')

    daily_values = {instance_id: [] for instance_id in instance_dict}
    for pre_days in range(1, days + 1):
        daily_average = get_average_24h(instance_dict, pre_days, metric)
        for instance_id, value in daily_average.items():
            daily_values[instance_id].append(value)

    averages = {
        instance_id: round(numpy.mean(values), 3)
        for instance_id, values in daily_values.items()
    }
    print(averages)
    return averages
def write_to_execl(data, file_name='slb_statistics.xlsx'):
    """Write the per-instance statistics to an Excel workbook.

    Args:
        data: Mapping of row label to a two-item sequence; the items fill the
            two fixed columns in order.
        file_name: Path of the workbook to write. Defaults to
            ``slb_statistics.xlsx`` in the current working directory.
    """
    columns = ['后端异常ECS实例个数', '实例每秒最大并发连接数']
    # orient='index' turns every dict key into a row label.
    df = pd.DataFrame.from_dict(data, orient='index', columns=columns)
    df.to_excel(file_name)
if __name__ == '__main__':
    # Script entry point: gather the SLB statistics and export them to Excel.
    started_at = time.time()

    # Mapping of instance ID -> instance name for every SLB found.
    instance_dict = get_id_name_dict()

    # Collect one averaged value per metric for every instance. The order of
    # ecs_metric determines the order of the values in each row.
    id_list_dict = {instance_id: [] for instance_id in instance_dict}
    ecs_metric = ['UnhealthyServerCount', 'InstanceMaxConnection']
    for metric in ecs_metric:
        metric_data = get_average_15days(instance_dict, metric)
        for instance_id, value in metric_data.items():
            id_list_dict[instance_id].append(value)

    # Label rows by instance name. Names are not guaranteed to be unique or
    # even set, so fall back to the ID (or "name(id)") instead of letting one
    # instance silently overwrite another instance's row.
    rows = {}
    for instance_id, values in id_list_dict.items():
        name = instance_dict[instance_id]
        if not name:
            row_key = instance_id
        elif name in rows:
            row_key = '{}({})'.format(name, instance_id)
        else:
            row_key = name
        rows[row_key] = values

    # Write the data to the Excel sheet.
    write_to_execl(rows)
    print(time.time() - started_at)
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 lxwno.1@163.com