统计15天内阿里云SLB后端异常和并发指标

  1. 一、缘由
  2. 二、环境
  3. 三、代码实现

一、缘由

本脚本是阿里云资源利用率定期统计方案中的其中一个脚本。

本脚本可实现:先对每天按15分钟粒度返回的监控平均值数据(每天约96个)取日平均值,再对15天的15个日平均值取平均值,得到最终的15天内SLB后端异常ECS实例个数和实例每秒最大并发连接数。

进而根据阈值进行判断,资源是否处于低利用率状态。

二、环境

Python3.7 + 阿里云云监控SDK + 阿里云SLB的SDK + pandas + numpy

三、代码实现

# -*- coding: utf-8 -*-
import json
import os
import time
from typing import List

import numpy
import pandas as pd
from alibabacloud_cms20190101 import models as cms_20190101_models
from alibabacloud_cms20190101.client import Client as Cms20190101Client
from alibabacloud_slb20140515 import models as slb_20140515_models
from alibabacloud_slb20140515.client import Client as Slb20140515Client
from alibabacloud_tea_openapi import models as open_api_models

# SLBs in the cn-zhangjiakou region cannot be looked up through another
# region's config; they can only be queried with a Zhangjiakou config.
REGION_ID = ['cn-beijing', 'cn-hangzhou', 'cn-hongkong']
# Page size sent with every load-balancer listing request.
PAGE_SIZE = 100

class GetMonitorData:
    """Query CloudMonitor (CMS) metric datapoints via ``DescribeMetricList``."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Cms20190101Client:
        """Build a CMS client authenticated with the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret used to sign requests.

        Returns:
            A client bound to the ``metrics.cn-hangzhou.aliyuncs.com`` endpoint.
        """
        # Use the caller's credentials; they were previously ignored in favour
        # of literals hard-coded in this method.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        config.endpoint = 'metrics.cn-hangzhou.aliyuncs.com'
        return Cms20190101Client(config)

    @staticmethod
    def main(
        args: List[str],
    ) -> 'cms_20190101_models.DescribeMetricListResponseBody':
        """Fetch one page of 15-minute datapoints for a single instance.

        Args:
            args: Positional request values, in this order:
                ``[namespace, metric_name, start_time, end_time,
                instance_id, next_token]``.

        Returns:
            The body of the ``DescribeMetricList`` response.

        Raises:
            KeyError: If ``ALIBABA_CLOUD_ACCESS_KEY_ID`` or
                ``ALIBABA_CLOUD_ACCESS_KEY_SECRET`` is not set in the
                environment.
        """
        # Credentials come from the environment so that no secret has to be
        # written into the source file.
        client = GetMonitorData.create_client(
            os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'],
            os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'],
        )
        describe_metric_list_request = cms_20190101_models.DescribeMetricListRequest(
            metric_name=args[1],
            namespace=args[0],
            period='900',
            start_time=args[2],
            end_time=args[3],
            length='100',
            # Serialise with json.dumps so the instance ID is emitted as a
            # quoted JSON string; plain str.format left it unquoted.
            dimensions=json.dumps({'instanceId': args[4]}),
            next_token=args[5],
        )
        res = client.describe_metric_list(describe_metric_list_request)
        return res.body


class GetInstanceIdName:
    """List SLB instances of a region via ``DescribeLoadBalancers``."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Slb20140515Client:
        """Build an SLB client authenticated with the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret used to sign requests.

        Returns:
            A client bound to the ``slb.aliyuncs.com`` endpoint.
        """
        # Use the caller's credentials; they were previously ignored in favour
        # of literals hard-coded in this method.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        # Service endpoint (domain name) to call.
        config.endpoint = 'slb.aliyuncs.com'
        return Slb20140515Client(config)

    @staticmethod
    def main(
        args: list,
    ) -> 'slb_20140515_models.DescribeLoadBalancersResponseBody':
        """Fetch one page of load balancers for a region.

        Args:
            args: ``[region_id, page_number]``; the page number is 1-based.

        Returns:
            The body of the ``DescribeLoadBalancers`` response.

        Raises:
            KeyError: If ``ALIBABA_CLOUD_ACCESS_KEY_ID`` or
                ``ALIBABA_CLOUD_ACCESS_KEY_SECRET`` is not set in the
                environment.
        """
        # Credentials come from the environment so that no secret has to be
        # written into the source file.
        client = GetInstanceIdName.create_client(
            os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'],
            os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'],
        )
        describe_load_balancers_request = slb_20140515_models.DescribeLoadBalancersRequest(
            region_id=args[0],
            page_size=PAGE_SIZE,
            page_number=args[1],
        )
        res = client.describe_load_balancers(describe_load_balancers_request)
        return res.body


def get_id_name_dict():
    """Collect every SLB instance in the configured regions.

    Walks all pages of the load-balancer listing for each region in
    ``REGION_ID`` and prints the resulting mapping before returning it.

    Returns:
        dict: Maps load balancer ID to load balancer name.
    """
    instance_dict = {}
    for region_id in REGION_ID:
        page_number = 1
        while True:
            result = GetInstanceIdName.main([region_id, page_number])
            for slb in result.load_balancers.load_balancer:
                instance_dict[slb.load_balancer_id] = slb.load_balancer_name
            # Stop once the pages read so far cover the reported total. This
            # reuses the first response instead of fetching page 1 twice, and
            # avoids an extra empty request when the total is an exact
            # multiple of PAGE_SIZE.
            if page_number * PAGE_SIZE >= result.total_count:
                break
            page_number += 1
    print('实例ID和名字的字典:', instance_dict)
    return instance_dict


def get_average_24h(instance_dict, pre_days, metric_name):
    """Average one metric over a single past day for every instance.

    Args:
        instance_dict: Mapping whose keys are the instance IDs to query.
        pre_days: How many days back the queried day lies (1 = yesterday).
        metric_name: Metric name in the ``acs_slb_dashboard`` namespace.

    Returns:
        dict: Maps instance ID to the mean of that day's ``Average``
        datapoints, or 0.0 when no datapoint was returned at all.
    """
    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    today_time = time.mktime(time.strptime(today, '%Y-%m-%d'))
    # Millisecond timestamps for 00:00:00 through 23:59:59 (local time) of the
    # day that lies `pre_days` days before today.
    start_time = str(round((today_time - 86400 * pre_days) * 1000))
    end_time = str(round((today_time - 86400 * (pre_days - 1) - 1) * 1000))
    namespace = 'acs_slb_dashboard'

    average_dict = {}
    for instance_id in instance_dict:
        # NOTE(review): the first request sends the placeholder token
        # 'init_data' -- confirm the API ignores an unknown token.
        token = 'init_data'
        average_list = []
        while token:
            result = GetMonitorData.main(
                [namespace, metric_name, start_time, end_time, instance_id, token]
            )
            token = result.next_token
            # An empty page contributes nothing; it must not wipe samples
            # already gathered from earlier pages.
            average_list.extend(
                round(point['Average'], 2)
                for point in json.loads(result.datapoints)
            )
        # Instances without any monitoring data yield no datapoints; report
        # them as 0.0 instead of averaging an empty list.
        if not average_list:
            average_list = [0.00]
        average_dict[instance_id] = numpy.mean(average_list)
        # Short pause between instances, kept from the original script.
        time.sleep(0.2)
    return average_dict


def get_average_15days(instance_dict, metric):
    """Average a metric's daily means over the previous 15 days.

    Args:
        instance_dict: Mapping whose keys are the instance IDs to query.
        metric: Metric name forwarded to ``get_average_24h``.

    Returns:
        dict: Maps instance ID to the mean of its 15 daily averages, rounded
        to three decimals. The mapping is also printed.
    """
    # Day 1 (yesterday) seeds one list per instance.
    daily_values = {
        instance_id: [day_average]
        for instance_id, day_average
        in get_average_24h(instance_dict, 1, metric).items()
    }
    # Days 2..15 extend those lists.
    for days_back in range(2, 16):
        day_result = get_average_24h(instance_dict, days_back, metric)
        for instance_id, day_average in day_result.items():
            daily_values[instance_id].append(day_average)

    for instance_id in daily_values:
        daily_values[instance_id] = round(numpy.mean(daily_values[instance_id]), 3)
    print(daily_values)
    return daily_values


def write_to_execl(data):
    """Write the per-instance statistics to ``slb_statistics.xlsx``.

    Args:
        data: Mapping of row label to a two-item sequence holding the
            unhealthy-backend count and the max-connection value, in the
            order of the two column headers below.
    """
    column_names = ['后端异常ECS实例个数', '实例每秒最大并发连接数']
    frame = pd.DataFrame.from_dict(data, orient='index', columns=column_names)
    frame.to_excel('slb_statistics.xlsx')


if __name__ == '__main__':
    # Script entry point: collect both metrics for every SLB, write them to
    # the Excel sheet, then print the elapsed time in seconds.
    str_time = time.time()
    # Mapping of instance ID -> instance name.
    instance_dict = get_id_name_dict()
    # One row per instance; each metric appends one column value.
    id_list_dict = {instance_id: [] for instance_id in instance_dict}
    ecs_metric = ['UnhealthyServerCount', 'InstanceMaxConnection']
    for metric in ecs_metric:
        metric_data = get_average_15days(instance_dict, metric)
        for instance_id, value in metric_data.items():
            id_list_dict[instance_id].append(value)
    # Label rows by instance name. Names can be empty or shared by several
    # instances; using them directly as keys would let later rows overwrite
    # earlier ones, so such labels are disambiguated with the instance ID.
    name_counts = {}
    for instance_name in instance_dict.values():
        name_counts[instance_name] = name_counts.get(instance_name, 0) + 1
    rows = {}
    for instance_id, instance_name in instance_dict.items():
        if instance_name and name_counts[instance_name] == 1:
            label = instance_name
        else:
            label = '{}({})'.format(instance_name or '', instance_id)
        rows[label] = id_list_dict[instance_id]
    # Write the data to the Excel sheet.
    write_to_execl(rows)
    print(time.time() - str_time)

转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 lxwno.1@163.com

×

喜欢就点赞,疼爱就打赏