统计15天内阿里云ECS内存使用率和CPU使用率

  1. 一、缘由
  2. 二、环境
  3. 三、代码实现

一、缘由

本脚本是阿里云资源利用率定期统计方案中的一个脚本。

本脚本的统计方法:监控数据的统计周期为 900 秒,每天共 96 个平均值,先取这 96 个值的中位数;再对 15 天的 15 个中位数取平均值,得到最终的 15 天内 CPU 和内存使用率数值。

进而可以根据阈值判断资源是否处于低利用率状态。

二、环境

Python3.7 + 阿里云云监控SDK + 阿里云ECS的SDK + pandas + numpy

三、代码实现

# -*- coding: utf-8 -*-
import json
import time
from typing import List

import numpy
import pandas as pd
from alibabacloud_cms20190101 import models as cms_20190101_models
from alibabacloud_cms20190101.client import Client as Cms20190101Client
from alibabacloud_ecs20140526 import models as ecs_20140526_models
from alibabacloud_ecs20140526.client import Client as Ecs20140526Client
# Misspelled alias kept only so that no previously importable name disappears.
from alibabacloud_ecs20140526.client import Client as Ecs20140527Client
from alibabacloud_tea_openapi import models as open_api_models

# Region IDs that are scanned when listing ECS instances.
REGION_ID = [
    'cn-beijing',
    'cn-hangzhou',
    'cn-zhangjiakou',
    'cn-shenzhen',
    'cn-shanghai',
    'cn-hongkong',
    'ap-southeast-1',
    'ap-northeast-1',
]


class GetMonitorData:
    """Thin wrapper around the CloudMonitor (CMS) ``DescribeMetricList`` call."""

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Cms20190101Client:
        """Build a CloudMonitor client for the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret paired with ``access_key_id``.

        Returns:
            A client configured for ``metrics.cn-hangzhou.aliyuncs.com``.
        """
        # Honour the caller-supplied credentials; they used to be ignored in
        # favour of literals hard-coded at this point.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        config.endpoint = 'metrics.cn-hangzhou.aliyuncs.com'
        return Cms20190101Client(config)

    @staticmethod
    def main(
        args: List[str],
    ) -> 'cms_20190101_models.DescribeMetricListResponseBody':
        """Fetch the datapoints of one metric for one instance.

        The request always uses ``period='900'`` and ``length='100'``.

        Args:
            args: Positional values, in this order: namespace, metric name,
                start time, end time, instance ID. Callers in this file pass
                the two times as millisecond epoch timestamps in string form.

        Returns:
            The ``body`` of the ``DescribeMetricList`` response.
        """
        # Placeholder credentials: replace them with a real AccessKey pair
        # (preferably loaded from outside the source file) before running.
        client = GetMonitorData.create_client('accessKeyId', 'accessKeySecret')
        describe_metric_list_request = cms_20190101_models.DescribeMetricListRequest(
            metric_name=args[1],
            namespace=args[0],
            period='900',
            start_time=args[2],
            end_time=args[3],
            length='100',
            # json.dumps quotes the instance ID, so the filter is valid JSON;
            # the previous str.format template emitted it unquoted.
            dimensions=json.dumps({'instanceId': args[4]}),
        )
        res = client.describe_metric_list(describe_metric_list_request)
        return res.body


class GetInstanceIdName:
    """Thin wrapper around the ECS ``DescribeInstances`` call."""

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> Ecs20140526Client:
        """Build an ECS client for the given AccessKey pair.

        Args:
            access_key_id: AccessKey ID used to sign requests.
            access_key_secret: AccessKey secret paired with ``access_key_id``.

        Returns:
            A client configured for ``ecs-cn-hangzhou.aliyuncs.com``.
        """
        # Honour the caller-supplied credentials; they used to be ignored in
        # favour of literals hard-coded at this point.
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        # NOTE(review): one fixed endpoint is used for every region passed to
        # main() - confirm it serves all of them.
        config.endpoint = 'ecs-cn-hangzhou.aliyuncs.com'
        return Ecs20140526Client(config)

    @staticmethod
    def main(
        args: List[str],
    ) -> 'ecs_20140526_models.DescribeInstancesResponseBody':
        """Fetch one page (at most 50 entries) of ECS instances in a region.

        Args:
            args: Positional values, in this order: paging token, region ID.

        Returns:
            The ``body`` of the ``DescribeInstances`` response.
        """
        # Placeholder credentials: replace them with a real AccessKey pair
        # (preferably loaded from outside the source file) before running.
        client = GetInstanceIdName.create_client('accessKeyId', 'accessKeySecret')
        describe_instances_request = ecs_20140526_models.DescribeInstancesRequest(
            region_id=args[1],
            next_token=args[0],
            max_results=50,
        )
        res = client.describe_instances(describe_instances_request)
        return res.body


def get_id_name_dict():
    """Build a mapping of ECS instance ID to instance name.

    Every region in ``REGION_ID`` is walked page by page through
    ``GetInstanceIdName.main`` until the response carries no further token.
    The finished mapping is printed and then returned.
    """
    id_to_name = {}
    for region in REGION_ID:
        # A truthy seed value is needed to enter the paging loop.
        # NOTE(review): this seed is also sent as the first request's token -
        # confirm the API tolerates it.
        next_token = 'init_data'
        while next_token:
            page = GetInstanceIdName.main([next_token, region])
            next_token = page.next_token
            for instance in page.instances.instance:
                id_to_name[instance.instance_id] = instance.instance_name
    print('实例ID和名字的字典:', id_to_name)
    return id_to_name


def get_median_24h(instance_dict, pre_days, metric_name):
    """Return, per instance, the median of one day's ``Average`` datapoints.

    The queried day is the local calendar day ``pre_days`` days before today
    (``pre_days=1`` is yesterday), from 00:00:00 through 23:59:59.

    Args:
        instance_dict: Mapping keyed by ECS instance ID; only the keys are used.
        pre_days: How many days before today the queried day lies.
        metric_name: Metric to query in the ``acs_ecs_dashboard`` namespace.

    Returns:
        Dict of instance ID to the median of that day's averages (each rounded
        to two decimals before the median is taken). Instances without any
        datapoints get ``0.0``.
    """
    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    today_time = time.mktime(time.strptime(today, '%Y-%m-%d'))
    # Millisecond timestamps, as strings, for the start and the last second of
    # the queried day.
    start_time = str(round((today_time - 86400 * pre_days) * 1000))
    end_time = str(round((today_time - 86400 * (pre_days - 1) - 1) * 1000))
    namespace = 'acs_ecs_dashboard'

    median_dict = {}
    for instance_id in instance_dict:
        result = GetMonitorData.main(
            [namespace, metric_name, start_time, end_time, instance_id]
        )
        # datapoints is JSON text. A missing value (None or '') is treated
        # like an empty list instead of letting json.loads raise.
        points = json.loads(result.datapoints or '[]')
        # Stopped hosts and hosts without the monitoring agent yield no data;
        # fall back to a single 0.0 so the median is still defined.
        averages = [round(point['Average'], 2) for point in points] or [0.00]
        median_dict[instance_id] = numpy.median(averages)
        # Short pause between consecutive API calls.
        time.sleep(0.2)
    return median_dict


def get_average_15days(instance_dict, metric, days=15):
    """Average the daily medians of a metric over the most recent full days.

    Args:
        instance_dict: Mapping keyed by ECS instance ID, forwarded to
            ``get_median_24h``.
        metric: Metric name, forwarded to ``get_median_24h``.
        days: Number of days to cover, counted back from yesterday.
            Defaults to 15.

    Returns:
        Dict of instance ID to the mean of its daily medians, rounded to three
        decimals. The dict is also printed.

    Raises:
        ValueError: If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError('days must be at least 1')

    # One list of daily medians per instance, filled one day at a time.
    daily_medians = {}
    for pre_days in range(1, days + 1):
        day_result = get_median_24h(instance_dict, pre_days, metric)
        for instance_id, median in day_result.items():
            daily_medians.setdefault(instance_id, []).append(median)

    temp_dict = {
        instance_id: round(numpy.mean(medians), 3)
        for instance_id, medians in daily_medians.items()
    }
    print(temp_dict)
    return temp_dict


def write_to_execl(data, filename='cpu_mem_15days.xlsx'):
    """Write the per-instance usage figures to an Excel workbook.

    Args:
        data: Mapping of row label to a two-item sequence; the items land in
            the 'CPU使用率' and '内存使用率' columns, in that order.
        filename: Path of the workbook to write. Defaults to
            ``'cpu_mem_15days.xlsx'`` in the current working directory.
    """
    df = pd.DataFrame.from_dict(data, orient='index', columns=['CPU使用率', '内存使用率'])
    df.to_excel(filename)


if __name__ == '__main__':
    # Map every ECS instance ID to its instance name.
    instance_dict = get_id_name_dict()

    # Collect one value per metric for each instance ID. The order of
    # ecs_metric fixes the order of the values in each list: CPU, then memory.
    ecs_metric = ['CPUUtilization', 'memory_usedutilization']
    id_list_dict = {instance_id: [] for instance_id in instance_dict}
    for metric in ecs_metric:
        metric_data = get_average_15days(instance_dict, metric)
        for instance_id, average in metric_data.items():
            id_list_dict[instance_id].append(average)

    # Re-key the rows by instance name for the report. Several instances may
    # share a name, and keying by the bare name would silently overwrite all
    # but one of them, so repeated names get the instance ID appended.
    name_counts = {}
    for instance_name in instance_dict.values():
        name_counts[instance_name] = name_counts.get(instance_name, 0) + 1

    report = {}
    for instance_id, instance_name in instance_dict.items():
        if name_counts[instance_name] == 1:
            row_label = instance_name
        else:
            row_label = '{}({})'.format(instance_name, instance_id)
        report[row_label] = id_list_dict[instance_id]
    id_list_dict = report

    # Write the report to the Excel workbook.
    write_to_execl(id_list_dict)

注意:以上代码对每个实例、每一天、每个指标都要单独调用一次云监控接口来获取数据,调用接口次数多,耗时较长。实测 400 台 ECS 执行完本脚本大约需要 5 个小时,请耐心等待。


转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 lxwno.1@163.com

×

喜欢就点赞,疼爱就打赏