代码拉取完成,页面将自动刷新
# -*- coding: utf-8 -*-
#!/usr/bin/python
import sys
import os
import random
import json
from settings import cassandra_conf_dir
'''
功能:使用优化方法在空间中搜索最优参数
'''
'''
类名:sample
功能:读取参数配置文件config.json并进行解析
'''
class sample:
    """Parse the parameter description file and sample values from it.

    The JSON file must contain a ``sample_standard_list`` array. Every entry
    is read through the keys ``type``, ``low-bound``, ``high-bound`` and
    ``interval``.
    """

    def __init__(self, config_json):
        """Load the parameter descriptions.

        Args:
            config_json: Path of the JSON configuration file.
        """
        # The context manager closes the handle even when parsing fails.
        with open(config_json) as json_file:
            data = json.load(json_file)
        self.confjson = data["sample_standard_list"]

    def sampleconf(self):
        """Draw one random value for every ``int`` or ``float`` parameter.

        Returns:
            A list of floats in configuration order. Entries whose ``type``
            is neither ``'int'`` nor ``'float'`` contribute no value.
        """
        x_sample = []
        for conf in self.confjson:
            if conf['type'] == 'int':
                # randrange excludes the upper bound.
                drawn = random.randrange(
                    int(conf['low-bound']),
                    int(conf['high-bound']),
                    int(conf['interval']),
                )
                x_sample.append(float(drawn))
            elif conf['type'] == 'float':
                low = float(conf['low-bound'])
                high = float(conf['high-bound'])
                interval = float(conf['interval'])
                steps = int((high - low) / interval)
                # Start from the LOW bound so the sample stays inside
                # [low, high]; starting from the high bound would leave
                # the allowed range.
                x_sample.append(low + random.randint(0, steps) * interval)
        return x_sample

    def _entry(self, i):
        """Return the i-th parameter description, or None if there is none."""
        for position, conf in enumerate(self.confjson):
            if position == i:
                return conf
        return None

    def getstep(self, i):
        """Return the step (``interval``) of parameter ``i``, or -1 if absent."""
        conf = self._entry(i)
        if conf is None:
            return -1
        return float(conf['interval'])

    def get_high_bound(self, i):
        """Return the upper bound of parameter ``i``, or -1 if absent."""
        conf = self._entry(i)
        if conf is None:
            return -1
        return float(conf['high-bound'])

    def get_low_bound(self, i):
        """Return the lower bound of parameter ``i``, or -1 if absent."""
        conf = self._entry(i)
        if conf is None:
            return -1
        return float(conf['low-bound'])

    def getnum_of_conf(self):
        """Return the number of configured parameters."""
        return len(self.confjson)
def process_line(word, value):
    """Build the replacement configuration line for one keyword.

    The value is truncated to an integer and the result has the form
    ``"<word> : <value>"`` followed by a newline.

    Args:
        word: Configuration keyword.
        value: New value; anything accepted by ``int()``.

    Returns:
        The complete line, including the trailing newline.
    """
    return "%s : %d\n" % (word, int(value))
def rewrite(file_name, keywords, conf):
    """Rewrite a configuration file in place with new keyword values.

    The file is read completely and then written back line by line. A line
    that contains ``#`` anywhere is copied unchanged. Any other line that
    contains one of ``keywords`` as a substring is replaced by the output of
    ``process_line`` for the first matching keyword; all remaining lines are
    copied unchanged.

    Args:
        file_name: Path of the file to update.
        keywords: Keywords to look for, in priority order.
        conf: New values; ``conf[i]`` belongs to ``keywords[i]``.
    """
    with open(file_name, "r") as source:
        original_lines = source.readlines()

    with open(file_name, "w") as target:
        for current in original_lines:
            if "#" in current:
                # Lines carrying a comment marker are never modified.
                target.write(current)
                continue
            for index, keyword in enumerate(keywords):
                if keyword in current:
                    current = process_line(keyword, conf[index])
                    break
            target.write(current)
import time
def restart():
    """Restart Cassandra through the helper shell scripts.

    Runs ``./stop.sh``, waits, runs ``./restart.sh``, waits again and finally
    runs ``./empty_db.sh``. The exit codes of the scripts are not checked.
    """
    pause_after_stop = 5      # seconds
    pause_after_start = 90    # seconds

    os.system('./stop.sh')
    print('INFO:stop cassandra to make parameters settings effective')
    time.sleep(pause_after_stop)

    print('INFO:restart and sleep waking up')
    os.system('./restart.sh')  # invoke the restart script
    time.sleep(pause_after_start)

    os.system('./empty_db.sh')
    print('INFO:restart success')
from benchmark import *
def generate_test(file_name, conf, read_ratio):
    """Apply a parameter set, restart the cluster and benchmark it.

    Steps:
        1. Write ``conf`` into ``file_name`` via ``rewrite``.
        2. Restart the cluster via ``restart``.
        3. Run ``mixed_test(read_ratio)`` and return its result.

    Args:
        file_name: Path of the configuration file to modify.
        conf: Values for ``concurrent_reads`` and ``concurrent_writes``,
            in that order.
        read_ratio: Passed through to ``mixed_test``.

    Returns:
        Whatever ``mixed_test`` returns (used by the caller as elapsed time).
    """
    # Keyword list; positions must line up with the entries of ``conf``.
    tuned_keys = ['concurrent_reads', 'concurrent_writes']
    rewrite(file_name, tuned_keys, conf)
    print('INFO: parameters are updated in file: cassandra.yaml')

    restart()

    print('INFO: start to test new parameters')
    elapse = mixed_test(read_ratio)
    print('%s %s' % ('INFO:elapse time :', elapse))
    print('SUCCESS:a whole test is over')
    return elapse
'''
模拟退火优化算法
参数名称:config_json:配置参数,T为温度,cool为降低温度的比例(幅度)
'''
import math
'''
为了演示及其他需要,如果读写比例与历史数据的读写比例接近,数据量也接近,那么直接返回调优配置
'''
def write_output(d):
    """Serialise ``d`` as JSON into ``output.json`` in the working directory.

    Any existing ``output.json`` is overwritten.

    Args:
        d: JSON-serialisable object.
    """
    with open("output.json", "w") as out_file:
        serialised = json.dumps(d)
        out_file.write(serialised)
def have_optimized(file_name, ratio, num):
    """Reuse a previously tuned configuration for a similar workload.

    Scans ``history.json`` for the first entry whose ``read_ratio`` differs
    from ``ratio`` by less than 0.02 and whose ``test_num`` is within a
    factor of 1.2 of ``num``. On a match the entry, without its
    ``parameters`` field, is written through ``write_output`` and the first
    two stored parameters are written into ``file_name`` through ``rewrite``.

    Args:
        file_name: Path of the configuration file to update on a match.
        ratio: Read ratio of the requested workload.
        num: Data volume of the requested workload.

    Returns:
        True if a matching history entry was applied, otherwise False.
    """
    # No history file simply means nothing has been tuned yet.
    if not os.path.isfile("history.json"):
        return False
    with open("history.json", "r") as f:
        history = json.loads(f.read())

    for h in history:
        read_ratio = h['read_ratio']
        test_num = h['test_num']
        # A non-positive size cannot be compared as a ratio and would
        # otherwise raise ZeroDivisionError.
        if num <= 0 or test_num <= 0:
            continue
        # Ratio of the larger to the smaller data volume (always >= 1).
        r = max(num, test_num) * 1.0 / min(num, test_num)
        if abs(ratio - read_ratio) < 0.02 and r <= 1.2:
            keywords = ['concurrent_reads', 'concurrent_writes']
            p = h["parameters"]
            # Work on a copy so the loaded history entry is left untouched.
            summary = dict(h)
            del summary['parameters']
            write_output(summary)
            # Write the stored parameters into the configuration file.
            rewrite(file_name, keywords, [p[0], p[1]])
            return True
    return False
def _untested_neighbours(vec, s, conflen, tested):
    """Return all vectors one step away from ``vec`` that are not in ``tested``.

    For every parameter index both directions are tried and the moved value
    is clamped to the parameter's bounds. Duplicates are kept on purpose so a
    uniform choice from the result has the same distribution as repeatedly
    drawing a random index/direction until an untested vector appears.
    """
    neighbours = []
    for i in range(conflen):
        step = s.getstep(i)
        low = s.get_low_bound(i)
        high = s.get_high_bound(i)
        for direction in (-1, 1):
            candidate = vec[:]
            candidate[i] += direction * step
            if candidate[i] < low:
                candidate[i] = low
            elif candidate[i] > high:
                candidate[i] = high
            if candidate not in tested:
                neighbours.append(candidate)
    return neighbours


def _record_result(runtime, tunedtime, parameters, read_ratio):
    """Write ``output.json`` and append the run to ``history.json``.

    ``test_num`` is taken from the module namespace, where the script
    section assigns it from ``input.json``.
    """
    lift_ratio = (runtime - tunedtime) * 100.0 / tunedtime
    write_output({
        'runtime': runtime,
        'tuned_runtime': tunedtime,
        'lift_ratio': lift_ratio,
    })

    # Start a fresh history when none exists yet, so a finished tuning run
    # is not lost to a missing file.
    if os.path.isfile("history.json"):
        with open("history.json", "r") as f:
            h = json.loads(f.read())
    else:
        h = []
    h.append({
        "read_ratio": read_ratio,
        "test_num": test_num,
        "runtime": runtime,
        "tuned_runtime": tunedtime,
        "lift_ratio": lift_ratio,
        "parameters": parameters,
    })
    with open("history.json", "w") as f:
        f.write(json.dumps(h))


def optimizer(config_json, T, cool, file_name, iter_num, read_ratio):
    """Search for good parameters with simulated annealing.

    Starts from the default vector ``[32, 32]`` and, in every iteration,
    benchmarks one untested neighbour (one parameter moved by one step,
    clamped to its bounds). A better neighbour is always accepted, a worse
    one with probability ``e ** (-(eb - ea) / T)``. The temperature is
    multiplied by ``cool`` after every iteration.

    The search stops when ``T`` drops to 0.1 or below, when ``iter_num``
    iterations have run, or when every neighbour of the current vector has
    already been tested. If the temperature limit was reached, the current
    vector and its run time are reported; otherwise the fastest tested
    vector is reported. Results go to ``output.json`` and ``history.json``.

    Args:
        config_json: Path of the parameter description file for ``sample``.
        T: Initial temperature.
        cool: Cooling factor applied to ``T`` each iteration.
        file_name: Configuration file handed to ``generate_test``.
        iter_num: Maximum number of iterations.
        read_ratio: Read ratio handed to ``generate_test``.
    """
    s = sample(config_json)
    vec = [32, 32]  # default parameters; the start vector has two entries
    parameters_lst = []
    time_lst = []

    print("%s %s" % ("INFO:testing parameters:", vec))
    ea = generate_test(file_name, vec, read_ratio)
    parameters_lst.append(vec)
    time_lst.append(ea)

    conflen = s.getnum_of_conf()  # total number of configured parameters
    counter = 0
    while T > 0.1 and counter < iter_num:
        counter += 1
        candidates = _untested_neighbours(vec, s, conflen, parameters_lst)
        if not candidates:
            # Every neighbour was benchmarked already; retrying random
            # moves would never terminate.
            print('INFO:all neighbouring parameters have been tested')
            break
        vecb = random.choice(candidates)

        print("%s %s" % ("INFO:testing parameters:", vecb))
        eb = generate_test(file_name, vecb, read_ratio)
        parameters_lst.append(vecb)
        print(parameters_lst)
        time_lst.append(eb)

        if eb < ea or random.random() < pow(math.e, -(eb - ea) / T):
            vec = vecb[:]
            # Keep the cost in sync with the accepted vector; otherwise
            # later candidates are compared against the initial run.
            ea = eb
        T = T * cool

    runtime = time_lst[0]
    if T <= 0.1:
        # Annealing cooled down completely: report the current state with
        # its own run time (not the time of the last tested candidate).
        _record_result(runtime, ea, vec, read_ratio)
        return

    # Otherwise report the fastest configuration seen so far.
    min_pos = time_lst.index(min(time_lst))
    _record_result(runtime, time_lst[min_pos], parameters_lst[min_pos],
                   read_ratio)
# Read the user's request from input.json.
with open("input.json", "r") as f:
    user_in = json.load(f)
read_ratio = user_in['read_ratio']
iteration_num = user_in['iteration_num']
test_num = user_in['test_num']  # also read as a module global by optimizer()

# Tune only when no sufficiently similar workload was found in the history.
if not have_optimized(cassandra_conf_dir, read_ratio, test_num):
    # Generate the test data, then run the search.
    generate_data(test_num)
    optimizer('./config.json', 100000.0, 0.95, cassandra_conf_dir,
              iteration_num, read_ratio)
# NOTE(review): assumed to run in both cases - confirm against the
# original indentation.
print('tuned over')
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。