Problem description
I am applying a model-free Q-learning algorithm to keep the response time within [0.4, 0.6] by adding or removing VMs (horizontal autoscaling). I use the scaling factors [0, 1, 2, -1, -2], where 0 means no scaling, 1 means add one VM, and -1 means remove one VM. I read measurements from Amazon CloudWatch once per minute. I trained the algorithm for 10 episodes of 200 steps each. The result is that the chosen action for every state is 0. The Q-table I get is:
[[7.00356635 2.44257089 2.48274487 2.61551342 2.30166671]
[7.06118893 2.88096418 3.68686476 4.12289437 3.78545427]
[6.48884566 3.38330439 2.71853144 3.03036759 3.3645094 ]]
The first row is "L", by which I mean the response time is below 0.4; the last row is "H", meaning the response time is above 0.6. Where is the problem, and why doesn't the agent learn well? I expect the agent to add a VM when the response time is high and to release a VM when it is low.
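To confirm what policy the table encodes: the greedy action per state is just the row-wise argmax of the Q-table. A minimal sketch, assuming the 3x5 table above and the action order [0, 1, 2, -1, -2] from the question:

import numpy

states = ["L", "R", "H"]        # low / in-range / high response time
actions = [0, 1, 2, -1, -2]     # 0: no scaling, +k: add k VMs, -k: remove k VMs
q_table = numpy.array([[7.00356635, 2.44257089, 2.48274487, 2.61551342, 2.30166671],
                       [7.06118893, 2.88096418, 3.68686476, 4.12289437, 3.78545427],
                       [6.48884566, 3.38330439, 2.71853144, 3.03036759, 3.3645094 ]])

for i, state in enumerate(states):
    # column 0 holds the largest value in every row, so the greedy action is always 0
    print(state, "->", actions[numpy.argmax(q_table[i])])

Column 0 dominates every row, which matches the all-zeros behavior described above.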
The code is:
import math
import random
import time
import numpy

# CloudwatchWrapper and AutoscalingWrapper are the asker's own helper classes
# (wrappers around the CloudWatch and Auto Scaling APIs); they are not shown here.

class AgentWrapper:
    states = ["L", "R", "H"]            # Low / in-Range / High response time
    actions = [0, 1, 2, -1, -2]         # no-op, add 1 or 2 VMs, remove 1 or 2 VMs
    q_table = numpy.zeros((len(states), len(actions)))

    def average_RT_normalize(self, a, b, R_value, average_measurment):
        # Squash a raw response-time measurement onto [0, b) with an arctan
        # centred on the target response time R_value.
        normalize = b * 2 / math.pi * math.atan(average_measurment / R_value - a)
        return normalize
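    # Example, with a=0 and b=100 (the values used below): a measurement equal to
    # R_value maps to 100 * 2/pi * atan(1) = 50, a measurement of 0 maps to 0,
    # and very large measurements approach (but never reach) 100.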
    def reward_fun(self, lower_value, R_value, upper_value):
        response_time_reward = 0
        # Triangular reward over the target band: 0 outside [lower_value, upper_value],
        # rising linearly to 1 at R_value and falling back to 0 at upper_value.
        if self.list_metrics[0] <= lower_value:
            response_time_reward = 0
        elif self.list_metrics[0] > lower_value and self.list_metrics[0] <= R_value:
            response_time_reward = (self.list_metrics[0] - lower_value) / (R_value - lower_value)
        elif self.list_metrics[0] > R_value and self.list_metrics[0] <= upper_value:
            response_time_reward = (upper_value - self.list_metrics[0]) / (upper_value - R_value)
        else:
            response_time_reward = 0
        reward_result = response_time_reward
        return reward_result
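    # Worked example with lower_value=0.4, R_value=0.5, upper_value=0.6 (the target
    # range from the question; R_value assumed here to be its midpoint):
    #   response time 0.45 -> reward 0.5  (halfway up the left ramp)
    #   response time 0.50 -> reward 1.0  (exactly at the target)
    #   response time 0.30 or 0.70 -> reward 0  (outside the band on either side)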
    def state_reward(self, namespaces, metric_names, dimensions, statistics,
                     lower_value, R_value, upper_value):
        reward = 0
        CWobject = CloudwatchWrapper()
        self.list_metrics = CWobject.getmetrics(namespaces, metric_names, dimensions, statistics)
        print("Timestamp = ", self.list_metrics[1])
        print("Response Time = ", self.list_metrics[0])
        require_RT_normal = self.average_RT_normalize(0, 100, R_value, R_value)
        lower_value_normalize = self.average_RT_normalize(0, 100, R_value, lower_value)
        upper_value_normalize = self.average_RT_normalize(0, 100, R_value, upper_value)
        normalize_measurment = self.average_RT_normalize(0, 100, R_value, self.list_metrics[0])
        print("normalized Response Time = ", normalize_measurment)
        # Discretize the normalized measurement into the three states L / R / H.
        if normalize_measurment < lower_value_normalize:
            state = type(self).states[0]
        if normalize_measurment >= lower_value_normalize and normalize_measurment <= upper_value_normalize:
            state = type(self).states[1]
        if normalize_measurment > upper_value_normalize:
            state = type(self).states[2]
        reward = self.reward_fun(lower_value, R_value, upper_value)
        return [state, reward]
    def Q_learning(self, namespaces, metric_names, dimensions, statistics,
                   lower_value, R_value, upper_value):
        print("required Response Time Range = [", lower_value, ",", upper_value, "]")
        print("normalized required Response Time Range = [",
              self.average_RT_normalize(0, 100, R_value, lower_value), ",",
              self.average_RT_normalize(0, 100, R_value, upper_value), "]")
        print("Q-table = ")
        print(type(self).q_table)
        print("------------------------------------------------")
        autoscaling = AutoscalingWrapper()
        num_episodes = 10          # as described in the question: 10 episodes ...
        max_step = 200             # ... of 200 steps each
        learning_rate = 0.1
        discount_rate = 0.99
        exploration_rate = 0.9     # epsilon, decayed by 0.1 per episode down to 0.1
        for episode in range(num_episodes):
            print("Episode_number = ", episode)
            state_reward_var = self.state_reward(namespaces, metric_names, dimensions,
                                                 statistics, lower_value, R_value, upper_value)
            for step in range(max_step):
                exploration_rate_threshold = random.uniform(0, 1)
                print("exploration_rate = ", exploration_rate)
                print("exploration_rate_threshold = ", exploration_rate_threshold)
                # Epsilon-greedy: exploit the Q-table when the draw exceeds epsilon,
                # otherwise pick a random action.
                if exploration_rate_threshold >= exploration_rate:
                    action = type(self).actions[numpy.argmax(
                        type(self).q_table[type(self).states.index(state_reward_var[0]), :])]
                    print("action q_table...........................")
                else:
                    action = random.choice(type(self).actions)
                    print("action randomly............................")
                print("action = ", action)
                print("state = ", state_reward_var[0])
                if action > 0:  # run inactive instances according to the action (scale out)
                    inactive_instances = autoscaling.get_autoscaling_group_max(dimensions) \
                        - autoscaling.get_autoscaling_group_desired(dimensions)
                    print("inactive instances = ", inactive_instances)
                    # if the required number of inactive instances is available
                    if inactive_instances >= action and inactive_instances > 0:
                        print("inactive instances are available for scaling out................")
                        autoscaling.updat_autoscaling_group(
                            dimensions, action + autoscaling.get_autoscaling_group_desired(dimensions))
                    # if the required number of inactive instances is not available
                    elif inactive_instances > 0 and inactive_instances < action:
                        print("inactive instances are not available for scaling out.................")
                        # autoscaling.updat_autoscaling_group(dimensions, autoscaling.get_autoscaling_group_max(dimensions))
                        action = 0
                        print("Action will be replaced to action = 0")
                    elif inactive_instances == 0:
                        print("Scaling out is not possible since no instances are available......")
                        action = 0
                        print("Action will be replaced to action = 0")
                elif action < 0:  # release active instances according to the action (scale in)
                    active_instances = autoscaling.get_autoscaling_group_desired(dimensions)
                    print("active instances = ", active_instances)
                    if active_instances == autoscaling.get_autoscaling_group_min(dimensions):
                        print("Scale in is not possible since active instances are the minimum.....")
                        action = 0
                        print("Action will be replaced to action = 0")
                    elif active_instances > abs(action) and \
                            active_instances + action >= autoscaling.get_autoscaling_group_min(dimensions):
                        print("active instances are available for scaling in..............")
                        autoscaling.updat_autoscaling_group(
                            dimensions, action + autoscaling.get_autoscaling_group_desired(dimensions))
                    # if the required number of active instances is not available
                    elif active_instances + action < autoscaling.get_autoscaling_group_min(dimensions) \
                            and active_instances <= abs(action):
                        print("active instances are not available for scaling in..............")
                        # autoscaling.updat_autoscaling_group(dimensions, autoscaling.get_autoscaling_group_min(dimensions))
                        action = 0
                        print("Action will be replaced to action = 0")
                elif action == 0:
                    print("scaling is not active..................................")
                # observe the new state
                print("sleep for 1 minute until new metrics arrive............")
                time.sleep(60)
                new_state_reward_var = self.state_reward(namespaces, metric_names, dimensions,
                                                         statistics, lower_value, R_value, upper_value)
                # get the intermediate reward
                print("reward = ", new_state_reward_var[1])
                # update the Q-table estimate Q(s, a)
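                # Standard tabular Q-learning update:
                #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
                # with alpha = learning_rate and gamma = discount_rate.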
                old_estimate = type(self).q_table[type(self).states.index(state_reward_var[0]),
                                                  type(self).actions.index(action)]
                optimal_new_state = numpy.max(
                    type(self).q_table[type(self).states.index(new_state_reward_var[0]), :])
                print("Estimation equation = ", old_estimate, "+", learning_rate, "*(",
                      new_state_reward_var[1], "+(", discount_rate, "*", optimal_new_state,
                      ")-", old_estimate, ")")
                type(self).q_table[type(self).states.index(state_reward_var[0]),
                                   type(self).actions.index(action)] = old_estimate + \
                    learning_rate * (new_state_reward_var[1] + (discount_rate * optimal_new_state) - old_estimate)
print("Q_Table = ")
print(type(self).q_table)
# make the new state is the current state
state_reward_var = new_state_reward_var
#if new_state_reward_var[1] != 0:
# break
if exploration_rate > 0.1:
exploration_rate = round(exploration_rate-0.1,1)
time.sleep(60)
print("------------------------------------------------------------------------------------------")