1 Star 0 Fork 0

spartanbin/logistics_optimization

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
simulation_environment.py 9.51 KB
一键复制 编辑 原始数据 按行查看 历史
spartanbin 提交于 2023-05-05 19:09 . rerun
'''
Simulation environment for RL
'''
from typing import Optional
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
class rl_env:
    '''
    Single-courier pickup-and-delivery simulation environment for RL.

    Node 0 is the warehouse; nodes 1..N-1 are clients. Every row of the node
    table holds, in this order:
    node id, x, y, start time, end time, servicing time, delivery quantity,
    pickup quantity.
    All times are in minutes.

    The state returned by ``reset`` and ``step_by_action_prob`` is the tuple
    ``(past_time, speed, volume, node_state, courier, no_longer_accessible)``
    where ``node_state`` is the node table without the two time-window columns.
    '''

    # Column order of the internal node table (these are the spreadsheet headers).
    _NODE_COLUMNS = [
        '编号', 'x坐标', 'y坐标', '开始时间', '结束时间', '服务时间', '送货量', '取货量']

    def __init__(
            self,
            node_info: pd.DataFrame,
            default_out_of_warehouse_load: Optional[int] = None,
            *,
            volume: int = 3000,
            speed: float = 7 / 6,
            max_timesteps: int = 1000,
    ):
        '''
        All times are in minutes

        :param node_info: one row per node (row 0 is the warehouse) with the
            columns 编号, x坐标, y坐标, 服务时间, 送货量, 取货量. The frame is not modified.
        :param default_out_of_warehouse_load: load the courier carries each time
            it leaves the warehouse; ``None`` derives it from the node table on
            the first ``reset``.
        :param volume: capacity of the courier.
        :param speed: travelling speed, in distance units per minute.
        :param max_timesteps: an episode is reported as done after this many steps.
        '''
        # Work on a copy so the caller's DataFrame does not grow two extra columns.
        table = node_info.copy()
        # Time windows are effectively disabled: every client is open from 0
        # until a very large end time.
        table['开始时间'] = 0
        table['结束时间'] = 100000000000
        # int64 explicitly: the end-time sentinel does not fit in a 32-bit int.
        self.node_info = table[self._NODE_COLUMNS].values.astype(np.int64)
        # The warehouse has no time window, servicing time or quantities.
        self.node_info[0, 3:] = -1
        self.nodes = self.node_info.copy()

        # courier status: location (node id), load
        self.courier = np.array([-1, -1])
        # volume of courier
        self.volume = volume

        load = default_out_of_warehouse_load
        load_is_integer = isinstance(load, (int, np.integer)) and not isinstance(load, bool)
        assert load_is_integer or (load is None), \
            'default_out_of_warehouse_load must be an int or None'
        if load_is_integer:
            assert load <= self.volume, \
                'default_out_of_warehouse_load must not exceed the courier volume'
            load = int(load)
        self.default_out_of_warehouse_load = load

        # Pairwise Euclidean distances between all nodes, computed by broadcasting.
        xs = self.nodes[:, 1].astype(np.float64)
        ys = self.nodes[:, 2].astype(np.float64)
        self.reaching_distance = np.hypot(
            xs[:, None] - xs[None, :], ys[:, None] - ys[None, :]).astype(np.float32)
        self.speed = speed
        self.reaching_time = self.reaching_distance / self.speed
        # Penalty for choosing the node the courier is already at: the negative
        # mean travel time from the warehouse to the clients.
        self.same_location_reward = -self.reaching_time[0, 1:].mean()

        self.max_timesteps = max_timesteps
        self.past_time = 0
        # One flag per client (index = node id - 1); True means the client can
        # no longer be served in this episode.
        self.no_longer_accessible = np.ones(len(self.nodes) - 1, dtype=bool)
        self.timestep = 0

    def _mark_timed_out(self):
        '''Flag every client whose end time passes before the courier could reach it.'''
        earliest_arrival = self.past_time + self.reaching_time[self.courier[0], 1:]
        self.no_longer_accessible[self.nodes[1:, 4] < earliest_arrival] = True

    def _load_at_warehouse(self):
        '''Set the courier load to the default load, capped by the remaining deliverable quantity.'''
        remaining_delivery = self.nodes[1:, 6][~self.no_longer_accessible].sum()
        self.courier[1] = min(self.default_out_of_warehouse_load, remaining_delivery)

    def _serve_client(self, client):
        '''
        Wait for, serve, unload at and load from ``client``.

        :return: the waiting plus servicing time that was spent.
        '''
        spent = 0
        # add waiting time
        opens_at = self.nodes[client, 3]
        if self.past_time < opens_at:
            spent += opens_at - self.past_time
            self.past_time = opens_at
        # add serving time
        service_time = self.nodes[client, 5]
        self.past_time += service_time
        spent += service_time

        # update timed out clients; a served client is never visited again
        self._mark_timed_out()
        self.no_longer_accessible[client - 1] = True

        # update courier load status: deliver as much as is on board, then
        # pick up as much as still fits
        load = self.courier[1]
        delivered = min(load, self.nodes[client, 6])
        self.nodes[client, 6] -= delivered
        load -= delivered
        picked_up = min(self.nodes[client, 7], self.volume - load)
        self.nodes[client, 7] -= picked_up
        self.courier[1] = load + picked_up
        return spent

    def _build_state(self):
        '''Return a snapshot of the environment; arrays are copies.'''
        # Drop the start-time / end-time columns from the observation.
        node_state = np.delete(self.nodes, [3, 4], axis=1)
        return (self.past_time, self.speed, self.volume, node_state,
                self.courier.copy(), self.no_longer_accessible.copy())

    def reset(self,):
        '''
        When the simulation of an episode ends, run reset to start a new episode.
        :return: state
        '''
        self.past_time = 0
        self.timestep = 0
        self.nodes = self.node_info.copy()

        if self.default_out_of_warehouse_load is None:
            # Derive the load once: net outflow (deliveries minus pickups).
            load = int(self.nodes[1:, 6].sum() - self.nodes[1:, 7].sum())
            if load < 0:
                # Pickups dominate: carry just enough for the largest delivery.
                load = int(self.nodes[1:, 6].max())
            # Never leave the warehouse with more than the courier can hold.
            self.default_out_of_warehouse_load = min(load, self.volume)

        # The courier starts at the warehouse. This has to be set before the
        # time-out check below, which reads the courier's location.
        self.courier[0] = 0

        self.no_longer_accessible[:] = False
        # update timed out clients
        self._mark_timed_out()
        # update unneeded clients (nothing to deliver and nothing to pick up)
        self.no_longer_accessible[(self.nodes[1:, [6, 7]] == 0).all(axis=1)] = True

        # init courier load
        self._load_at_warehouse()
        return self._build_state()

    def return_allowed_action(self):
        '''Return the ids of all nodes except the one the courier is currently at.'''
        here = self.courier[0]
        return [node_id for node_id in range(len(self.nodes)) if node_id != here]

    def step_by_action_prob(self, ac_prob: torch.Tensor, deterministic: bool):
        '''
        Env receives the action probability, sampling from it, and make one timestep forward.
        :param ac_prob: 1-D tensor with one probability per node id.
        :param deterministic: Whether to perform deterministic actions, not random.
            Deterministic actions are restricted to the allowed actions; sampled
            actions are not, and staying in place is penalised instead.
        :return: (action, state, reward, done)
        '''
        allowed_action = self.return_allowed_action()
        if deterministic:
            best = ac_prob[allowed_action].max(-1)[1].item()
            action = allowed_action[best]
        else:
            action = torch.multinomial(ac_prob, num_samples=1).item()

        start_id = self.courier[0]
        same_location_reward = self.same_location_reward if start_id == action else 0

        # Travel to the chosen node.
        self.courier[0] = action
        cost_time = self.reaching_time[start_id, action]
        self.past_time += cost_time

        if action == 0:
            # Back at the warehouse: refresh time-outs and reload.
            self._mark_timed_out()
            self._load_at_warehouse()
        elif not self.no_longer_accessible[action - 1]:
            # Only clients that are still accessible are served.
            cost_time += self._serve_client(action)

        self.timestep += 1
        state = self._build_state()
        done = bool(self.no_longer_accessible.all()) or (self.timestep >= self.max_timesteps)

        time_reward = -cost_time
        reward = time_reward + same_location_reward
        return action, state, reward, done
if __name__ == '__main__':
    # Fix both RNGs so runs are reproducible.
    seed = 400
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Reference numbers recorded from earlier runs:
    #
    # rc208_50.xls:
    #     random cul reward: -1138.2509946956875 13.828585393918875
    #     pass through in sequence cost time: 1113.9043805599213
    #     total delivery: 969
    #     total pickups: 255
    # rc208_100.xls:
    #     random cul reward: -1687.2841777015926 12.058743988625375
    #     pass through in sequence cost time: 2758.687739610672
    #     total delivery: 1723
    #     total pickups: 567

    # Build the environment from the node spreadsheet and start an episode.
    env = rl_env(node_info=pd.read_excel('node100.xls'))
    env.reset()

    # Disabled experiment: time needed to visit all nodes in id order and
    # return to the warehouse, plus the delivery / pickup totals.
    # time_ = 0
    # for i in range(env.reaching_time.shape[0]):
    #     if i >= 1:
    #         time_ += env.reaching_time[i - 1, i]
    #         time_ += env.node_info[i, 5]
    # time_ += env.reaching_time[env.reaching_time.shape[0] - 1, 0]
    # print(time_)
    # print(env.default_out_of_warehouse_load)
    # print(env.node_info[:, 6].sum())
    # print(env.node_info[:, 7].sum())

    # Disabled experiment: mean / std of the cumulative reward of a uniformly
    # random policy (ac_prob is sized for an 11-node instance).
    # ac_prob = torch.tensor([1 / 11] * 11)
    # all_rewards = []
    # for i in range(100):
    #     rewards = []
    #     for j in range(100):
    #         env.reset()
    #         done = False
    #         sum_reward = []
    #         while not done:
    #             _, _, reward, done = env.step_by_action_prob(ac_prob=ac_prob, deterministic=False)
    #             sum_reward.append(reward)
    #         sum_reward = np.sum(sum_reward)
    #         rewards.append(sum_reward)
    #     all_rewards.append(np.mean(rewards))
    #
    # print(np.mean(all_rewards), np.std(all_rewards))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/spartanbin/logistics_optimization.git
git@gitee.com:spartanbin/logistics_optimization.git
spartanbin
logistics_optimization
logistics_optimization
main

搜索帮助