1 Star 0 Fork 0

Briefly/rldemo_paper_code

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
WaveEnvDiscreteAction.m 8.17 KB
一键复制 编辑 原始数据 按行查看 历史
Briefly 提交于 2023-09-10 17:56 . 添加所有文件
classdef WaveEnvDiscreteAction < rl.env.MATLABEnvironment
    %WAVEENVDISCRETEACTION Custom RL environment with a discrete action set.
    %   The agent adjusts the pitch and yaw angles of a receiver plane by
    %   fixed-size increments. The received light intensity is obtained
    %   from a WaveEnv object (this.wave.getIntensityByYP) and the reward
    %   is built from the log-intensity, its change relative to recent
    %   samples, and a penalty on the action magnitude.
    %
    %   env = WaveEnvDiscreteAction(rpos)      training mode (sim = false)
    %   env = WaveEnvDiscreteAction(rpos,sim)  sim = true additionally
    %                                          records the angle history.

    %% Properties (set properties' attributes accordingly)
    properties
        % Both angles are expected to lie within -180 ~ 180 degrees.
        % Horizontal rotation (yaw) angle of the receiver plane, degrees.
        YawAngle = 0;
        % Pitch angle of the receiver plane, degrees (starts at 0).
        PitchAngle = 0;
        % Normal vector of the receiver plane; initially pointing straight down.
        n = [0 0 -1]';
        % UAV position. step/reset read rpos(1) and rpos(2).
        rpos = 0;
        % Minimum received intensity; the episode ends below this value.
        IntensityThreshold = 10^(-10);
        % Current time.
        t = 0;
        % Sample time.
        % NOTE(review): step advances t by a hard-coded 1/40, not by Ts.
        Ts = 0.02;
        % Action scale: every action vector is multiplied by this factor.
        % A larger Scale makes the learned angle settle more easily but
        % increases the intensity fluctuation; if it is too large the
        % angle no longer stabilizes. Values tried: 3.5, 4, sqrt(5), 5, 8.
        Scale = 5;
        % Wave environment (replaced by a WaveEnv object in the constructor).
        wave = 0;
        % True when the environment is used for testing/simulation.
        sim = false;
        % History buffers.
        HisIntensities = [];
        HisReward = [];
        HisAction = [];
        HisAngle = [];
    end

    properties
        % System state (observation vector), initialized to zeros.
        State = zeros(4,1)
    end

    properties(Access = protected)
        % Internal flag indicating episode termination.
        IsDone = false
    end

    % 1.8474e-7: maximum value (presumably the peak received intensity
    % -- TODO confirm).

    %% Necessary Methods
    methods
        function this = WaveEnvDiscreteAction(rpos,sim)
            %WAVEENVDISCRETEACTION Construct the environment.
            %   rpos - UAV position; elements 1 and 2 are passed to
            %          WaveEnv.getIntensityByYP.
            %   sim  - (optional, default false) true to record the angle
            %          history on every step.
            if nargin < 2
                sim = false;
            end

            % Observation: cos/sin of pitch and yaw at the fixed position.
            numObs = 4;
            ObservationInfo = rlNumericSpec([numObs 1]);
            ObservationInfo.Name = 'observation';
            ObservationInfo.Description = 'cos pitch, sin pitch ,cos yaw ,sin yaw';

            % Action: direction of the angular move, one of nine options
            % (diagonal moves are divided by sqrt(2)).
            ActionInfo = rlFiniteSetSpec({ ...
                [-1, -1]/sqrt(2), [-1, 0], [-1, 1]/sqrt(2), ...
                [ 0, -1],         [ 0, 0], [ 0, 1], ...
                [ 1, -1]/sqrt(2), [ 1, 0], [ 1, 1]/sqrt(2)});
            ActionInfo.Name = 'action';
            % Increments of the pitch angle and of the yaw angle.
            ActionInfo.Description = 'delta pitch, delta yaw';

            % The following line implements built-in functions of RL env
            this = this@rl.env.MATLABEnvironment(ObservationInfo,ActionInfo);

            this.rpos = rpos;
            this.wave = WaveEnv();
            this.sim = sim;
        end

        function [Observation,Reward,IsDone,LoggedSignals] = step(this,action)
            %STEP Apply one action and advance the environment by one step.
            %   action        - two-element vector [delta pitch, delta yaw]
            %                   (scaled internally by this.Scale).
            %   Observation   - [cos pitch; sin pitch; cos yaw; sin yaw].
            %   Reward        - ir + 0.5*sr - 0.06*norm(action), see below.
            %   IsDone        - true when the intensity falls below
            %                   this.IntensityThreshold.
            %   LoggedSignals - always empty.
            LoggedSignals = [];
            act = action*this.Scale;

            % In test mode, record the angles held before this step.
            if this.sim
                this.HisAngle = [this.HisAngle [this.YawAngle;this.PitchAngle]];
            end

            deltaPitch = act(1);
            deltaYaw = act(2);
            pitch = this.PitchAngle + deltaPitch;
            yaw = this.YawAngle + deltaYaw;

            % Clamp yaw to [-20, 90] degrees. Pitch is not limited here.
            % NOTE(review): the original comment said "pitch must stay
            % within -90 ~ 90 degrees", which does not match the code.
            if yaw > 90
                yaw = 90;
            elseif yaw < -20
                yaw = -20;
            end

            % The time step (frequency) influences the shape of the curve
            % during convergence, because the maximum intensity changes
            % over time.
            this.t = this.t + 1/40;

            % Received light intensity at the new orientation and time.
            intensity = this.wave.getIntensityByYP(this.rpos(1),this.rpos(2),yaw,pitch,this.t);

            % Update system states
            this.PitchAngle = pitch;
            this.YawAngle = yaw;
            Observation = [cosd(this.PitchAngle);sind(this.PitchAngle);cosd(this.YawAngle);sind(this.YawAngle)];
            this.State = Observation;

            % Check terminal condition and keep the internal flag in sync
            % with the value returned to the caller.
            IsDone = intensity < this.IntensityThreshold;
            this.IsDone = IsDone;

            % Baseline intensity: mean of the Tc samples preceding the
            % current one. While the history holds at most Tc samples the
            % whole history (including the current sample) is averaged.
            this.HisIntensities = [this.HisIntensities,intensity];
            len = size(this.HisIntensities,2);
            Tc = 2;
            if len <= Tc
                prestate = this.HisIntensities(1,:);
            else
                prestate = this.HisIntensities(1,end-Tc:end-1);
            end
            prestate = mean(prestate);
            curstate = this.HisIntensities(1,end);

            % Guard the logarithms: a zero intensity would give -Inf and a
            % negative one a complex value. Positive inputs are unchanged.
            if curstate <= 0
                curstate = realmin;
            end
            if prestate <= 0
                prestate = realmin;
            end

            % ir: absolute term, log-intensity shifted by a tuned offset
            %     (an offset of 6.8 was found to be too small).
            % sr: relative term, log-ratio to the recent baseline.
            ir = log10(curstate) + 7.2420;
            sr = log(curstate/prestate);

            % Alternatives explored by the author include
            %   -(log10(I)+6)^2, 0.5*exp(log10(I)+6.8), log10(I/prestate)
            % for ir, and weightings such as
            %   0.5*ir+sr, 1.5*ir-0.8*abs(sr)-0.06*norm(action),
            %   1*ir+0.5*sr-0.15*norm(action) (marked "wind best"),
            %   3*ir+0.5*sr.
            % A larger intensity reward makes the curve more stable.
            Reward = 1*ir + 0.5*sr - 0.06*norm(action);

            this.HisReward = [this.HisReward;ir,sr];
            notifyEnvUpdated(this);
        end

        function InitialObservation = reset(this)
            %RESET Reset the environment and return the initial observation.
            %   Time, angles, the termination flag and the intensity and
            %   reward histories are cleared; HisAngle and HisAction are
            %   left untouched. The intensity at the initial orientation
            %   is stored as the first history sample.
            this.t = 0;
            this.IsDone = false;
            this.YawAngle = 0;
            this.PitchAngle = 0;
            this.HisIntensities = [];
            this.HisReward = [];

            intensity = this.wave.getIntensityByYP(this.rpos(1),this.rpos(2),this.YawAngle,this.PitchAngle,this.t);
            InitialObservation = [cosd(this.PitchAngle);sind(this.PitchAngle);cosd(this.YawAngle);sind(this.YawAngle)];
            this.State = InitialObservation;
            this.HisIntensities = [this.HisIntensities,intensity];
            notifyEnvUpdated(this);
        end
    end

    %% Optional Methods (set methods' attributes accordingly)
    methods
        function plot(this)
            %PLOT (optional) Visualization entry point.
            % Update the visualization
            envUpdatedCallback(this)
        end

        % (optional) Properties validation through set methods
        function set.State(this,state)
            % State must be a finite real vector with 4 elements; it is
            % stored as a column.
            validateattributes(state,{'numeric'},{'finite','real','vector','numel',4},'','State');
            this.State = state(:);
            notifyEnvUpdated(this);
        end

        function set.PitchAngle(this,val)
            % PitchAngle must be a finite real scalar.
            validateattributes(val,{'numeric'},{'finite','real','scalar'},'','PitchAngle');
            this.PitchAngle = val;
            notifyEnvUpdated(this);
        end

        function set.YawAngle(this,val)
            % YawAngle must be a finite real scalar.
            validateattributes(val,{'numeric'},{'finite','real','scalar'},'','YawAngle');
            this.YawAngle = val;
            notifyEnvUpdated(this);
        end
    end

    methods (Access = protected)
        function envUpdatedCallback(this)
            % (optional) Update visualization every time the environment
            % is updated (notifyEnvUpdated is called). Intentionally empty.
        end
    end
end
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/briefly/rldemo_paper_code.git
git@gitee.com:briefly/rldemo_paper_code.git
briefly
rldemo_paper_code
rldemo_paper_code
master

搜索帮助