% Source: rldemo_paper_code/LightIntensityEnv.m (branch: master)

classdef LightIntensityEnv < rl.env.MATLABEnvironment
    %LIGHTINTENSITYENV Custom reinforcement-learning environment for
    %   steering a receiving plane toward higher received light intensity.
    %
    %   Observation (4x1): [level1; level2; pitch; yaw], where level1 and
    %   level2 are the log-scaled intensities returned by map2level and
    %   pitch/yaw are the accumulated angles of the receiving plane.
    %
    %   Action (2x1): [delta pitch; delta yaw], added to the current
    %   angles on every step.
    %
    %   The episode terminates as soon as the magnitude of any received
    %   intensity falls below IntensityThreshold.
    %
    %   The raw intensities come from get_intensity, which is not defined
    %   in this file.

    %% Properties (set properties' attributes accordingly)
    properties
        % Upper bound on the action (pitch increment, yaw increment);
        % currently disabled, see saturate.
%         actionMax = [pi,pi];

        % Initial pitch angle of the receiving plane is 0 degrees.
        PitchAngle = 0;

        % Initial horizontal rotation (yaw) angle of the receiving plane
        % is 0 degrees.
        YawAngle= 0;

        % Normal vector of the receiving plane; initially points straight
        % down.
        n = [0 0 -1]';

        % Receiver position (overwritten with [0.7 0.7] in the constructor).
        rpos = [0 1];

        % Distance between the two receivers.
        padding = 0.3;

        % Minimum usable light intensity at a receiver; also the
        % reference level used by map2level.
        IntensityThreshold = 5e-8;

        % Current time.
        % NOTE(review): t is passed to get_intensity but is never advanced
        % by Ts anywhere in this class -- confirm whether that is intended.
        t = 0;

        % Sample time.
        Ts = 0.02;

        % History of observed states for the current episode, one 4x1
        % State column per sample (cleared in reset).
        HisIntensities = [];

        % History of reward components, one row [ir, sr] per
        % non-terminal step (cleared in reset).
        HisReward = [];
    end

    properties
        % System state [level1; level2; pitch; yaw]. The default is 4x1 so
        % that it agrees with the observation spec and with the set.State
        % validator (which requires exactly 4 elements).
        State = zeros(4,1)
    end

    properties(Access = protected)
        % Internal flag indicating episode termination.
        IsDone = false
    end

    %% Necessary Methods
    methods
        function this = LightIntensityEnv()
            %LIGHTINTENSITYENV Build the observation/action specs and
            %   construct the environment.

            % Observation: two intensity levels plus pitch and yaw.
            numObs = 4;
            ObservationInfo = rlNumericSpec([numObs 1]);
            ObservationInfo.Name = 'observation';
            ObservationInfo.Description = 'intensity1,intensity2 pitch yaw';

            % Action: increments of the pitch and yaw angles.
            numAct = 2;
            ActionInfo = rlNumericSpec([numAct 1]);
            ActionInfo.Name = 'action';
            ActionInfo.Description = 'delta pitch, delta yaw';

            % The following line implements built-in functions of RL env.
            this = this@rl.env.MATLABEnvironment(ObservationInfo,ActionInfo);

            % Override the default receiver position.
            this.rpos = [0.7 0.7];
        end

        function [Observation,Reward,IsDone,LoggedSignals] = step(this,Action)
            %STEP Apply one action and advance the environment by one step.
            %   Action is a 2x1 vector [delta pitch; delta yaw].
            %   Returns the new 4x1 observation, the scalar reward, the
            %   termination flag and an (empty) LoggedSignals.
            LoggedSignals = [];

            % Accumulate the angle increments onto the current angles.
            act = saturate(this,Action);
            newPitch = this.PitchAngle + act(1);
            newYaw = this.YawAngle + act(2);

            % Rotate the stored normal vector. Per the original author's
            % note: fixed-angle convention, rotations about fixed axes,
            % angles in degrees, operators applied from right to left
            % (rotx first, then rotz).
            R_xyz = rotz(newYaw)*rotx(newPitch);
            normalVec = R_xyz*this.n;

            % Received light intensities for the rotated plane.
            intensities = get_intensity(this.rpos,this.t,this.padding,normalVec);

            % Update system states.
            this.PitchAngle = newPitch;
            this.YawAngle = newYaw;

            % Methods of this class must be called with this as argument.
            iLevels = map2level(this,intensities);
            Observation = [iLevels(1);iLevels(2);this.PitchAngle;this.YawAngle];
            this.State = Observation;

            % Terminate when any receiver drops below the minimum intensity.
            IsDone = any(abs(intensities) < this.IntensityThreshold);
            this.IsDone = IsDone;

            % Record the new state before computing the reward; getReward
            % reads the latest history column as the current state.
            this.HisIntensities = [this.HisIntensities,this.State];
            Reward = getReward(this);

            % (optional) use notifyEnvUpdated to signal that the
            % environment has been updated (e.g. to update visualization)
            notifyEnvUpdated(this);
        end

        function InitialObservation = reset(this)
            %RESET Reset the environment to its initial state and return
            %   the initial 4x1 observation.

            % Re-initialise angles, histories and the termination flag.
            this.YawAngle = 0;
            this.PitchAngle = 0;
            this.HisIntensities = [];
            this.HisReward = [];
            this.IsDone = false;

            % Same rotation convention as in step (fixed axes, degrees,
            % operators applied from right to left).
            R_xyz = rotz(this.YawAngle)*rotx(this.PitchAngle);
            normalVec = R_xyz*this.n;

            intensities = get_intensity(this.rpos,this.t,this.padding,normalVec);
            iLevels = map2level(this,intensities);
            InitialObservation = [iLevels(1);iLevels(2);this.PitchAngle;this.YawAngle];
            this.State = InitialObservation;
            % Author's recorded intensities at zero angles:
            %   1.0e-07 * [0.9216; 0.7314]

            % Seed the history with the initial state so that the first
            % step has a previous sample to compare against.
            this.HisIntensities = [this.HisIntensities,this.State];

            % (optional) use notifyEnvUpdated to signal that the
            % environment has been updated (e.g. to update visualization)
            notifyEnvUpdated(this);
        end
    end

    %% Optional Methods (set methods' attributes accordingly)
    methods
        function saction = saturate(this,action)
            %SATURATE Convert the action to double. No limit is applied at
            %   present; the clamping below is intentionally disabled.
%             if abs(action)>actionMax
%                 action = sign(action)*actionMax;
%             end
            saction = double(action);
        end

        function intLevel = map2level(this,intensity)
            %MAP2LEVEL Map raw intensities to a signed logarithmic level:
            %   sign(I) .* log(|I| / IntensityThreshold) * 10.
            % NOTE(review): an intensity of exactly 0 gives 0 * (-Inf) = NaN,
            % which set.State rejects ('finite') -- confirm get_intensity
            % can never return exactly 0.
            intLevel = sign(intensity).*log(abs(intensity)/this.IntensityThreshold)*10;
        end

        function Reward = getReward(this)
            %GETREWARD Compute the reward for the most recent state.
            %   Returns -1 when the episode has terminated. Otherwise the
            %   reward is 10*(ir + sr), where ir = 10 * (current level of
            %   receiver 1 minus the mean of its previous Tc levels) and
            %   sr = current level / 10. [ir, sr] is appended to HisReward.
            if this.IsDone
                Reward = -1;
                return
            end

            % First element of the newest history column: level of
            % receiver 1.
            latest = this.HisIntensities(:,end);
            curLevel = latest(1);

            % Number of recorded samples. This must be the width of the
            % history matrix; size(this) is the size of the (scalar)
            % environment object and is always [1 1].
            numSamples = size(this.HisIntensities,2);

            % Number of previous samples averaged as the baseline.
            Tc = 1;
            if numSamples <= Tc
                % Not enough history yet: fall back to everything recorded.
                prevStates = this.HisIntensities(:,:);
            else
                % The Tc samples immediately preceding the current one.
                prevStates = this.HisIntensities(:,end-Tc:end-1);
            end
            prevMean = mean(prevStates,2);
            prevLevel = prevMean(1);

            % Improvement term and absolute-level term.
            ir = 10*(curLevel - prevLevel);
            sr = curLevel/10;
            Reward = 10*(ir+sr);
            this.HisReward = [this.HisReward;ir,sr];
        end

        function plot(this)
            %PLOT (optional) Visualization entry point; delegates to
            %   envUpdatedCallback, which is currently empty.
            envUpdatedCallback(this)
        end

        % (optional) Properties validation through set methods
        function set.State(this,state)
            % State must be a finite real numeric vector with 4 elements;
            % it is stored as a column.
            validateattributes(state,{'numeric'},{'finite','real','vector','numel',4},'','State');
            this.State = state(:);
            notifyEnvUpdated(this);
        end
        function set.PitchAngle(this,val)
            % PitchAngle must be a finite real numeric scalar.
            validateattributes(val,{'numeric'},{'finite','real','scalar'},'','PitchAngle');
            this.PitchAngle = val;
            notifyEnvUpdated(this);
        end
        function set.YawAngle(this,val)
            % YawAngle must be a finite real numeric scalar.
            validateattributes(val,{'numeric'},{'finite','real','scalar'},'','YawAngle');
            this.YawAngle = val;
            notifyEnvUpdated(this);
        end
    end

    methods (Access = protected)
        % (optional) update visualization everytime the environment is updated
        % (notifyEnvUpdated is called)
        function envUpdatedCallback(this)
        end
    end
end