package org.deeplearning4j.rl4j.policy;

import org.deeplearning4j.gym.StepReply;
import org.deeplearning4j.rl4j.learning.HistoryProcessor;
import org.deeplearning4j.rl4j.learning.IHistoryProcessor;
import org.deeplearning4j.rl4j.learning.Learning;
import org.deeplearning4j.rl4j.mdp.MDP;
import org.deeplearning4j.rl4j.network.NeuralNet;
import org.deeplearning4j.rl4j.observation.Observation;
import org.deeplearning4j.rl4j.space.ActionSpace;
import org.deeplearning4j.rl4j.space.Encodable;
import org.deeplearning4j.rl4j.util.LegacyMDPWrapper;

/* loaded from: input_file:org/deeplearning4j/rl4j/policy/Policy.class */
/**
 * Abstract base for policies that pick an action of type {@code A} for each
 * {@link Observation} and can be played against an {@link MDP} until it is done.
 *
 * <p>Subclasses supply the action selection ({@link #nextAction(Observation)}) and the
 * network that is reset before each play ({@link #getNeuralNet()}).
 *
 * @param <A> the action type
 */
public abstract class Policy<A> implements IPolicy<A> {

    /**
     * Returns the network used by this policy; {@link #resetNetworks()} calls
     * {@code reset()} on it.
     *
     * @return the network backing this policy
     */
    public abstract NeuralNet getNeuralNet();

    /**
     * Selects the action to apply for the given observation. During
     * {@link #play(MDP, IHistoryProcessor)} this is only invoked for observations
     * that are not skipped.
     *
     * @param observation the current observation
     * @return the action to pass to the next step
     */
    @Override
    public abstract A nextAction(Observation observation);

    /**
     * Plays the MDP without a history processor.
     *
     * @param mdp the MDP to play
     * @return the total reward accumulated until the MDP is done
     */
    public <O extends Encodable, AS extends ActionSpace<A>> double play(MDP<O, A, AS> mdp) {
        // The cast disambiguates between the two two-argument overloads.
        return play(mdp, (IHistoryProcessor) null);
    }

    /**
     * Plays the MDP with a new {@link HistoryProcessor} built from the given configuration.
     *
     * @param mdp the MDP to play
     * @param configuration configuration handed to the {@link HistoryProcessor} constructor
     * @return the total reward accumulated until the MDP is done
     */
    public <O extends Encodable, AS extends ActionSpace<A>> double play(MDP<O, A, AS> mdp, IHistoryProcessor.Configuration configuration) {
        IHistoryProcessor historyProcessor = new HistoryProcessor(configuration);
        return play(mdp, historyProcessor);
    }

    /**
     * Resets the networks, wraps the MDP in a {@link LegacyMDPWrapper}, runs the
     * initialisation phase ({@link #refacInitMdp}) and then steps the wrapper until it
     * reports done.
     *
     * <p>When an observation is skipped, the previously applied action is repeated
     * (the action space's no-op if nothing has been applied yet); otherwise
     * {@link #nextAction(Observation)} decides.
     *
     * @param mdp the MDP to play
     * @param iHistoryProcessor history processor passed to the wrapper; the single-argument
     *                          overload passes {@code null} here
     * @return the reward gathered during initialisation plus the reward of every step
     */
    @Override
    public <O extends Encodable, AS extends ActionSpace<A>> double play(MDP<O, A, AS> mdp, IHistoryProcessor iHistoryProcessor) {
        resetNetworks();

        LegacyMDPWrapper<O, A, AS> wrapper = new LegacyMDPWrapper<>(mdp, iHistoryProcessor);
        Learning.InitMdp<Observation> init = refacInitMdp(wrapper, iHistoryProcessor);

        Observation current = init.getLastObs();
        double totalReward = init.getReward();

        // Starts as the no-op and afterwards always holds the last action that was applied.
        A previousAction = wrapper.getActionSpace().noOp();

        while (!wrapper.isDone()) {
            A action;
            if (current.isSkipped()) {
                action = previousAction;
            } else {
                action = nextAction(current);
            }
            previousAction = action;

            StepReply<Observation> reply = wrapper.step(action);
            totalReward += reply.getReward();
            current = reply.getObservation();
        }

        return totalReward;
    }

    /**
     * Resets the network returned by {@link #getNeuralNet()}.
     */
    protected void resetNetworks() {
        getNeuralNet().reset();
    }

    /**
     * Resets this policy; delegates to {@link #resetNetworks()}.
     */
    @Override
    public void reset() {
        resetNetworks();
    }

    /**
     * Resets the wrapped MDP and applies the action space's no-op for as long as the
     * observation is skipped and the MDP is not done, summing the rewards of those steps.
     *
     * @param legacyMDPWrapper the wrapped MDP to initialise
     * @param iHistoryProcessor not used by this implementation
     * @return an {@link Learning.InitMdp} carrying the last observation seen and the reward
     *         accumulated while initialising
     */
    protected <O extends Encodable, AS extends ActionSpace<A>> Learning.InitMdp<Observation> refacInitMdp(LegacyMDPWrapper<O, A, AS> legacyMDPWrapper, IHistoryProcessor iHistoryProcessor) {
        double initReward = 0.0d;

        // NOTE(review): m34reset looks like a decompiler-assigned alias of reset();
        // confirm against LegacyMDPWrapper before renaming.
        Observation observation = legacyMDPWrapper.m34reset();

        // Typed as A (not Object) so it matches the parameter type of step(A).
        A noOpAction = legacyMDPWrapper.getActionSpace().noOp();

        while (observation.isSkipped() && !legacyMDPWrapper.isDone()) {
            StepReply<Observation> reply = legacyMDPWrapper.step(noOpAction);
            initReward += reply.getReward();
            observation = reply.getObservation();
        }

        // NOTE(review): the leading 0 is presumably a step counter; verify against Learning.InitMdp.
        return new Learning.InitMdp<>(0, observation, initReward);
    }
}
