/*
 * Decompiled with CFR 0.152.
 */
package aima.probability.decision;

import aima.probability.Randomizer;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPRewardFunction;
import aima.probability.decision.MDPSource;
import aima.probability.decision.MDPTransition;
import aima.probability.decision.MDPTransitionModel;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.Pair;
import java.util.List;

public class MDP<STATE_TYPE, ACTION_TYPE> {
    private STATE_TYPE initialState;
    private MDPTransitionModel<STATE_TYPE, ACTION_TYPE> transitionModel;
    private MDPRewardFunction<STATE_TYPE> rewardFunction;
    private List<STATE_TYPE> nonFinalstates;
    private List<STATE_TYPE> terminalStates;
    private MDPSource<STATE_TYPE, ACTION_TYPE> source;

    public MDP(MDPSource<STATE_TYPE, ACTION_TYPE> mDPSource) {
        this.initialState = mDPSource.getInitialState();
        this.transitionModel = mDPSource.getTransitionModel();
        this.rewardFunction = mDPSource.getRewardFunction();
        this.nonFinalstates = mDPSource.getNonFinalStates();
        this.terminalStates = mDPSource.getFinalStates();
        this.source = mDPSource;
    }

    public MDP<STATE_TYPE, ACTION_TYPE> emptyMdp() {
        MDP<STATE_TYPE, ACTION_TYPE> mDP = new MDP<STATE_TYPE, ACTION_TYPE>(this.source);
        mDP.rewardFunction = new MDPRewardFunction();
        mDP.rewardFunction.setReward(this.initialState, this.rewardFunction.getRewardFor(this.initialState));
        mDP.transitionModel = new MDPTransitionModel(this.terminalStates);
        return mDP;
    }

    public MDPUtilityFunction<STATE_TYPE> valueIteration(double d, double d2, double d3) {
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction = this.initialUtilityFunction();
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction2 = this.initialUtilityFunction();
        double d4 = d2 * d / (1.0 - d);
        do {
            mDPUtilityFunction = mDPUtilityFunction2.copy();
            d3 = 0.0;
            for (STATE_TYPE STATE_TYPE : this.nonFinalstates) {
                Pair<ACTION_TYPE, Double> pair = this.transitionModel.getTransitionWithMaximumExpectedUtility(STATE_TYPE, mDPUtilityFunction);
                double d5 = this.rewardFunction.getRewardFor(STATE_TYPE) + d * pair.getSecond();
                mDPUtilityFunction2.setUtility(STATE_TYPE, d5);
                if (!(Math.abs(mDPUtilityFunction2.getUtility(STATE_TYPE) - mDPUtilityFunction.getUtility(STATE_TYPE)) > d3)) continue;
                d3 = Math.abs(mDPUtilityFunction2.getUtility(STATE_TYPE) - mDPUtilityFunction.getUtility(STATE_TYPE));
            }
        } while (d3 < d4);
        return mDPUtilityFunction;
    }

    public MDPUtilityFunction<STATE_TYPE> valueIterationForFixedIterations(int n, double d) {
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction = this.initialUtilityFunction();
        for (int i = 0; i < n; ++i) {
            MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction2 = mDPUtilityFunction.copy();
            Pair<MDPUtilityFunction<STATE_TYPE>, Double> pair = this.valueIterateOnce(d, mDPUtilityFunction);
            mDPUtilityFunction = pair.getFirst();
            double d2 = pair.getSecond();
        }
        return mDPUtilityFunction;
    }

    public MDPUtilityFunction<STATE_TYPE> valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(double d, double d2) {
        int n = 0;
        double d3 = 0.0;
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction = this.initialUtilityFunction();
        do {
            Pair<MDPUtilityFunction<STATE_TYPE>, Double> pair = this.valueIterateOnce(d, mDPUtilityFunction);
            mDPUtilityFunction = pair.getFirst();
            d3 = pair.getSecond();
            ++n;
        } while (d3 > d2);
        return mDPUtilityFunction;
    }

    public Pair<MDPUtilityFunction<STATE_TYPE>, Double> valueIterateOnce(double d, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction) {
        double d2 = 0.0;
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction2 = new MDPUtilityFunction<STATE_TYPE>();
        for (STATE_TYPE STATE_TYPE : this.nonFinalstates) {
            Pair<ACTION_TYPE, Double> pair = this.transitionModel.getTransitionWithMaximumExpectedUtility(STATE_TYPE, mDPUtilityFunction);
            double d3 = this.valueIterateOnceForGivenState(d, mDPUtilityFunction, STATE_TYPE);
            double d4 = Math.abs(d3 - mDPUtilityFunction.getUtility(STATE_TYPE));
            if (d4 > d2) {
                d2 = d4;
            }
            mDPUtilityFunction2.setUtility(STATE_TYPE, d3);
            for (STATE_TYPE STATE_TYPE2 : this.terminalStates) {
                mDPUtilityFunction2.setUtility(STATE_TYPE2, mDPUtilityFunction.getUtility(STATE_TYPE2));
            }
        }
        return new Pair(mDPUtilityFunction2, d2);
    }

    private double valueIterateOnceForGivenState(double d, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction, STATE_TYPE STATE_TYPE) {
        Pair<ACTION_TYPE, Double> pair = this.transitionModel.getTransitionWithMaximumExpectedUtility(STATE_TYPE, mDPUtilityFunction);
        double d2 = this.rewardFunction.getRewardFor(STATE_TYPE) + d * pair.getSecond();
        return d2;
    }

    public MDPPolicy<STATE_TYPE, ACTION_TYPE> policyIteration(double d) {
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction = this.initialUtilityFunction();
        MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy = this.randomPolicy();
        boolean bl = false;
        do {
            bl = true;
            mDPUtilityFunction = this.policyEvaluation(mDPPolicy, mDPUtilityFunction, d, 3);
            for (STATE_TYPE STATE_TYPE : this.nonFinalstates) {
                Pair<ACTION_TYPE, Double> pair = this.transitionModel.getTransitionWithMaximumExpectedUtility(STATE_TYPE, mDPUtilityFunction);
                Pair<ACTION_TYPE, Double> pair2 = this.transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(mDPPolicy, STATE_TYPE, mDPUtilityFunction);
                if (!(pair.getSecond() > pair2.getSecond())) continue;
                mDPPolicy.setAction(STATE_TYPE, pair.getFirst());
                bl = false;
            }
        } while (!bl);
        return mDPPolicy;
    }

    public MDPUtilityFunction<STATE_TYPE> policyEvaluation(MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction, double d, int n) {
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction2 = mDPUtilityFunction.copy();
        for (int i = 0; i < n; ++i) {
            mDPUtilityFunction2 = this.valueIterateOnceWith(d, mDPPolicy, mDPUtilityFunction2);
        }
        return mDPUtilityFunction2;
    }

    private MDPUtilityFunction<STATE_TYPE> valueIterateOnceWith(double d, MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction) {
        MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction2 = mDPUtilityFunction.copy();
        for (STATE_TYPE STATE_TYPE : this.nonFinalstates) {
            Pair<ACTION_TYPE, Double> pair = this.transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(mDPPolicy, STATE_TYPE, mDPUtilityFunction);
            double d2 = this.rewardFunction.getRewardFor(STATE_TYPE) + d * pair.getSecond();
            mDPUtilityFunction2.setUtility(STATE_TYPE, d2);
        }
        return mDPUtilityFunction2;
    }

    public MDPPolicy<STATE_TYPE, ACTION_TYPE> randomPolicy() {
        MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy = new MDPPolicy<STATE_TYPE, ACTION_TYPE>();
        for (STATE_TYPE STATE_TYPE : this.nonFinalstates) {
            mDPPolicy.setAction(STATE_TYPE, this.transitionModel.randomActionFor(STATE_TYPE));
        }
        return mDPPolicy;
    }

    public MDPUtilityFunction<STATE_TYPE> initialUtilityFunction() {
        return this.rewardFunction.asUtilityFunction();
    }

    public STATE_TYPE getInitialState() {
        return this.initialState;
    }

    public double getRewardFor(STATE_TYPE STATE_TYPE) {
        return this.rewardFunction.getRewardFor(STATE_TYPE);
    }

    public void setReward(STATE_TYPE STATE_TYPE, double d) {
        this.rewardFunction.setReward(STATE_TYPE, d);
    }

    public void setTransitionProbability(MDPTransition<STATE_TYPE, ACTION_TYPE> mDPTransition, double d) {
        this.transitionModel.setTransitionProbability(mDPTransition.getInitialState(), mDPTransition.getAction(), mDPTransition.getDestinationState(), d);
    }

    public double getTransitionProbability(MDPTransition<STATE_TYPE, ACTION_TYPE> mDPTransition) {
        return this.transitionModel.getTransitionProbability(mDPTransition.getInitialState(), mDPTransition.getAction(), mDPTransition.getDestinationState());
    }

    public MDPPerception<STATE_TYPE> execute(STATE_TYPE STATE_TYPE, ACTION_TYPE ACTION_TYPE, Randomizer randomizer) {
        return this.source.execute(STATE_TYPE, ACTION_TYPE, randomizer);
    }

    public boolean isTerminalState(STATE_TYPE STATE_TYPE) {
        return this.terminalStates.contains(STATE_TYPE);
    }

    public List<MDPTransition<STATE_TYPE, ACTION_TYPE>> getTransitionsWith(STATE_TYPE STATE_TYPE, ACTION_TYPE ACTION_TYPE) {
        return this.transitionModel.getTransitionsWithStartingStateAndAction(STATE_TYPE, ACTION_TYPE);
    }

    public List<ACTION_TYPE> getAllActions() {
        return this.source.getAllActions();
    }

    public String toString() {
        return "initial State = " + this.initialState.toString() + "\n rewardFunction = " + this.rewardFunction.toString() + "\n transitionModel = " + this.transitionModel.toString() + "\n states = " + this.nonFinalstates.toString();
    }
}

