class CameraSale:
    """Dirty-camera decision problem (DS exam subject, 2015-2016).

    States: "propre" (clean), "sale" (dirty) and "photo" (picture taken).
    Actions: "nettoyer" (clean the camera) and "prendre photo" (take a picture).
    """

    # Transition model of the non-terminal states:
    # (state, action) -> ((arrival state, probability), ...).
    _TRANSITIONS = {
        ("propre", "nettoyer"): (("propre", 0.9), ("sale", 0.1)),
        ("propre", "prendre photo"): (("photo", 0.6), ("sale", 0.4)),
        ("sale", "nettoyer"): (("sale", 0.), ("propre", 1.)),
        ("sale", "prendre photo"): (("photo", 0.1), ("sale", 0.9)),
    }

    def transition(self, s, a):
        """Return the distribution over arrival states as [(state, proba)].

        s -- current state, one of the values returned by etats()
        a -- action applied in s, one of the values returned by actions()

        Raises ValueError when (s, a) is not part of the model, instead of
        silently returning None.
        """
        # "photo" is absorbing: whatever the action, the system stays there.
        if s == "photo":
            return [("photo", 1.)]

        try:
            issues = self._TRANSITIONS[(s, a)]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments (e.g. a list given as state).
            raise ValueError(
                "unknown state/action pair: (%r, %r)" % (s, a)
            ) from None
        # Hand out a fresh list so callers cannot alter the shared table.
        return list(issues)

    def recompense(self, s, a, sarr):
        """Return the reward of the transition s --a--> sarr.

        Reaching "photo" costs nothing; any other arrival state costs 1.
        """
        if sarr == "photo":
            return 0
        return -1

    def etats(self):
        """Return the list of states."""
        return ["propre", "sale", "photo"]

    def actions(self):
        """Return the list of actions."""
        return ["nettoyer", "prendre photo"]

              
    

class Systeme:
    """Runs and plans over a decision problem.

    - pb : the problem; it must provide etats(), actions(),
      transition(s, a) -> [(state, proba)] and recompense(s, a, s_arrival).
    """

    def __init__(self, pb):
        """Build a system from a problem."""
        self.pb = pb

    def execute(self, s, a):
        """Apply action a in state s and return ONE sampled arrival state.

        The problem's transition() yields a distribution [(state, proba)];
        an arrival state is drawn from it according to the probabilities, so
        the result can be used directly as the next state.
        """
        import random  # local import: the module has no import section

        distribution = self.pb.transition(s, a)
        arrivees = [etat for etat, _ in distribution]
        probas = [proba for _, proba in distribution]
        return random.choices(arrivees, weights=probas, k=1)[0]

    def intialiseQ(self):
        """Return Q-values set to 0 for every (state, action) pair.

        The misspelled method name is kept so existing callers keep working.
        """
        return {
            (etat, action): 0
            for etat in self.pb.etats()
            for action in self.pb.actions()
        }

    def planifie(self, nb, gamma=1):
        """Run nb sweeps of Q-value iteration and return the Q dictionary.

        nb    -- number of synchronous sweeps (0 returns the all-zero Q)
        gamma -- discount factor applied to the value of the arrival state

        Each sweep computes, for every (state, action):
            Q'(s, a) = sum over s' of P(s'|s, a) * (r(s, a, s') + gamma * max_a' Q(s', a'))
        """
        etats = self.pb.etats()
        actions = self.pb.actions()
        Q = self.intialiseQ()

        for _ in range(nb):
            # Synchronous update: the new table is built from the previous one only.
            Q2 = {}
            for etat in etats:
                for action in actions:
                    valeur = 0
                    for etat_fin, proba in self.pb.transition(etat, action):
                        r = self.pb.recompense(etat, action, etat_fin)
                        # Value of the arrival state = its best Q-value.
                        meilleure = max(Q[(etat_fin, a)] for a in actions)
                        valeur += proba * (r + gamma * meilleure)
                    Q2[(etat, action)] = valeur
            Q = Q2
        return Q

    def executerPi(self, pi, depart, nb):
        """Follow policy pi for nb steps from state depart, printing each step.

        pi -- dictionary mapping a state to the action to apply in it
        """
        s = depart
        for _ in range(nb):
            action = pi[s]
            sFin = self.execute(s, action)
            r = self.pb.recompense(s, action, sFin)
            print(s," -> ",action," : ",sFin,"<",r,">")
            s = sFin

    def afficherQ(self, Q):
        """Print one line per (state, action) entry of Q."""
        for etat, action in Q:
            print(f"{etat} - {action} -> {Q[(etat, action)]}, ")

    def afficherQS(self, Q):
        """Print Q grouped by state: the state, then one line per action."""
        for etat in self.pb.etats():
            lignes = "".join(
                f"\n - {action} -> {Q[(etat, action)]}, "
                for action in self.pb.actions()
            )
            print(str(etat) + lignes)

    def politiqueFromQ(self, Q):
        """Return the greedy policy {state: best action} derived from Q.

        Ties are resolved in favour of the first action listed by the problem.
        """
        actions = self.pb.actions()
        pi = {}
        for etat, _ in Q:
            # Q holds one key per (state, action); handle each state only once.
            if etat in pi:
                continue
            pi[etat] = max(actions, key=lambda a: Q[(etat, a)])
        return pi

    def apprentissage(self, Sdep):
        """Placeholder for learning from state Sdep; only prints an empty line."""
        print("")
        



# Driver: plan on the dirty-camera problem with increasing numbers of sweeps
# and print the resulting Q-values each time.
SEPARATEUR = "****************************************"

pb = CameraSale()
#print(pb.etats())
#print(pb.actions())
print(SEPARATEUR)

systeme = Systeme(pb)

# (title printed, number of sweeps); titles are kept exactly as displayed before.
for titre, horizon in (
    ("planification 1 ", 1),
    ("planification 2 ", 2),
    ("planification 50", 50),
    ("planification 1000", 1000),
):
    print(SEPARATEUR)
    print(titre)
    Q = systeme.planifie(horizon)
    print(len(Q))
    systeme.afficherQS(Q)


#systeme.afficherQS(Q,(3,2))
#systeme.afficherQS(Q,(3,3))


print(SEPARATEUR)
# Disabled: derive the greedy policy from the last Q and run it.
#pi=systeme.politiqueFromQ(Q)
#print(pi)
#systeme.executerPi(pi,sDep,30)