class Lampe:
    """Two-state lamp model driven by a push button.

    States are the strings 'allume' (on) and 'eteint' (off); actions are
    'appuyer' (press the button) and 'rien' (do nothing).
    """

    def actions(self):
        """Return the list of action names."""
        return ['appuyer', 'rien']

    def etats(self):
        """Return the list of state names."""
        return ['allume', 'eteint']

    def transition(self, s, a):
        """Return the state reached when action `a` is applied in state `s`.

        'rien' leaves the state unchanged, 'appuyer' toggles between
        'allume' and 'eteint'. Any other action, or pressing from a state
        that is neither of the two, yields the string 'erreur'.
        """
        if a == 'appuyer':
            # Pressing flips the lamp; an unrecognised state is an error.
            if s == 'allume':
                return 'eteint'
            return 'allume' if s == 'eteint' else 'erreur'
        elif a == 'rien':
            return s
        return 'erreur'

    def recompense(self, s, a, sarr):
        """Return the reward for doing `a` in state `s`.

        The reward is 10 for pressing the button while the lamp is off and
        0 otherwise. The arrival state `sarr` is not used.
        """
        return 10 if (s == 'eteint' and a == 'appuyer') else 0

      
class SystemeExecute:
    """Runs a policy step by step on a problem object.

    The problem object must provide `transition(s, a)` and, for
    `executerPiRec`, `recompense(s, a, s_next)`.
    """

    def __init__(self, pb):
        """Store the problem `pb` on which policies will be executed."""
        self.pb = pb

    def executerPi(self, pi, depart, nb):
        """Execute policy `pi` for `nb` steps starting from state `depart`.

        `pi` is indexed by state (`pi[s]`) to obtain the action. Each step
        is printed as "state -> action : next state". Returns None.
        """
        s = depart
        for _ in range(nb):
            action = pi[s]
            # Use the problem given to the constructor, not a module global.
            sFin = self.pb.transition(s, action)
            print(s," -> ",action," : ",sFin)
            s = sFin

    def executerPiRec(self, pi, depart, nb):
        """Execute policy `pi` for `nb` steps and accumulate the rewards.

        Same traversal as `executerPi`, but each printed step also shows the
        reward obtained, and the sum of all rewards is returned (0 when
        `nb` is 0).
        """
        s = depart
        somme = 0
        for _ in range(nb):
            action = pi[s]
            # Use the problem given to the constructor, not a module global.
            sFin = self.pb.transition(s, action)
            r = self.pb.recompense(s, action, sFin)
            somme += r
            print(s," -> ",action," : ",sFin,"<",r,">")
            s = sFin
        return somme

            

#************************************************************
# Problem instance used by the demo below (kept as a module-level name).
pb = Lampe()

# Policy: do nothing when the lamp is on, press the button when it is off.
pi = {'allume': 'rien', 'eteint': 'appuyer'}
print("*** politique ***")
print(pi)

# Run the policy for 10 steps from the 'eteint' state, printing each step.
print("*** test execution ***")
systemExec = SystemeExecute(pb)
systemExec.executerPi(pi, 'eteint', 10)

# Same run, this time also accumulating and reporting the rewards.
print("*** test execution recompense ***")
systemExec = SystemeExecute(pb)
somme = systemExec.executerPiRec(pi, 'eteint', 10)
print("somme: ", somme)
