class Probleme:
    """"permet de efinir un probleme"""

    def actions(self):
        """returne la liste d'actions"""
        return ([])

    def etats(self):
        """returne la liste d'etats"""
        return ([])


    def transition(self,s,a):
        """definit les consequence d une action"""
        return(s)



    def recompense(self,s,a,sarr):
        """definit la recompense obtenue"""
        return(0)





class Laby(Probleme):
    """un probleme ou on se deplace
    - les actions connues sont N,S,E,O
    - l'etat est un couple (x,y)"""

    """pose des trous dans laby"""
    def __init__(self):
        self.obstacle=[]
        self.ajouterZoneObstacle(-5,-5,0,20)
        self.ajouterZoneObstacle(-5,-5,20,0)
        self.ajouterZoneObstacle(0,15,20,20)
        self.ajouterZoneObstacle(15,0,20,20)
        self.ajouterZoneObstacle(0,3,9,5)
        self.ajouterZoneObstacle(2,9,20,12)




    def ajouterZoneObstacle(self,dx,dy,fx,fy):
        """ajoute un obstacle dans la zone retcangle (dx,dx)->(fx,fy)"""
        for x in range(dx,fx+1):
            for y in range(dy,fy+1):
                self.obstacle+=[(x,y)]



    def printLaby(self):
        for j in range(-5,20):
            chaine=""
            for i in range(-5,20):
                etat=(i,j)
                if etat in self.obstacle:
                    chaine+="X"
                else:
                    chaine+="."
            print(chaine)



    def transition(self,s,a):
        """fait evoluer d une unite le systeme"""
        """un etat est le quadruplet ((xdep,ydep),(xprec,yprec))"""
        """action c'est (dx,dy) (delat et delty)"""

        #si on est dans un obstacle, etat = "Mort"
        if ((s=="mort") or (s[0] in self.obstacle)):
            #print("mort")
            return("mort")

        #sinon on fait calcul
        x=s[0][0]
        y=s[0][1]

        xPrec=s[1][0]
        yPrec=s[1][1]

        xApres=x+(x-xPrec)
        yApres=y+(y-yPrec)

        xApres=xApres+a[0]
        yApres=yApres+a[1]

        if ((xApres,yApres) in self.obstacle):
            return("mort")

        return(((xApres,yApres),(x,y)))


    #les recompenses
    def recompense(self,s,a,sarr):

        #si on tombe sur obstacle
        if (s!="mort")and(sarr=="mort"):
            return(-200)

        #si est deja sur un obstacle
        if (s=="mort")and(sarr=="mort"):
            return(0)


        posFin=sarr[0]
        #si l'etat d arrivee est (14,14) ==> ok
        if ((posFin[0]==14)and(posFin[1]==14)):
            return(100)

        #enfin, sinon, cout de 1
        return(-1)

    #la liste des actions
    def actions(self):
        return([(1,0),(1,1),(1,-1),(0,0),(0,1),(0,-1),(-1,0),(-1,-1),(-1,1)])




class Systeme:
    """permet d'executer un probleme
    - pb : attribut du probleme"""

    def __init__(self,pb):
        """construit un systeme a partir d'un probleme"""
        self.pb=pb

    def execute(self,s,a):
        #print ("etat depart: ",s)
        #print ("action: ",a)
        sArriv = self.pb.transition(s,a)
        #print ("etat arrivee: ",sArriv)
        #print ("recompense: ",self.pb.recompense(s,a,sArriv))
        #print("********")
        return(sArriv)

    def intialiseQ(self):
        """construit des Qvaleurs vides"""
        Q = {}


    def planifie(self,nb,s):
        gamma=0.999
        #Q=self.intialiseQ()
        Q={}
        for action in self.pb.actions():
            Q[(s,action)]=0

        #une iteration
        for i in range(0,nb):
            print("iteration :",i)
            Q2={}
            #pour chaque etat,action
            for etatAction in Q:
                    etat=etatAction[0]
                    action=etatAction[1]
                    #calculer arrivee
                    sArriv=self.pb.transition(etat,action)
                    r=self.pb.recompense(etat,action,sArriv)
                    # cherche max arrivee
                    max=-100000
                    for actionMax in self.pb.actions():
                        #si la clef n'existe pas, on cree
                        if (not((sArriv,actionMax) in Q)):
                            Q2[(sArriv,actionMax)]=0
                            if (max<0):
                                max=0
                        else :
                            if (Q[(sArriv,actionMax)]>max):
                                max=Q[(sArriv,actionMax)]
                    #mise a jour
                    Q2[(etat,action)]=r+gamma*max

            #on augmente iteration
            Q=Q2
            print("* nb etats:",len(Q))
        return(Q)

    def executerPi(self,pi,depart,nb):
        s=depart
        for i in range(nb):
            action=pi[s]
            sFin=self.execute(s,action)
            r=self.pb.recompense(s,action,sFin)
            print(s," -> ",action," : ",sFin,"<",r,">")
            s=sFin


    def afficherQ(self,Q):
        for etatAction in Q:
            etat=etatAction[0]
            action=etatAction[1]
            chaine = ""+str(etat)+" - "+str(action)+" -> "+str(Q[(etat,action)])+", "
            print(chaine)

    def afficherQS(self,Q,etat):
        chaine = ""+str(etat)+" - "
        for action in self.pb.actions():
            chaine+=action+" -> "+str(Q[(etat,action)])+", "
        print(chaine)


    def politiqueFromQ(self,Q):
        pi={}
        for etatAction in Q:
            etat=etatAction[0]
            # cherche max arrivee
            max=-100000
            amax=-1;
            for actionMax in self.pb.actions():
                if (Q[(etat,actionMax)]>max):
                    max=Q[(etat,actionMax)]
                    amax=actionMax
            pi[etat]=amax
        return(pi)

    def apprentissage(self,Sdep):
        print("")


import pygame

class RenduGraphique:

    def __init__(self,laby,etat,pi):
        self.laby=laby
        self.etat=etat
        self.pi=pi
        self.etatdep=etat
        self.taille=20


    def evoluer(self):
        print(self.etat)
        action=self.pi[self.etat]
        self.etat=self.laby.transition(self.etat,action)


    def dessiner(self,screen):
        print("ok")
        RED = (0xFF, 0x00, 0x00)
        WHITE = (0xFF, 0xFF, 0xFF)
        GREY = (0x50, 0x50, 0x50)
        YELLOW = (0xAD, 0xFF, 0x2F)
        GREEN= (0x00, 0xFF, 0x00)
        taille=self.taille

        #dessin trous
        for t in self.laby.obstacle:
            pygame.draw.ellipse(screen,RED,(t[0]*taille-5,t[1]*taille-5,10,10))


        #dessin etat
        if (self.etat!="mort"):
            dep=self.etat[0]
            arr=self.etat[1]

            pygame.draw.ellipse(screen,GREEN,(dep[0]*taille-5,dep[1]*taille-5,10,10))
            pygame.draw.ellipse(screen,GREEN,(arr[0]*taille-5,arr[1]*taille-5,10,10))
            pygame.draw.line(screen,GREEN,(dep[0]*taille,dep[1]*taille),(arr[0]*taille,arr[1]*taille),2)


    #on reinitialise l'etat quand on clique avec la souris
    def reinitEtat(self,x,y):
        s=(int(x/self.taille),int(y/self.taille))
        #si ce n'est pas un obstacle
        if not(s in self.laby.obstacle):
            self.etat=(s,s)

    #permet de lancer le jeu
    def lancer(self):
        pygame.init()
        size = (700, 500)
        screen = pygame.display.set_mode(size)
        pygame.display.set_caption("Moteur")
        done = False
        mouse=False
        # -------- boucle jeu
        clock = pygame.time.Clock()
        while not done:
            # --- gestion des evenements
            for event in pygame.event.get():
                # si on arrete
                if event.type == pygame.QUIT:
                    done = True

                # gere le click de souris
                # passe en mode modifie coefficients
                if event.type == pygame.MOUSEBUTTONDOWN  :
                    self.reinitEtat(pygame.mouse.get_pos()[0],pygame.mouse.get_pos()[1])


            # --- mise a jour du jeu
            self.evoluer()

            # --- mise a jour dessins
            self.dessiner(screen)


            # --- graphique
            pygame.display.flip()

            # --- attente
            clock.tick(10)
        pygame.quit()




pb=Laby()
#print(pb.etats())
#print(pb.actions())
print("****************************************")
pb.printLaby()
print("****************************************")

systeme=Systeme(pb)
sDep=((1,1),(1,1))
s=systeme.execute(sDep,(1,1))
s=systeme.execute(s,(1,1))

print("****************************************")
print("planification")
Q=systeme.planifie(50,sDep)
print(len(Q))
#systeme.afficherQ(Q)
#systeme.afficherQS(Q,(3,2))
#systeme.afficherQS(Q,(3,3))


print("****************************************")
pi=systeme.politiqueFromQ(Q)
#print(pi)
systeme.executerPi(pi,sDep,50)

print("****************************************")
rendu=RenduGraphique(pb,sDep,pi)
rendu.lancer()