Theory Tarjan

theory Tarjan
imports Main
theory Tarjan
imports Main
begin

text ‹
  Tarjan's algorithm computes the strongly connected components of
  a finite graph using depth-first search. We formalize a functional
  version of the algorithm in Isabelle/HOL, following a development
  of Lévy et al. in Why3 that is available at
  \url{http://pauillac.inria.fr/~levy/why3/graph/abs/scct/1-68bis/scc.html}.
›

text ‹Make the simplifier expand let-constructions automatically›
declare Let_def[simp]

text ‹
  Definition of an auxiliary data structure holding local variables
  during the execution of Tarjan's algorithm.
›
record 'v env =
  black :: "'v set"
  gray  :: "'v set"
  stack :: "'v list"
  sccs  :: "'v set set"
  sn    :: nat
  num   :: "'v ⇒ int"

definition colored where
  "colored e ≡ black e ∪ gray e"

locale graph =
  fixes vertices :: "'v set"
    and successors :: "'v ⇒ 'v set"
  assumes vfin: "finite vertices"
    and sclosed: "∀x ∈ vertices. successors x ⊆ vertices"

context graph
begin

section {* Reachability in graphs *}

abbreviation edge where
  "edge x y ≡ y ∈ successors x"

definition xedge_to where
  ― ‹@{text ys} is a suffix of @{text xs}, @{text y} appears in @{text ys},
      and there is an edge from some node in the prefix of @{text xs} to @{text y}›
  "xedge_to xs ys y ≡
    y ∈ set ys
  ∧ (∃zs. xs = zs @ ys ∧ (∃z ∈ set zs. edge z y))"

inductive reachable where
  reachable_refl[iff]: "reachable x x"
| reachable_succ[elim]: "⟦edge x y; reachable y z⟧ ⟹ reachable x z"

lemma reachable_edge: "edge x y ⟹ reachable x y"
  by auto

lemma succ_reachable:
  assumes "reachable x y" and "edge y z"
  shows "reachable x z"
  using assms by induct auto

lemma reachable_trans:
  assumes y: "reachable x y" and z: "reachable y z"
  shows "reachable x z"
  using assms by induct auto

text {*
  Given some set @{text S} and two vertices @{text x} and @{text y}
  such that @{text y} is reachable from @{text x}, and @{text x} is
  an element of @{text S} but @{text y} is not, then there exists
  some vertices @{text x'} and @{text y'} linked by an edge such that
  @{text x'} is an element of @{text S}, @{text y'} is not,
  @{text x'} is reachable from @{text x}, and @{text y} is reachable  
  from @{text y'}.
*}
lemma reachable_crossing_set:
  assumes 1: "reachable x y" and 2: "x ∈ S" and 3: "y ∉ S"
  obtains x' y' where
    "x' ∈ S" "y' ∉ S" "edge x' y'" "reachable x x'" "reachable y' y"
proof -
  from assms
  have "∃x' y'. x' ∈ S ∧ y' ∉ S ∧ edge x' y' ∧ reachable x x' ∧ reachable y' y"
    by induct (blast intro: reachable_edge reachable_trans)+
  with that show ?thesis by blast
qed

section {* Strongly connected components *}

definition is_subscc where
  "is_subscc S ≡ ∀x ∈ S. ∀y ∈ S. reachable x y"

definition is_scc where
  "is_scc S ≡ S ≠ {} ∧ is_subscc S ∧ (∀S'. S ⊆ S' ∧ is_subscc S' ⟶ S' = S)"

lemma subscc_add:
  assumes "is_subscc S" and "x ∈ S"
      and "reachable x y" and "reachable y x"
  shows "is_subscc (insert y S)"
using assms unfolding is_subscc_def by (metis insert_iff reachable_trans)

lemma sccE:
  ― ‹Two vertices that are reachable from each other are in the same SCC.›
  assumes "is_scc S" and "x ∈ S"
      and "reachable x y" and "reachable y x"
  shows "y ∈ S"
using assms unfolding is_scc_def by (metis insertI1 subscc_add subset_insertI)

lemma scc_partition:
  ― ‹Two SCCs that contain a common element are identical.›
  assumes "is_scc S" and "is_scc S'" and "x ∈ S ∩ S'"
  shows "S = S'"
  using assms unfolding is_scc_def is_subscc_def
  by (metis IntE assms(2) sccE subsetI)


section {* Auxiliary functions *}

abbreviation infty ("∞") where
  ― ‹integer exceeding any one used as a vertex number during the algorithm›
  "∞ ≡ int (card vertices)"

definition set_infty where
  ― ‹set @{text "f x"} to @{text ∞} for all x in xs›
  "set_infty xs f = fold (λx g. g (x := ∞)) xs f"

lemma set_infty:
  "(set_infty xs f) x = (if x ∈ set xs then ∞ else f x)"
  unfolding set_infty_def by (induct xs arbitrary: f) auto

text {*
  Split a list at the first occurrence of a given element.
  Returns the two sublists of elements before (and including)
  the element and those strictly after the element. 
  If the element does not occur in the list, returns a pair 
  formed by the entire list and the empty list.
*}
fun split_list where
  "split_list x []  = ([], [])"
| "split_list x (y # xs) =
    (if x = y then ([x], xs) else 
       (let (l, r) = split_list x xs in
         (y # l, r)))" 

lemma split_list_concat:
  ― ‹Concatenating the two sublists produced by @{text "split_list"}
      yields back the original list.›
  assumes "x ∈ set xs"
  shows "(fst (split_list x xs)) @ (snd (split_list x xs)) = xs"
  using assms by (induct xs) (auto simp: split_def)

lemma fst_split_list:
  assumes "x ∈ set xs"
  shows "∃ys. fst (split_list x xs) = ys @ [x] ∧ x ∉ set ys"
  using assms by (induct xs) (auto simp: split_def)

text ‹
  Push a vertex on the stack and increment the sequence number.
  The pushed vertex is associated with the (old) sequence number.
  It is also added to the set of gray nodes.
›
definition add_stack_incr where
  "add_stack_incr x e =
      e ⦇ gray := insert x (gray e),
          stack := x # (stack e),
          sn := sn e +1,
          num := (num e) (x := int (sn e)) ⦈"

text ‹
  Add vertex @{text x} to the set of black vertices in @{text e}
  and remove it from the set of gray vertices.
›
definition add_black where
  "add_black x e = e ⦇ black := insert x (black e),
                       gray := (gray e) - {x} ⦈"


section ‹Main functions used for Tarjan's algorithms›

subsection ‹Function definitions›

text {*
  We define two mutually recursive functions that contain the essence
  of Tarjan's algorithm. Their arguments are respectively a single
  vertex and a set of vertices, as well as an environment that contains
  the local variables of the algorithm, and an auxiliary parameter
  representing the set of ``gray'' vertices, which is used only for
  the proof. The main function is then obtained by specializing the
  function operating on a set of vertices.
*}

function (domintros) dfs1 and dfs where
  "dfs1 x e  =
    (let (n1, e1) = dfs (successors x) (add_stack_incr x e) in
      if n1 < int (sn e) then (n1, add_black x e1)
      else
       (let (l,r) = split_list x (stack e1) in
         (∞, 
           ⦇ black = insert x (black e1),
             gray = gray e,
             stack = r,
             sccs = insert (set l) (sccs e1),
             sn = sn e1,
             num = set_infty l (num e1) ⦈ )))"
| "dfs roots e =
    (if roots = {} then (∞, e)
    else
      (let x = SOME x. x ∈ roots;
           res1 = (if num e x ≠ -1 then (num e x, e) else dfs1 x e);
           res2 = dfs (roots - {x}) (snd res1)
      in (min (fst res1) (fst res2), snd res2) ))"
  by pat_completeness auto

definition init_env where
  "init_env ≡ ⦇ black = {},            gray = {},
                stack = [],            sccs = {},
                sn = 0,                num = λ_. -1 ⦈"

definition tarjan where
  "tarjan ≡ sccs (snd (dfs vertices init_env))"


subsection ‹Well-definedness of the functions›

text ‹
  We did not prove termination when we defined the two mutually 
  recursive functions @{text dfs1} and @{text dfs} defined above,
  and indeed it is easy to see that they do not terminate for
  arbitrary arguments. Isabelle allows us to define ``partial''
  recursive functions, for which it introduces an auxiliary
  domain predicate that characterizes their domain of definition.
  We now make this more concrete and prove that the two functions
  terminate when called for nodes of the graph, also assuming an
  elementary well-definedness condition for environments. These
  conditions are met in the cases of interest, and in particular
  in the call to @{text dfs} in the main function @{text tarjan}.
  Intuitively, the reason is that every (possibly indirect)
  recursive call to @{text dfs} either decreases the set of roots
  or increases the set of nodes colored black or gray.
›

text ‹
  The set of nodes colored black never decreases in the course
  of the computation.
›
lemma black_increasing:
  "dfs1_dfs_dom (Inl (x,e)) ⟹ black e ⊆ black (snd (dfs1 x e))"
  "dfs1_dfs_dom (Inr (roots,e)) ⟹ black e ⊆ black (snd (dfs roots e))"
  by (induct rule: dfs1_dfs.pinduct,
      (fastforce simp: dfs1.psimps dfs.psimps case_prod_beta 
                   add_black_def add_stack_incr_def)+)

text ‹
  Similarly, the set of nodes colored black or gray 
  never decreases in the course of the computation.
›
lemma colored_increasing:
  "dfs1_dfs_dom (Inl (x,e)) ⟹
    colored e ⊆ colored (snd (dfs1 x e)) ∧
    colored (add_stack_incr x e)
    ⊆ colored (snd (dfs (successors x) (add_stack_incr x e)))"
  "dfs1_dfs_dom (Inr (roots,e)) ⟹
    colored e ⊆ colored (snd (dfs roots e))"
proof (induct rule: dfs1_dfs.pinduct)
  case (1 x e)
  from `dfs1_dfs_dom (Inl (x,e))`
  have "black e ⊆ black (snd (dfs1 x e))"
    by (rule black_increasing)
  with 1 show ?case
    by (auto simp: dfs1.psimps case_prod_beta add_stack_incr_def
                   add_black_def colored_def)
next
  case (2 roots e) then show ?case
    by (fastforce simp: dfs.psimps case_prod_beta)
qed

text ‹
  The functions @{text dfs1} and @{text dfs} never assign the
  number of a vertex to -1.
›
lemma dfs_num_defined:
  "⟦dfs1_dfs_dom (Inl (x,e)); num (snd (dfs1 x e)) v = -1⟧ ⟹
    num e v = -1"
  "⟦dfs1_dfs_dom (Inr (roots,e)); num (snd (dfs roots e)) v = -1⟧ ⟹
    num e v = -1"
  by (induct rule: dfs1_dfs.pinduct,
      (auto simp: dfs1.psimps dfs.psimps case_prod_beta add_stack_incr_def 
                  add_black_def set_infty
            split: if_split_asm))

text ‹
  We are only interested in environments that assign positive
  numbers to colored nodes, and we show that calls to @{text dfs1}
  and @{text dfs} preserve this property.
›
definition colored_num where
  "colored_num e ≡ ∀v ∈ colored e. v ∈ vertices ∧ num e v ≠ -1"

lemma colored_num:
  "⟦dfs1_dfs_dom (Inl (x,e)); x ∈ vertices; colored_num e⟧ ⟹
    colored_num (snd (dfs1 x e))"
  "⟦dfs1_dfs_dom (Inr (roots,e)); roots ⊆ vertices; colored_num e⟧ ⟹
    colored_num (snd (dfs roots e))"
proof (induct rule: dfs1_dfs.pinduct)
  case (1 x e) 
  let ?rec = "dfs (successors x) (add_stack_incr x e)"
  from sclosed `x ∈ vertices`
  have "successors x ⊆ vertices" ..
  moreover
  from `colored_num e` `x ∈ vertices`
  have "colored_num (add_stack_incr x e)" 
    by (auto simp: colored_num_def add_stack_incr_def colored_def)
  ultimately 
  have rec: "colored_num (snd ?rec)"
    using 1 by blast
  have x: "x ∈ colored (add_stack_incr x e)"
    by (simp add: add_stack_incr_def colored_def)
  from `dfs1_dfs_dom (Inl (x,e))` colored_increasing
  have colrec: "colored (add_stack_incr x e) ⊆ colored (snd ?rec)"
    by blast
  show ?case
  proof (cases "fst ?rec < int (sn e)")
    case True
    with rec x colrec `dfs1_dfs_dom (Inl (x,e))` show ?thesis
      by (auto simp: dfs1.psimps case_prod_beta 
                     colored_num_def add_black_def colored_def)
  next
    case False
    let ?e' = "snd (dfs1 x e)"
    have "colored e ⊆ colored (add_stack_incr x e)"
      by (auto simp: colored_def add_stack_incr_def)
    with False x colrec `dfs1_dfs_dom (Inl (x,e))`
    have "colored ?e' ⊆ colored (snd ?rec)" 
         "∃xs. num ?e' = set_infty xs (num (snd ?rec))"
      by (auto simp: dfs1.psimps case_prod_beta colored_def)
    with rec show ?thesis
      by (auto simp: colored_num_def set_infty split: if_split_asm)
  qed
next
  case (2 roots e)
  show ?case
  proof (cases "roots = {}")
    case True
    with `dfs1_dfs_dom (Inr (roots,e))` `colored_num e`
    show ?thesis by (auto simp: dfs.psimps)
  next
    case False
    let ?x = "SOME x. x ∈ roots"
    from False obtain r where "r ∈ roots" by blast
    hence "?x ∈ roots" by (rule someI)
    with `roots ⊆ vertices` have x: "?x ∈ vertices" ..
    let ?res1 = "if num e ?x ≠ -1 then (num e ?x, e) else dfs1 ?x e"
    let ?res2 = "dfs (roots - {?x}) (snd ?res1)"
    from 2 False `roots ⊆ vertices` x
    have "colored_num (snd ?res1)" by auto
    with 2 False `roots ⊆ vertices`
    have "colored_num (snd ?res2)"
      by blast
    moreover
    from False `dfs1_dfs_dom (Inr (roots,e))`
    have "dfs roots e = (min (fst ?res1) (fst ?res2), snd ?res2)"
      by (auto simp: dfs.psimps)
    ultimately show ?thesis by simp
  qed
qed

text ‹
  The following relation underlies the termination argument used
  for proving well-definedness of the functions @{text dfs1} and
  @{text dfs}. It is defined on the disjoint sum of the types of
  arguments of the two functions and relates the arguments of
  (mutually) recursive calls.
›
definition dfs1_dfs_term where
  "dfs1_dfs_term ≡
    { (Inl(x, e::'v env), Inr(roots,e)) | 
      x e roots .
      roots ⊆ vertices ∧ x ∈ roots ∧ colored e ⊆ vertices }
  ∪ { (Inr(roots, add_stack_incr x e), Inl(x, e)) |
      x e roots .
      colored e ⊆ vertices ∧ x ∈ vertices - colored e }
  ∪ { (Inr(roots, e::'v env), Inr(roots', e')) | 
      roots roots' e e' .
      roots' ⊆ vertices ∧ roots ⊂ roots' ∧ 
      colored e' ⊆ colored e ∧ colored e ⊆ vertices }"

text ‹
  In order to prove that the above relation is well-founded, we
  use the following function that embeds it into triples whose first
  component is the complement of the colored nodes, whose second
  component is the set of root nodes, and whose third component
  is 1 or 2 depending on the function being called. The third
  component corresponds to the first case in the definition of
  @{text dfs1_dfs_term}.
›
fun dfs1_dfs_to_tuple where
  "dfs1_dfs_to_tuple (Inl(x::'v, e::'v env)) = (vertices - colored e, {x}, 1::nat)"
| "dfs1_dfs_to_tuple (Inr(roots, e::'v env)) = (vertices - colored e, roots, 2)"

lemma wf_term: "wf dfs1_dfs_term"
proof -
  let ?r = "(finite_psubset :: ('v set × 'v set) set)
            <*lex*> (finite_psubset :: ('v set × 'v set) set)
            <*lex*> pred_nat"
  have "wf ?r"
    using wf_finite_psubset wf_pred_nat by blast
  moreover
  have "dfs1_dfs_term ⊆ inv_image ?r dfs1_dfs_to_tuple"
    unfolding dfs1_dfs_term_def pred_nat_def using vfin 
    by (auto dest: finite_subset simp: add_stack_incr_def colored_def)
  ultimately show ?thesis
    using wf_inv_image wf_subset by blast
qed

text ‹
  The following theorem establishes sufficient conditions under
  which the two functions @{text dfs1} and @{text dfs} terminate.
  The proof proceeds by well-founded induction using the relation
  @{text dfs1_dfs_term} and makes use of the theorem
  @{text dfs1_dfs.domintros} that was generated by Isabelle from
  the mutually recursive definitions in order to characterize the
  domain conditions for these functions.
›
theorem dfs1_dfs_termination:
  "⟦x ∈ vertices - colored e; colored_num e⟧ ⟹ dfs1_dfs_dom (Inl(x, e))"
  "⟦roots ⊆ vertices; colored_num e⟧ ⟹ dfs1_dfs_dom (Inr(roots, e))"
proof -
  { fix args
    have "(case args
          of Inl(x,e) ⇒ 
             x ∈ vertices - colored e ∧ colored_num e
          |  Inr(roots,e) ⇒ 
             roots ⊆ vertices ∧ colored_num e)
        ⟶ dfs1_dfs_dom args" (is "?P args ⟶ ?Q args")
    proof (rule wf_induct[OF wf_term])
      fix arg :: "('v × 'v env) + ('v set × 'v env)"
      assume ih: "∀arg'. (arg',arg) ∈ dfs1_dfs_term
                      ⟶ (?P arg' ⟶ ?Q arg')"
      show "?P arg ⟶ ?Q arg"
      proof
        assume P: "?P arg"
        show "?Q arg"
        proof (cases arg)
          case (Inl a)
          then obtain x e where a: "arg = Inl(x,e)"
            using dfs1.cases by metis
          have "?Q (Inl(x,e))"
          proof (rule dfs1_dfs.domintros)
            let ?recarg = "Inr (successors x, add_stack_incr x e)"
            from a P have "(?recarg, arg) ∈ dfs1_dfs_term"
              by (auto simp: add_stack_incr_def colored_num_def dfs1_dfs_term_def)
            moreover
            from a P sclosed have "?P ?recarg"
              by (auto simp: add_stack_incr_def colored_num_def colored_def)
            ultimately show "?Q ?recarg"
              using ih by auto
          qed
          with a show ?thesis by simp
        next
          case (Inr b)
          then obtain roots e where b: "arg = Inr(roots,e)"
            using dfs.cases by metis
          let ?sx = "SOME x. x ∈ roots"
          let ?rec1arg = "Inl (?sx, e)"
          let ?rec2arg = "Inr (roots - {?sx}, e)"
          let ?rec3arg = "Inr (roots - {?sx}, snd (dfs1 ?sx e))"
          have "?Q (Inr(roots,e))"
          proof (rule dfs1_dfs.domintros)
            fix x
            assume 1: "x ∈ roots"
               and 2: "num e ?sx = -1"
               and 3: "¬ dfs1_dfs_dom ?rec1arg"
            from 1 have sx: "?sx ∈ roots" by (rule someI)
            with P b have "(?rec1arg, arg) ∈ dfs1_dfs_term"
              by (auto simp: dfs1_dfs_term_def colored_num_def)
            moreover
            from sx 2 P b have "?P ?rec1arg"
              by (auto simp: colored_num_def)
            ultimately show False
              using ih 3 by auto
          next
            fix x
            assume "x ∈ roots"
            hence sx: "?sx ∈ roots" by (rule someI)
            from sx b P have "(?rec2arg, arg) ∈ dfs1_dfs_term"
              by (auto simp: dfs1_dfs_term_def colored_num_def)
            moreover
            from P b have "?P ?rec2arg" by auto
            ultimately show "dfs1_dfs_dom ?rec2arg"
              using ih by auto
          next
            fix x
            assume 1: "x ∈ roots" and 2: "num e ?sx = -1"
            from 1 have sx: "?sx ∈ roots" by (rule someI)
            have "dfs1_dfs_dom ?rec1arg"
            proof -
              from sx P b have "(?rec1arg, arg) ∈ dfs1_dfs_term"
                by (auto simp: dfs1_dfs_term_def colored_num_def)
              moreover
              from sx 2 P b have "?P ?rec1arg"
                by (auto simp: colored_num_def)
              ultimately show ?thesis
                using ih by auto
            qed
            with P b sx have "colored_num (snd (dfs1 ?sx e))"
              by (auto elim: colored_num)
            moreover
            from this sx b P `dfs1_dfs_dom ?rec1arg`
            have "(?rec3arg, arg) ∈ dfs1_dfs_term"
              by (auto simp: dfs1_dfs_term_def colored_num_def 
                       dest: colored_increasing)
            moreover
            from this P b `colored_num (snd (dfs1 ?sx e))`
            have "?P ?rec3arg" by auto
            ultimately show "dfs1_dfs_dom ?rec3arg"
              using ih by auto
          qed
          with b show ?thesis by simp
        qed
      qed
    qed
  }
  note dom = this
  from dom 
  show "⟦x ∈ vertices - colored e; colored_num e⟧ ⟹ dfs1_dfs_dom (Inl(x,e))"
    by auto
  from dom
  show "⟦roots ⊆ vertices; colored_num e⟧ ⟹ dfs1_dfs_dom (Inr(roots,e))"
    by auto
qed


section {* Auxiliary notions for the proof of partial correctness *}

text ‹
  The proof of partial correctness is more challenging and requires
  some further concepts that we now define.

  We need to reason about the relative order of elements in a list
  (specifically, the stack used in the algorithm).
›
definition precedes ("_ ≼ _ in _" [100,100,100] 39) where
  ― ‹@{text x} has an occurrence in @{text xs} that
      precedes an occurrence of @{text y}.›
  "x ≼ y in xs ≡ ∃l r. xs = l @ (x # r) ∧ y ∈ set (x # r)"

lemma precedes_mem:
  assumes "x ≼ y in xs"
  shows "x ∈ set xs" "y ∈ set xs"
  using assms unfolding precedes_def by auto

lemma head_precedes:
  assumes "y ∈ set (x # xs)"
  shows "x ≼ y in (x # xs)"
  using assms unfolding precedes_def by force

lemma precedes_in_tail:
  assumes "x ≠ z"
  shows "x ≼ y in (z # zs) ⟷ x ≼ y in zs"
  using assms unfolding precedes_def by (auto simp: Cons_eq_append_conv)

lemma tail_not_precedes:
  assumes "y ≼ x in (x # xs)" "x ∉ set xs"
  shows "x = y"
  using assms unfolding precedes_def
  by (metis Cons_eq_append_conv Un_iff list.inject set_append)

lemma split_list_precedes:
  assumes "y ∈ set (ys @ [x])"
  shows "y ≼ x in (ys @ x # xs)"
  using assms unfolding precedes_def
  by (metis append_Cons append_assoc in_set_conv_decomp
            rotate1.simps(2) set_ConsD set_rotate1)

lemma precedes_refl [simp]: "(x ≼ x in xs) = (x ∈ set xs)"
proof
  assume "x ≼ x in xs" thus "x ∈ set xs"
    by (simp add: precedes_mem)
next
  assume "x ∈ set xs"
  from this[THEN split_list] show "x ≼ x in xs"
    unfolding precedes_def by auto
qed

lemma precedes_append_left:
  assumes "x ≼ y in xs"
  shows "x ≼ y in (ys @ xs)"
  using assms unfolding precedes_def by (metis append.assoc)

lemma precedes_append_left_iff:
  assumes "x ∉ set ys"
  shows "x ≼ y in (ys @ xs) ⟷ x ≼ y in xs" (is "?lhs = ?rhs")
proof
  assume "?lhs"
  then obtain l r where lr: "ys @ xs = l @ (x # r)" "y ∈ set (x # r)"
    unfolding precedes_def by blast
  then obtain us where
    "(ys = l @ us ∧ us @ xs = x # r) ∨ (ys @ us = l ∧ xs = us @ (x # r))"
    by (auto simp: append_eq_append_conv2)
  thus ?rhs
  proof
    assume us: "ys = l @ us ∧ us @ xs = x # r"
    with assms have "us = []"
      by (metis Cons_eq_append_conv in_set_conv_decomp)
    with us lr show ?rhs
      unfolding precedes_def by auto
  next
    assume us: "ys @ us = l ∧ xs = us @ (x # r)"
    with ‹y ∈ set (x # r)› show ?rhs
      unfolding precedes_def by blast
  qed
next
  assume "?rhs" thus "?lhs" by (rule precedes_append_left)
qed

lemma precedes_append_right:
  assumes "x ≼ y in xs"
  shows "x ≼ y in (xs @ ys)"
  using assms unfolding precedes_def by force

lemma precedes_append_right_iff:
  assumes "y ∉ set ys"
  shows "x ≼ y in (xs @ ys) ⟷ x ≼ y in xs" (is "?lhs = ?rhs")
proof
  assume ?lhs
  then obtain l r where lr: "xs @ ys = l @ (x # r)" "y ∈ set (x # r)"
    unfolding precedes_def by blast
  then obtain us where
    "(xs = l @ us ∧ us @ ys = x # r) ∨ (xs @ us = l ∧ ys = us @ (x # r))"
    by (auto simp: append_eq_append_conv2)
  thus ?rhs
  proof
    assume us: "xs = l @ us ∧ us @ ys = x # r"
    with ‹y ∈ set (x # r)› assms show ?rhs
      unfolding precedes_def by (metis Cons_eq_append_conv Un_iff set_append)
  next
    assume us: "xs @ us = l ∧ ys = us @ (x # r)"
    with ‹y ∈ set (x # r)› assms 
    show ?rhs by auto ― ‹contradiction›
  qed
next
  assume ?rhs thus ?lhs by (rule precedes_append_right)
qed

text ‹
  Precedence determines an order on the elements of a list,
  provided elements have unique occurrences. However, consider
  a list such as @{term "[2,3,1,2]"}: then $1$ precedes $2$ and
  $2$ precedes $3$, but $1$ does not precede $3$.
›
lemma precedes_trans:
  assumes "x ≼ y in xs" and "y ≼ z in xs" and "distinct xs"
  shows "x ≼ z in xs"
  using assms unfolding precedes_def
  by (smt Un_iff append.assoc append_Cons_eq_iff distinct_append 
          not_distinct_conv_prefix set_append split_list_last)

lemma precedes_antisym:
  assumes "x ≼ y in xs" and "y ≼ x in xs" and "distinct xs"
  shows "x = y"
proof -
  from ‹x ≼ y in xs› ‹distinct xs› obtain as bs where
    1: "xs = as @ (x # bs)" "y ∈ set (x # bs)" "y ∉ set as"
    unfolding precedes_def by force
  from ‹y ≼ x in xs› ‹distinct xs› obtain cs ds where
    2: "xs = cs @ (y # ds)" "x ∈ set (y # ds)" "x ∉ set cs"
    unfolding precedes_def by force
  from 1 2 have "as @ (x # bs) = cs @ (y # ds)"
    by simp
  then obtain zs where
    "(as = cs @ zs ∧ zs @ (x # bs) = y # ds) 
     ∨ (as @ zs = cs ∧ x # bs = zs @ (y # ds))"  (is "?P ∨ ?Q")
    by (auto simp: append_eq_append_conv2)
  then show ?thesis
  proof
    assume "?P" with ‹y ∉ set as› show ?thesis 
      by (cases "zs") auto
  next
    assume "?Q" with ‹x ∉ set cs› show ?thesis
      by (cases "zs") auto
  qed
qed


section {* Predicates and lemmas about environments *}

definition subenv where
  "subenv e e' ≡
    (∃s. stack e' = s @ (stack e) ∧ set s ⊆ black e')
  ∧ black e ⊆ black e' ∧ gray e = gray e'
  ∧ sccs e ⊆ sccs e'
  ∧ (∀x ∈ set (stack e). num e x = num e' x)"

lemma subenv_refl [simp]: "subenv e e"
  by (auto simp: subenv_def)

lemma subenv_trans:
  assumes "subenv e e'" and "subenv e' e''"
  shows "subenv e e''"
  using assms unfolding subenv_def by force

definition wf_color where
  ― ‹conditions about colors, part of the invariant of the algorithm›
  "wf_color e ≡
    colored e ⊆ vertices
  ∧ black e ∩ gray e = {}
  ∧ (⋃ sccs e) ⊆ black e
  ∧ set (stack e) = gray e ∪ (black e - ⋃ sccs e)"

definition wf_num where
  ― ‹conditions about vertex numbers›
  "wf_num e ≡
    int (sn e) ≤ ∞
  ∧ (∀x. -1 ≤ num e x ∧ (num e x = ∞ ∨ num e x < int (sn e)))
  ∧ sn e = card (colored e)
  ∧ (∀x. num e x = ∞ ⟷ x ∈ ⋃ sccs e)
  ∧ (∀x. num e x = -1 ⟷ x ∉ colored e)
  ∧ (∀x ∈ set (stack e). ∀y ∈ set (stack e).
        (num e x ≤ num e y ⟷ y ≼ x in (stack e)))"

lemma subenv_num:
  ― ‹If @{text e} and @{text e'} are two well-formed environments,
      and @{text e} is a sub-environment of @{text e'} then the number
      assigned by @{text e'} to any vertex is at least that assigned
      by @{text e}.›
  assumes sub: "subenv e e'"
      and e: "wf_color e" "wf_num e"
      and e': "wf_color e'" "wf_num e'"
  shows "num e x ≤ num e' x"
(*
  using assms unfolding wf_color_def wf_num_def subenv_def colored_def
  by (smt DiffI UnCI UnE mem_simps(9) subsetCE)
*)
proof (cases "x ∈ colored e")
  case True then show ?thesis unfolding colored_def
  proof
    assume "x ∈ gray e"
    with e sub show ?thesis 
      by (auto simp: wf_color_def subenv_def)
  next
    assume "x ∈ black e"
    show ?thesis
    proof (cases "x ∈ ⋃ sccs e")
      case True
      with sub e e' have "num e x = ∞" "num e' x = ∞"
        by (auto simp: subenv_def wf_num_def)
      thus ?thesis by simp
    next
      case False
      with ‹x ∈ black e› e sub show ?thesis
        by (auto simp: wf_color_def subenv_def)
    qed
  qed
next
  case False with e e' show ?thesis
    unfolding wf_num_def by metis
qed

definition no_black_to_white where
  ― ‹successors of black vertices cannot be white›
  "no_black_to_white e ≡ ∀x y. edge x y ∧ x ∈ black e ⟶ y ∈ colored e"

definition wf_env where
  "wf_env e ≡
    wf_color e ∧ wf_num e
  ∧ no_black_to_white e ∧ distinct (stack e)
  ∧ (∀x y. y ≼ x in (stack e) ⟶ reachable x y)
  ∧ (∀y ∈ set (stack e). ∃g ∈ gray e. y ≼ g in (stack e) ∧ reachable y g)
  ∧ sccs e = { C . C ⊆ black e ∧ is_scc C }"

lemma num_in_stack:
  assumes "wf_env e" and "x ∈ set (stack e)"
  shows "num e x ≠ -1"
        "num e x < int (sn e)"
proof -
  from assms 
  show "num e x ≠ -1"
    by (auto simp: wf_env_def wf_color_def wf_num_def colored_def)
  from `wf_env e`
  have "num e x < int (sn e) ∨ x ∈ ⋃ sccs e"
    unfolding wf_env_def wf_num_def by metis
  with assms show "num e x < int (sn e)"
    unfolding wf_env_def wf_color_def by blast
qed

text ‹
  Numbers assigned to different stack elements are distinct.
›
lemma num_inj:
  assumes "wf_env e" and "x ∈ set (stack e)" 
      and "y ∈ set (stack e)" and "num e x = num e y"
    shows "x = y"
  using assms unfolding wf_env_def wf_num_def
  by (metis precedes_refl precedes_antisym) 

text ‹
  The set of black elements at the top of the stack together
  with the first gray element always form a sub-SCC. This lemma
  is useful for the ``else'' branch of @{text dfs1}.
›
lemma first_gray_yields_subscc:
  assumes e: "wf_env e"
    and x: "stack e = ys @ (x # zs)"
    and g: "x ∈ gray e"
    and ys: "set ys ⊆ black e"
  shows "is_subscc (insert x (set ys))"
proof -
  from e x have "∀y ∈ set ys. ∃g ∈ gray e. reachable y g"
    unfolding wf_env_def by force
  moreover
  have "∀g ∈ gray e. reachable g x"
  proof
    fix g
    assume "g ∈ gray e"
    with e x ys have "g ∈ set (x # zs)"
      unfolding wf_env_def wf_color_def by auto
    with e x show "reachable g x"
      unfolding wf_env_def precedes_def by blast
  qed
  moreover
  from e x g have "∀y ∈ set ys. reachable x y"
    unfolding wf_env_def by (simp add: split_list_precedes)
  ultimately show ?thesis
    unfolding is_subscc_def 
    by (metis reachable_trans reachable_refl insertE)
qed

section ‹Partial correctness of the main functions›

text ‹
  We now define the pre- and post-conditions for proving that
  the functions @{text dfs1} and @{text dfs} are partially correct.
  The parameters of the preconditions, as well as the first parameters
  of the postconditions, coincide with the parameters of the
  functions @{text dfs1} and @{text dfs}. The final parameter of
  the postconditions represents the result computed by the function.
›

definition dfs1_pre where
  "dfs1_pre x e ≡
    x ∈ vertices
  ∧ x ∉ colored e
  ∧ (∀g ∈ gray e. reachable g x)
  ∧ wf_env e"

definition dfs1_post where
  "dfs1_post x e res ≡
    let n = fst res; e' = snd res
    in  wf_env e'
      ∧ subenv e e'
      ∧ x ∈ black e'
      ∧ n ≤ num e' x
      ∧ (n = ∞ ∨ (∃y ∈ set (stack e'). num e' y = n ∧ reachable x y))
      ∧ (∀y. xedge_to (stack e') (stack e) y ⟶ n ≤ num e' y)"

definition dfs_pre where
  "dfs_pre roots e ≡
    roots ⊆ vertices
  ∧ (∀x ∈ roots. ∀g ∈ gray e. reachable g x)
  ∧ wf_env e"

definition dfs_post where
  "dfs_post roots e res ≡
    let n = fst res; e' = snd res
    in  wf_env e'
      ∧ subenv e e'
      ∧ roots ⊆ colored e'
      ∧ (∀x ∈ roots. n ≤ num e' x)
      ∧ (n = ∞ ∨ (∃x ∈ roots. ∃y ∈ set (stack e'). num e' y = n ∧ reachable x y))
      ∧ (∀y. xedge_to (stack e') (stack e) y ⟶ n ≤ num e' y)"

text ‹
  The following lemmas express some useful consequences of the
  pre- and post-conditions. In particular, the preconditions
  ensure that the function calls terminate.
›

lemma dfs1_pre_domain:
  assumes "dfs1_pre x e"
  shows "colored e ⊆ vertices" 
        "x ∈ vertices - colored e"
        "x ∉ set (stack e)"
        "int (sn e) < ∞"
  using assms vfin
  unfolding dfs1_pre_def wf_env_def wf_color_def wf_num_def colored_def
  by (auto intro: psubset_card_mono)

lemma dfs1_pre_dfs1_dom:
  "dfs1_pre x e ⟹ dfs1_dfs_dom (Inl(x,e))"
  unfolding dfs1_pre_def wf_env_def wf_color_def wf_num_def
  by (auto simp: colored_num_def intro!: dfs1_dfs_termination)

lemma dfs_pre_dfs_dom:
  "dfs_pre roots e ⟹ dfs1_dfs_dom (Inr(roots,e))"
  unfolding dfs_pre_def wf_env_def wf_color_def wf_num_def
  by (auto simp: colored_num_def intro!: dfs1_dfs_termination)

(** not needed, but potentially useful
lemma dfs1_post_num:
  "dfs1_post x e res ⟹ fst res ≤ ∞"
  unfolding dfs1_post_def wf_env_def wf_num_def
  by smt

lemma dfs_post_num:
  "dfs_post roots e res ⟹ fst res ≤ ∞"
  unfolding dfs_post_def wf_env_def wf_num_def
  by smt
**)

lemma dfs_post_stack:
  assumes "dfs_post roots e res"
  obtains s where 
    "stack (snd res) = s @ stack e" 
    "set s ⊆ black (snd res)"
    "∀x ∈ set (stack e). num (snd res) x = num e x"
  using assms unfolding dfs_post_def subenv_def by auto

lemma dfs_post_split:
  fixes x e res
  defines "n' ≡ fst res"
  defines "e' ≡ snd res"
  defines "l ≡ fst (split_list x (stack e'))"
  defines "r ≡ snd (split_list x (stack e'))"
  assumes post: "dfs_post (successors x) (add_stack_incr x e) res"
             (is "dfs_post ?roots ?e res")
  obtains ys where
    "l = ys @ [x]"
    "x ∉ set ys"
    "set ys ⊆ black e'"
    "stack e' = l @ r"
    "is_subscc (set l)"
    "r = stack e"
proof -
  from post have dist: "distinct (stack e')"
    unfolding dfs_post_def wf_env_def e'_def by auto
  from post obtain s where
    s: "stack e' = s @ (x # stack e)" "set s ⊆ black e'"
    unfolding add_stack_incr_def e'_def
    by (auto intro: dfs_post_stack)
  then obtain ys where ys: "l = ys @ [x]" "x ∉ set ys" "stack e' = l @ r"
    unfolding add_stack_incr_def l_def r_def
    by (metis in_set_conv_decomp split_list_concat fst_split_list)
  with s have l: "l = (s @ [x]) ∧ r = stack e"
    by (metis dist append.assoc append.simps(1) append.simps(2) 
              append_Cons_eq_iff distinct.simps(2) distinct_append)
  from post have "wf_env e'" "x ∈ gray e'"
    by (auto simp: dfs_post_def subenv_def add_stack_incr_def e'_def)
  with s l have "is_subscc (set l)"
    by (auto simp: add_stack_incr_def intro: first_gray_yields_subscc)
  with s ys l that show ?thesis by auto
qed

text {*
  A crucial lemma establishing a condition after the ``then'' branch
  following the recursive call in function @{text dfs1}.
*}
lemma dfs_post_reach_gray:
  fixes x e res
  defines "n' ≡ fst res"
  defines "e' ≡ snd res"
  assumes e: "wf_env e"
      and post: "dfs_post (successors x) (add_stack_incr x e) res"
             (is "dfs_post ?roots ?e res")
      and n': "n' < int (sn e)"
  obtains g where
    "g ≠ x" "g ∈ gray e'" "x ≼ g in (stack e')" 
    "reachable x g" "reachable g x"
proof -
  from post have e': "wf_env e'" "subenv ?e e'"
    by (auto simp: dfs_post_def e'_def)
  hence x_e': "x ∈ set (stack e')" "x ∈ vertices" "num e' x = int(sn e)"
    by (auto simp: add_stack_incr_def subenv_def wf_env_def wf_color_def colored_def)
  from e n' have "n' ≠ ∞"
    unfolding wf_env_def wf_num_def by simp
  with post e' obtain sx y g where
    g: "sx ∈ ?roots" "y ∈ set (stack e')" "num e' y = n'" "reachable sx y"
       "g ∈ gray e'" "g ∈ set (stack e')" "y ≼ g in (stack e')" "reachable y g"
    unfolding dfs_post_def e'_def n'_def wf_env_def
    by (fastforce intro: precedes_mem )
  with e' have "num e' g ≤ num e' y"
    unfolding wf_env_def wf_num_def by metis
  with n' x_e' ‹num e' y = n'› 
  have "num e' g ≤ num e' x" "g ≠ x" by auto
  with ‹g ∈ set (stack e')› ‹x ∈ set (stack e')› e'
  have "g ≠ x ∧ x ≼ g in (stack e') ∧ reachable g x"
    unfolding wf_env_def wf_num_def by auto
  moreover 
  from g have "reachable x g"
    by (metis reachable_succ reachable_trans)
  moreover
  note ‹g ∈ gray e'› that
  ultimately show ?thesis by auto
qed

text {*
  The following lemmas represent steps in the proof of partial correctness.
*}

lemma dfs1_pre_dfs_pre:
  ― ‹The precondition of @{text dfs1} establishes that of the recursive
      call to @{text dfs}.›
  assumes "dfs1_pre x e"
  shows "dfs_pre (successors x) (add_stack_incr x e)"
        (is "dfs_pre ?roots' ?e'")
proof -
  from assms sclosed have "?roots' ⊆ vertices"
    unfolding dfs1_pre_def by blast
  moreover
  from assms have "∀y ∈ ?roots'. ∀g ∈ gray ?e'. reachable g y"
    unfolding dfs1_pre_def add_stack_incr_def
    by (auto dest: succ_reachable reachable_trans)
  moreover
  {
    from assms have wf_col': "wf_color ?e'"
      by (auto simp: dfs1_pre_def wf_env_def wf_color_def 
                     add_stack_incr_def colored_def)
    note 1 = dfs1_pre_domain[OF assms]
    from assms 1 have dist': "distinct (stack ?e')"
      unfolding dfs1_pre_def wf_env_def add_stack_incr_def by auto
    from assms have 3: "sn e = card (colored e)"
      unfolding dfs1_pre_def wf_env_def wf_num_def by simp
    from 1 have 4: "int (sn ?e') ≤ ∞"
      unfolding add_stack_incr_def by simp
    with assms have 5: "∀x. -1 ≤ num ?e' x ∧ (num ?e' x = ∞ ∨ num ?e' x < int (sn ?e'))"
      unfolding dfs1_pre_def wf_env_def wf_num_def add_stack_incr_def by auto
    from 1 vfin have "finite (colored e)" using finite_subset by blast
    with 1 3 have 6: "sn ?e' = card (colored ?e')"
      unfolding add_stack_incr_def colored_def by auto
    from assms 1 3 have 7: "∀y. num ?e' y = ∞ ⟷ y ∈ ⋃ sccs ?e'"
      by (auto simp: dfs1_pre_def wf_env_def wf_num_def
                     add_stack_incr_def colored_def)
    from assms 3 have 8: "∀y. num ?e' y = -1 ⟷ y ∉ colored ?e'"
      by (auto simp: dfs1_pre_def wf_env_def wf_num_def add_stack_incr_def colored_def)
    from assms 1 have "∀y ∈ set (stack e). num ?e' y < num ?e' x"
      unfolding dfs1_pre_def add_stack_incr_def
      by (auto dest: num_in_stack)
    moreover
    have "∀y ∈ set (stack e). x ≼ y in (stack ?e')"
      unfolding add_stack_incr_def by (auto intro: head_precedes)
    moreover
    from 1 have "∀y ∈ set (stack e). ¬(y ≼ x in (stack ?e'))"
      unfolding add_stack_incr_def by (auto dest: tail_not_precedes)
    moreover
    {
      fix y z
      assume "y ∈ set (stack e)" "z ∈ set (stack e)"
      with 1 have "x ≠ y" by auto
      hence "y ≼ z in (stack ?e') ⟷ y ≼ z in (stack e)"
        by (simp add: add_stack_incr_def precedes_in_tail)
    }
    ultimately
    have 9: "∀y ∈ set (stack ?e'). ∀z ∈ set (stack ?e').
                num ?e' y ≤ num ?e' z ⟷ z ≼ y in (stack ?e')"
      using assms
      unfolding dfs1_pre_def wf_env_def wf_num_def add_stack_incr_def
      by auto
    from 4 5 6 7 8 9 have wf_num': "wf_num ?e'"
      unfolding wf_num_def by blast
    from assms have nbtw': "no_black_to_white ?e'"
      by (auto simp: dfs1_pre_def wf_env_def no_black_to_white_def 
                     add_stack_incr_def colored_def)

    have stg': "∀y ∈ set (stack ?e'). ∃g ∈ gray ?e'.
                   y ≼ g in (stack ?e') ∧ reachable y g"
    proof
      fix y
      assume y: "y ∈ set (stack ?e')"
      show "∃g ∈ gray ?e'. y ≼ g in (stack ?e') ∧ reachable y g"
      proof (cases "y = x")
        case True
        then show ?thesis
          unfolding add_stack_incr_def by auto
      next
        case False
        with y have "y ∈ set (stack e)"
          by (simp add: add_stack_incr_def)
        with assms obtain g where 
          "g ∈ gray e ∧ y ≼ g in (stack e) ∧ reachable y g"
          unfolding dfs1_pre_def wf_env_def by blast
        thus ?thesis
          unfolding add_stack_incr_def
          by (auto dest: precedes_append_left[where ys="[x]"])
      qed
    qed

    have str': "∀y z. y ≼ z in (stack ?e') ⟶ reachable z y"
    proof (clarify)
      fix y z
      assume yz: "y ≼ z in stack ?e'"
      show "reachable z y"
      proof (cases "y = x")
        case True
        from yz[THEN precedes_mem(2)] stg'
        obtain g where "g ∈ gray ?e'" "reachable z g" by blast
        with True assms show ?thesis
          unfolding dfs1_pre_def add_stack_incr_def
          by (auto elim: reachable_trans)
      next
        case False
        with yz have yze: "y ≼ z in stack e"
          by (simp add: add_stack_incr_def precedes_in_tail)
        with assms show ?thesis
          unfolding dfs1_pre_def wf_env_def by blast
      qed
    qed
    from assms have "sccs (add_stack_incr x e) = 
           {C . C ⊆ black (add_stack_incr x e) ∧ is_scc C}"
      by (auto simp: dfs1_pre_def wf_env_def add_stack_incr_def)
    with wf_col' wf_num' nbtw' dist' str' stg'
    have "wf_env ?e'"
      unfolding wf_env_def by blast
  }
  ultimately show ?thesis
    unfolding dfs_pre_def by blast
qed

lemma dfs_pre_dfs1_pre:
  ― ‹The precondition of @{text dfs} establishes that of the recursive
      call to @{text dfs1}, for any @{text "x ∈ roots"} such that
      @{text "num e x = -1"}.›
  assumes "dfs_pre roots e" and "x ∈ roots" and "num e x = -1"
  shows "dfs1_pre x e"
  using assms unfolding dfs_pre_def dfs1_pre_def wf_env_def wf_num_def by auto

text ‹
  Prove the post-condition of @{text dfs1} for the ``then'' branch
  in the definition of @{text dfs1}, assuming that the recursive call
  to @{text dfs} establishes its post-condition.
›
lemma dfs_post_dfs1_post_case1:
  fixes x e
  defines "res1 ≡ dfs (successors x) (add_stack_incr x e)"
  defines "n1 ≡ fst res1"
  defines "e1 ≡ snd res1"
  defines "res ≡ dfs1 x e"
  assumes pre: "dfs1_pre x e"
      and post: "dfs_post (successors x) (add_stack_incr x e) res1"
      and lt: "fst res1 < int (sn e)"
  shows "dfs1_post x e res"
proof -
  let ?e' = "add_black x e1"
  from pre have dom: "dfs1_dfs_dom (Inl (x, e))"
    by (rule dfs1_pre_dfs1_dom)
  from lt dom have dfs1: "res = (n1, ?e')"
    by (simp add: res1_def n1_def e1_def res_def case_prod_beta dfs1.psimps)
  from post have wf_env1: "wf_env e1"
    unfolding dfs_post_def e1_def by auto
  from post obtain s where s: "stack e1 = s @ stack (add_stack_incr x e)"
    unfolding e1_def by (blast intro: dfs_post_stack)
  from post have x_e1: "x ∈ set (stack e1)"
    by (auto intro: dfs_post_stack simp: e1_def add_stack_incr_def)
  from post have se1: "subenv (add_stack_incr x e) e1"
    unfolding dfs_post_def by (simp add: e1_def split_def)
  from pre lt post obtain g where
    g: "g ≠ x" "g ∈ gray e1" "x ≼ g in (stack e1)" 
       "reachable x g" "reachable g x"
    unfolding e1_def using dfs_post_reach_gray dfs1_pre_def by blast

  have wf_env': "wf_env ?e'"
  proof -
    from wf_env1 dfs1_pre_domain[OF pre] x_e1 have "wf_color ?e'"
      by (auto simp: dfs_pre_def wf_env_def wf_color_def add_black_def colored_def)
    moreover
    from se1
    have "x ∈ gray e1" "colored ?e' = colored e1"
      by (auto simp: subenv_def add_stack_incr_def add_black_def colored_def)
    with wf_env1 have "wf_num ?e'"
      unfolding dfs_pre_def wf_env_def wf_num_def add_black_def by auto
    moreover
    from post wf_env1 have "no_black_to_white ?e'"
      unfolding dfs_post_def wf_env_def no_black_to_white_def 
                add_black_def e1_def subenv_def colored_def
      by auto
    moreover
    {
      fix y
      assume "y ∈ set (stack ?e')"
      hence y: "y ∈ set (stack e1)" by (simp add: add_black_def)
      with wf_env1 obtain z where 
        z: "z ∈ gray e1"
           "y ≼ z in stack e1"
           "reachable y z"
        unfolding wf_env_def by blast
      have "∃g ∈ gray ?e'.
             y ≼ g in (stack ?e') ∧ reachable y g"
      proof (cases "z ∈ gray ?e'")
        case True with z show ?thesis by (auto simp: add_black_def)
      next
        case False
        with z have "z = x" by (simp add: add_black_def)
        with g z wf_env1 show ?thesis
          unfolding wf_env_def add_black_def
          by (auto elim: reachable_trans precedes_trans)
      qed
    }
    moreover
    have "sccs ?e' = {C . C ⊆ black ?e' ∧ is_scc C}"
    proof -
      {
        fix C
        assume "C ∈ sccs ?e'"
        with post have "is_scc C ∧ C ⊆ black ?e'"
          unfolding dfs_post_def wf_env_def add_black_def e1_def by auto
      }
      moreover
      {
        fix C
        assume C: "is_scc C" "C ⊆ black ?e'"
        have "x ∉ C"
        proof
          assume xC: "x ∈ C"
          with ‹is_scc C› g have "g ∈ C"
            unfolding is_scc_def by (auto dest: subscc_add)
          with wf_env1 g ‹C ⊆ black ?e'›  show "False"
            unfolding wf_env_def wf_color_def add_black_def by auto
        qed
        with post C have "C ∈ sccs ?e'"
          unfolding dfs_post_def wf_env_def add_black_def e1_def by auto
      }
      ultimately show ?thesis by blast
    qed

    ultimately show ?thesis  ― ‹the remaining conjuncts carry over trivially›
      using wf_env1 unfolding wf_env_def add_black_def by auto
  qed

  from pre have "x ∉ set (stack e)" "x ∉ gray e"
    unfolding dfs1_pre_def wf_env_def wf_color_def colored_def by auto
  with se1 have subenv': "subenv e ?e'"
    unfolding subenv_def add_stack_incr_def add_black_def
    by (auto split: if_split_asm)

  have xblack': "x ∈ black ?e'"
    unfolding add_black_def by simp

  from lt have "n1 < num (add_stack_incr x e) x"
    unfolding add_stack_incr_def n1_def by simp
  also have "… = num e1 x"
    using se1 unfolding subenv_def add_stack_incr_def by auto
  finally have xnum': "n1 ≤ num ?e' x"
    unfolding add_black_def by simp

  from lt pre have "n1 ≠ ∞"
    unfolding dfs1_pre_def wf_env_def wf_num_def n1_def by simp
  with post obtain sx y where
    "sx ∈ successors x" "y ∈ set (stack ?e')" "num ?e' y = n1" "reachable sx y"
    unfolding dfs_post_def add_black_def n1_def e1_def by auto
  with dfs1_pre_domain[OF pre] 
  have n1': "∃y ∈ set (stack ?e'). num ?e' y = n1 ∧ reachable x y"
    by (auto intro: reachable_trans)

  {
    fix y
    assume "xedge_to (stack ?e') (stack e) y"
    then obtain zs z where
      y: "stack ?e' = zs @ (stack e)" "z ∈ set zs" "y ∈ set (stack e)" "edge z y"
      unfolding xedge_to_def by auto
    have "n1 ≤ num ?e' y"
    proof (cases "z=x")
      case True
      with ‹edge z y› post show ?thesis
        unfolding dfs_post_def add_black_def n1_def e1_def by auto
    next
      case False
      with s y have "xedge_to (stack e1) (stack (add_stack_incr x e)) y"
        unfolding xedge_to_def add_black_def add_stack_incr_def by auto
      with post show ?thesis
        unfolding dfs_post_def add_black_def n1_def e1_def by auto
    qed
  }

  with dfs1 wf_env' subenv' xblack' xnum' n1'
  show ?thesis unfolding dfs1_post_def by simp
qed

text ‹
  Prove the post-condition of @{text dfs1} for the ``else'' branch
  in the definition of @{text dfs1}, assuming that the recursive call
  to @{text dfs} establishes its post-condition.
›
lemma dfs_post_dfs1_post_case2:
  fixes x e
  defines "res1 ≡ dfs (successors x) (add_stack_incr x e)"
  defines "n1 ≡ fst res1"
  defines "e1 ≡ snd res1"
  defines "res ≡ dfs1 x e"
  assumes pre: "dfs1_pre x e"
      and post: "dfs_post (successors x) (add_stack_incr x e) res1"
      and nlt: "¬(n1 < int (sn e))"
  shows "dfs1_post x e res"
proof -
  let ?split = "split_list x (stack e1)"
  let ?e' = "⦇ black = insert x (black e1),
               gray = gray e,
               stack = snd ?split,
               sccs = insert (set (fst ?split)) (sccs e1),
               sn = sn e1,
               num = set_infty (fst ?split) (num e1) ⦈"
  from pre have dom: "dfs1_dfs_dom (Inl (x, e))"
    by (rule dfs1_pre_dfs1_dom)
  from dom nlt have res: "res = (∞, ?e')"
    by (simp add: res1_def n1_def e1_def res_def case_prod_beta dfs1.psimps)
  from post have wf_e1: "wf_env e1" "subenv (add_stack_incr x e) e1" 
                        "successors x ⊆ colored e1"
    by (auto simp: dfs_post_def e1_def)
  hence gray': "gray e1 = insert x (gray e)"
    by (auto simp: subenv_def add_stack_incr_def)
  from post obtain l where
    l: "fst ?split = l @ [x]"
       "x ∉ set l"
       "set l ⊆ black e1"
       "stack e1 = fst ?split @ snd ?split"
       "is_subscc (set (fst ?split))"
       "snd ?split = stack e"
    unfolding e1_def by (blast intro: dfs_post_split)
  hence x: "x ∈ set (stack e1)" by auto
  from l have stack: "set (stack e) ⊆ set (stack e1)" by auto
  from wf_e1 l 
  have dist: "x ∉ set l"    "x ∉ set (stack e)" 
             "set l ∩ set (stack e) = {}"   
             "set (fst ?split) ∩ set (stack e) = {}"
    unfolding wf_env_def by auto
  with ‹stack e1 = fst ?split @ snd ?split› ‹snd ?split = stack e›
  have prec: "∀y ∈ set (stack e). ∀z. y ≼ z in (stack e1) ⟷ y ≼ z in (stack e)"
    by (metis precedes_append_left_iff Int_iff empty_iff) 
  from post have numx: "num e1 x = int (sn e)"
    unfolding dfs_post_def subenv_def add_stack_incr_def e1_def by auto

  text ‹
    All nodes contained in the same SCC as @{text x} are elements of 
    @{text "fst ?split"}. Therefore, @{text "set (fst ?split)"} constitutes an SCC.›
  {
    fix y
    assume xy: "reachable x y" and yx: "reachable y x"
       and y: "y ∉ set (fst ?split)"
    from l(1) have "x ∈ set (fst ?split)" by simp
    with xy y obtain x' y' where
      y': "reachable x x'" "edge x' y'" "reachable y' y"
          "x' ∈ set (fst ?split)" "y' ∉ set (fst ?split)"
      using reachable_crossing_set by metis
    with wf_e1 l have "y' ∈ colored e1"
      unfolding wf_env_def no_black_to_white_def by auto
    from ‹reachable x x'› ‹edge x' y'› have "reachable x y'"
      using reachable_succ reachable_trans by blast
    moreover
    from ‹reachable y' y› ‹reachable y x› have "reachable y' x"
      by (rule reachable_trans)
    ultimately have "y' ∉ ⋃ sccs e1"
      using wf_e1 gray'
      by (auto simp: wf_env_def wf_color_def dest: sccE) 
    with wf_e1 ‹y' ∈ colored e1› have y'e1: "y' ∈ set (stack e1)"
      unfolding wf_env_def wf_color_def e1_def colored_def by auto
    with y' l have y'e: "y' ∈ set (stack e)" by auto
    with y' post l have numy': "n1 ≤ num e1 y'"
      unfolding dfs_post_def e1_def n1_def xedge_to_def add_stack_incr_def
      by force
    with numx nlt have "num e1 x ≤ num e1 y'" by auto
    with y'e1 x wf_e1 have "y' ≼ x in stack e1"
      unfolding wf_env_def wf_num_def e1_def n1_def by auto
    with y'e have "y' ≼ x in stack e" by (auto simp: prec)
    with dist have "False" by (simp add: precedes_mem)
  }
  hence "∀y. reachable x y ∧ reachable y x ⟶ y ∈ set (fst ?split)"
    by blast
  with l have scc: "is_scc (set (fst ?split))"
    by (simp add: is_scc_def is_subscc_def subset_antisym subsetI)

  have wf_e': "wf_env ?e'"
  proof -
    have wfc: "wf_color ?e'"
    proof -
      from post dfs1_pre_domain[OF pre] l
      have "gray ?e' ⊆ vertices ∧ black ?e' ⊆ vertices 
           ∧ gray ?e' ∩ black ?e' = {}
           ∧ (⋃ sccs ?e') ⊆ black ?e'"
        by (auto simp: dfs_post_def wf_env_def wf_color_def e1_def subenv_def
                       add_stack_incr_def colored_def)
      moreover
      have "set (stack ?e') = gray ?e' ∪ (black ?e' - ⋃ sccs ?e')" (is "?lhs = ?rhs")
      proof
        from wf_e1 dist l show "?lhs ⊆ ?rhs"
          by (auto simp: wf_env_def wf_color_def e1_def subenv_def 
                         add_stack_incr_def colored_def)
      next
        from l have "stack ?e' = stack e" "gray ?e' = gray e" by simp+
        moreover
        from pre have "gray e ⊆ set (stack e)"
          unfolding dfs1_pre_def wf_env_def wf_color_def by auto
        moreover
        {
          fix v
          assume "v ∈ black ?e' - ⋃ sccs ?e'"
          with l wf_e1
          have "v ∈ black e1" "v ∉ ⋃ sccs e1" "v ∉ insert x (set l)" 
               "v ∈ set (stack e1)"
            unfolding wf_env_def wf_color_def by auto
          with l have "v ∈ set (stack e)" by auto
        }
        ultimately show "?rhs ⊆ ?lhs" by auto
      qed
      ultimately show ?thesis
        unfolding wf_color_def colored_def by blast
    qed
    moreover
    from wf_e1 l dist prec gray' have "wf_num ?e'"
      unfolding wf_env_def wf_num_def colored_def
      by (auto simp: set_infty)
    moreover 
    from wf_e1 gray' have "no_black_to_white ?e'"
      by (auto simp: wf_env_def no_black_to_white_def colored_def)
    moreover 
    from wf_e1 l have "distinct (stack ?e')"
      unfolding wf_env_def by auto
    moreover 
    from wf_e1 prec
    have "∀y z. y ≼ z in (stack e) ⟶ reachable z y"
      unfolding wf_env_def by (metis precedes_mem(1))
    moreover
    from wf_e1 prec stack dfs1_pre_domain[OF pre] gray'
    have "∀y ∈ set (stack e). ∃g ∈ gray e. y ≼ g in (stack e) ∧ reachable y g"
      unfolding wf_env_def by (metis insert_iff subsetCE precedes_mem(2))
    moreover
    from wf_e1 l scc have "sccs ?e' = {C . C ⊆ black ?e' ∧ is_scc C}"
      by (auto simp: wf_env_def dest: scc_partition)
    ultimately show ?thesis
      using l unfolding wf_env_def by simp
  qed

  from post l dist have sub: "subenv e ?e'"
    unfolding dfs_post_def subenv_def e1_def add_stack_incr_def
    by (auto simp: set_infty)

  from l have num: "∞ ≤ num ?e' x"
    by (auto simp: set_infty)

  from l have "∀y. xedge_to (stack ?e') (stack e) y ⟶ ∞ ≤ num ?e' y"
    unfolding xedge_to_def by auto

  with res wf_e' sub num show ?thesis
    unfolding dfs1_post_def res_def by simp
qed

text ‹
  The following main lemma establishes the partial correctness
  of the two mutually recursive functions. The domain conditions
  appear explicitly as hypotheses, although we already know that
  they are subsumed by the preconditions. They are needed for the
  application of the ``partial induction'' rule generated by
  Isabelle for recursive functions whose termination was not proved.
  We will remove them in the next step.
›

lemma dfs_partial_correct:
  fixes x roots e
  shows
  "⟦dfs1_dfs_dom (Inl(x,e)); dfs1_pre x e⟧ ⟹ dfs1_post x e (dfs1 x e)"
  "⟦dfs1_dfs_dom (Inr(roots,e)); dfs_pre roots e⟧ ⟹ dfs_post roots e (dfs roots e)"
proof (induct rule: dfs1_dfs.pinduct)
  fix x e
  let ?res1 = "dfs1 x e"
  let ?res' = "dfs (successors x) (add_stack_incr x e)"
  assume ind: "dfs_pre (successors x) (add_stack_incr x e)
           ⟹ dfs_post (successors x) (add_stack_incr x e) ?res'"
     and pre: "dfs1_pre x e"
  have post: "dfs_post (successors x) (add_stack_incr x e) ?res'"
    by (rule ind) (rule dfs1_pre_dfs_pre[OF pre])
  show "dfs1_post x e ?res1"
  proof (cases "fst ?res' < int (sn e)")
    case True with pre post show ?thesis by (rule dfs_post_dfs1_post_case1)
  next
    case False
    with pre post show ?thesis by (rule dfs_post_dfs1_post_case2)
  qed
next
  fix roots e
  let ?res' = "dfs roots e"
  let ?dfs1 = "λx. dfs1 x e"
  let ?dfs = "λx e'. dfs (roots - {x}) e'"
  assume ind1: "⋀x. ⟦ roots ≠ {}; x = (SOME x. x ∈ roots);
                          ¬ num e x ≠ - 1; dfs1_pre x e ⟧
                ⟹ dfs1_post x e (?dfs1 x)"
     and ind': "⋀x res1.
                  ⟦ roots ≠ {}; x = (SOME x. x ∈ roots);
                    res1 = (if num e x ≠ - 1 then (num e x, e) else ?dfs1 x);
                    dfs_pre (roots - {x}) (snd res1) ⟧
               ⟹ dfs_post (roots - {x}) (snd res1) (?dfs x (snd res1))"
     and pre: "dfs_pre roots e"
  from pre have dom: "dfs1_dfs_dom (Inr (roots, e))"
    by (rule dfs_pre_dfs_dom)
  show "dfs_post roots e ?res'"
  proof (cases "roots = {}")
    case True
    with pre dom show ?thesis
      unfolding dfs_pre_def dfs_post_def subenv_def xedge_to_def
      by (auto simp: dfs.psimps)
  next
    case nempty: False
    define x where "x = (SOME x. x ∈ roots)"
    with nempty have x: "x ∈ roots" by (auto intro: someI)
    define res1 where
      "res1 = (if num e x ≠ - 1 then (num e x, e) else ?dfs1 x)"
    define res2 where
      "res2 = ?dfs x (snd res1)"
    have post1: "num e x = -1 ⟶ dfs1_post x e (?dfs1 x)"
    proof
      assume num: "num e x = -1"
      with pre x have "dfs1_pre x e"
        by (rule dfs_pre_dfs1_pre)
      with nempty num x_def show "dfs1_post x e (?dfs1 x)"
        by (simp add: ind1)
    qed
    have sub1: "subenv e (snd res1)"
    proof (cases "num e x = -1")
      case True
      with post1 res1_def show ?thesis
        by (auto simp: dfs1_post_def)
    next
      case False
      with res1_def show ?thesis by simp
    qed
    have wf1: "wf_env (snd res1)"
    proof (cases "num e x = -1")
      case True
      with res1_def post1 show ?thesis
        by (auto simp: dfs1_post_def)
    next
      case False
      with res1_def pre show ?thesis
        by (auto simp: dfs_pre_def)
    qed
    from post1 pre res1_def
    have res1: "dfs_pre (roots - {x}) (snd res1)"
      unfolding dfs_pre_def dfs1_post_def subenv_def by auto
    with nempty x_def res1_def ind'
    have post: "dfs_post (roots - {x}) (snd res1) (?dfs x (snd res1))"
      by blast
    with res2_def have sub2: "subenv (snd res1) (snd res2)"
      by (auto simp: dfs_post_def)
    from post res2_def have wf2: "wf_env (snd res2)"
      by (auto simp: dfs_post_def)
    from dom nempty x_def res1_def res2_def
    have res: "dfs roots e = (min (fst res1) (fst res2), snd res2)"
      by (auto simp add: dfs.psimps)
    show ?thesis
    proof -
      let ?n2 = "min (fst res1) (fst res2)"
      let ?e2 = "snd res2"

      from post res2_def 
      have "wf_env ?e2"
        unfolding dfs_post_def by auto

      moreover
      from sub1 sub2 have sub: "subenv e ?e2"
        by (rule subenv_trans)

      moreover
      have "x ∈ colored ?e2"
      proof (cases "num e x = -1")
        case True
        with post1 res1_def sub2 show ?thesis
          by (auto simp: dfs1_post_def subenv_def colored_def)
      next
        case False
        with pre sub show ?thesis
          by (auto simp: dfs_pre_def wf_env_def wf_num_def subenv_def colored_def)
      qed
      with post res2_def have "roots ⊆ colored ?e2"
        unfolding dfs_post_def by auto

      moreover
      have "∀y ∈ roots. ?n2 ≤ num ?e2 y"
      proof
        fix y
        assume y: "y ∈ roots"
        show "?n2 ≤ num ?e2 y"
        proof (cases "y = x")
          case True
          show ?thesis
          proof (cases "num e x = -1")
            case True
            with post1 res1_def have "fst res1 ≤ num (snd res1) x"
              unfolding dfs1_post_def by auto
            moreover
            from wf1 wf2 sub2 have "num (snd res1) x ≤ num (snd res2) x"
              unfolding wf_env_def by (auto elim: subenv_num)
            ultimately show ?thesis
              using ‹y=x› by simp
          next
            case False
            with res1_def wf1 wf2 sub2 have "fst res1 ≤ num (snd res2) x"
              unfolding wf_env_def by (auto elim: subenv_num)
            with ‹y=x› show ?thesis by simp
          qed
        next
          case False
          with y post res2_def have "fst res2 ≤ num ?e2 y"
            unfolding dfs_post_def by auto
          thus ?thesis by simp
        qed
      qed

      moreover
      {
        assume n2: "?n2 ≠ ∞"
        hence "(fst res1 ≠ ∞ ∧ ?n2 = fst res1)
             ∨ (fst res2 ≠ ∞ ∧ ?n2 = fst res2)" by auto
        hence "∃r ∈ roots. ∃y ∈ set (stack ?e2). num ?e2 y = ?n2 ∧ reachable r y"
        proof
          assume n2: "fst res1 ≠ ∞ ∧ ?n2 = fst res1"
          have "∃y ∈ set (stack (snd res1)). 
                     num (snd res1) y = (fst res1) ∧ reachable x y"
          proof (cases "num e x = -1")
            case True
            with post1 res1_def n2 show ?thesis
              unfolding dfs1_post_def by auto
          next
            case False
            with wf1 res1_def n2 have "x ∈ set (stack (snd res1))"
              unfolding wf_env_def wf_color_def wf_num_def colored_def by auto
            with False res1_def show ?thesis
              by auto
          qed
          with sub2 x n2 show ?thesis
            unfolding subenv_def by fastforce
        next
          assume "fst res2 ≠ ∞ ∧ ?n2 = fst res2"
          with post res2_def show ?thesis
            unfolding dfs_post_def by auto
        qed
      }
      hence "?n2 = ∞ ∨ (∃r ∈ roots. ∃y ∈ set (stack ?e2). num ?e2 y = ?n2 ∧ reachable r y)"
        by blast

      moreover
      have "∀y. xedge_to (stack ?e2) (stack e) y ⟶ ?n2 ≤ num ?e2 y"
      proof (clarify)
        fix y
        assume y: "xedge_to (stack ?e2) (stack e) y"
        show "?n2 ≤ num ?e2 y"
        proof (cases "num e x = -1")
          case True
          from sub1 obtain s1 where
            s1: "stack (snd res1) = s1 @ stack e"
            by (auto simp: subenv_def)
          from sub2 obtain s2 where
            s2: "stack ?e2 = s2 @ stack (snd res1)"
            by (auto simp: subenv_def)
          from y obtain zs z where
            z: "stack ?e2 = zs @ stack e" "z ∈ set zs" 
               "y ∈ set (stack e)" "edge z y"
            by (auto simp: xedge_to_def)
          with s1 s2 have "z ∈ (set s1) ∪ (set s2)" by auto
          thus ?thesis
          proof
            assume "z ∈ set s1"
            with s1 z have "xedge_to (stack (snd res1)) (stack e) y"
              by (auto simp: xedge_to_def)
            with post1 res1_def ‹num e x = -1›
            have "fst res1 ≤ num (snd res1) y"
              by (auto simp: dfs1_post_def)
            moreover
            with wf1 wf2 sub2 have "num (snd res1) y ≤ num ?e2 y"
              unfolding wf_env_def by (auto elim: subenv_num)
            ultimately show ?thesis by simp
          next
            assume "z ∈ set s2"
            with s1 s2 z have "xedge_to (stack ?e2) (stack (snd res1)) y"
              by (auto simp: xedge_to_def)
            with post res2_def show ?thesis
              by (auto simp: dfs_post_def)
          qed
        next
          case False
          with y post res1_def res2_def show ?thesis
            unfolding dfs_post_def by auto
        qed
      qed

      ultimately show ?thesis
        using res unfolding dfs_post_def by simp
    qed
  qed
qed

section ‹Theorems establishing total correctness›

text ‹
  Combining the previous theorems, we show total correctness for
  both the auxiliary functions and the main function @{text tarjan}.
›

theorem dfs_correct:
  "dfs1_pre x e ⟹ dfs1_post x e (dfs1 x e)"
  "dfs_pre roots e ⟹ dfs_post roots e (dfs roots e)"
  using dfs_partial_correct dfs1_pre_dfs1_dom dfs_pre_dfs_dom by (blast+)

theorem tarjan_correct: "tarjan = { C . is_scc C ∧ C ⊆ vertices }"
proof -
  have "dfs_pre vertices init_env"
    by (auto simp: dfs_pre_def init_env_def wf_env_def wf_color_def colored_def
                   wf_num_def no_black_to_white_def is_scc_def precedes_def)
  hence res: "dfs_post vertices init_env (dfs vertices init_env)"
    by (rule dfs_correct)
  thus ?thesis
    by (auto simp: tarjan_def init_env_def dfs_post_def wf_env_def wf_color_def 
                   colored_def subenv_def)
qed

end ― ‹context graph›
end ― ‹theory Tarjan›