;; ---------------------------------------------------------------------------
;; ~/events/scanning.events
;; ---------------------------------------------------------------------------

;; The scanner will split a sequence of bytes into a sequence of tokens by
;; checking all prefixes with an automaton that recognizes tokens, 
;; and selecting the last one found, which is the longest such prefix. 

;; I use an automaton similar to the one in the fsa proof, but with 
;; (state, re) pairs in the set of finals instead of just states. This
;; enables me to determine which regular expression was responsible for
;; the acceptance of the prefix.

(enable-theory tokens)
(enable-theory sets)
(enable-theory lists)

(add-shell fsa* nil fsap*
  ((alphabet (none-of) zero)
   (states   (none-of) zero)
   (starts   (none-of) zero)
   (table    (none-of) zero)
   (finals   (none-of) zero)))

;; A "real" finite state automaton is an NQTHM-automaton with the following
;; properties. Note that finals is an association list. The domain is a
;; subset of the set of states. 

(defn fsap (auto)
  (let ((al (alphabet auto))
        (st (states auto))
        (s0 (starts auto))
        (tr (table auto))
        (fi (finals auto)))
    (and (fsap* auto)
         (listp al)
         (listp st)
         (listp s0)
         (alistp fi)
         (subsetp s0 st)
         (setp st)
         (setp fi)
         (subsetp (domain fi) st))))

(defn mk-transition (state input nexts)
  (cons (cons state input) nexts))

;; These are the selector functions for the transitions in the table.

(defn state (trans) (caar trans))
(defn input (trans) (cdar trans))
(defn nexts (trans) (cdr trans))

(defn transitionp (trans alphabet states)
 (and (member (input trans) alphabet)
      (member (state trans) states)
      (subsetp (nexts trans) states)
      (plistp (nexts trans))))

(defn wf-table (table alphabet states)
  (if (nlistp table)
      (equal table nil)
      (and (transitionp (car table) alphabet states)
           (wf-table (cdr table) alphabet states))))

(defn ndfsap (a)
  (and (fsap a)
       (wf-table (table a) (alphabet a) (states a))))

;; The automaton must be run against a tape.
;; (cons st a) is the state and symbol look-up element.

(defn next-states (table st a)
  (if (nlistp table)
      nil
      (if (equal (cons st a) (caar table))
          (nexts (car table))
          (next-states (cdr table) st a))))

(defn next-states-list (table states a)
  (if (nlistp states)
      nil
      (union
       (next-states table (car states) a)
       (next-states-list table (cdr states) a))))

(defn cc-name (char cc)
 (if (nlistp cc)
     'character-class-not-defined
     (if (member char (cadar cc))
         (caar cc)
         (cc-name char (cdr cc)))))

(defn all-regular-expressions-for-state (state finals)
 (if (nlistp finals)
     nil
     (if (equal state (caar finals))
         (append (list (cdar finals))
                 (all-regular-expressions-for-state state (cdr finals)))
         (all-regular-expressions-for-state state (cdr finals)))))

(defn all-regular-expressions (states finals)
 (if (nlistp states)
     nil
     (append (all-regular-expressions-for-state (car states) finals)
             (all-regular-expressions (cdr states) finals))))

;; The function accept1 returns a list of all regular expressions matching 
;; the states in finals. If there are no such expressions, nil is returned.

(defn accept1 (table states finals tape cc)
 (if (nlistp tape)
     (all-regular-expressions states finals)
     (accept1 table
              (next-states-list table states
                                (cc-name (car tape) cc))
              finals
              (cdr tape)
              cc)))

(defn accepting-regular-expressions (fsa cc tape)
 (accept1 (table fsa) (starts fsa) (finals fsa) tape cc))

(defn accept (fsa cc tape)
 (listp (accepting-regular-expressions fsa cc tape)))

;; ---------------------------------------------------------------------------
;; Proof
;; ---------------------------------------------------------------------------

;; I need the concept of "what does it mean to be a prefix?" prefixp 
;; returns T if a is a prefix of b. Note that nlisps are prefixes of anything.

(defn prefixp (a b)
    (if (nlistp a)
        T
        (if (listp b)
            (if (equal (car a) (car b))
                (prefixp (cdr a) (cdr b))
                F)
            F)))

;; prefixp is transitive.

(prove-lemma prefixp-transitive (rewrite)
               (implies (and (prefixp a b)
                             (prefixp b c))
                        (prefixp a c)))

;; The function consl conses x onto each element in l. I need this in
;; order to construct all the prefixes for a list.

(defn consl (x l)
   (if (listp l)
       (cons (cons x (car l))
             (consl x (cdr l)))
       (list (list x)))); changed this case

;; This is all prefixes without the nil prefix. They happen to be sorted 
;; longest first, but I will not use that fact.

(defn all-prefixes (tape)
  (if (nlistp tape)
      nil
      (consl (car tape) (all-prefixes (cdr tape)))))

;; When does a list l contain only prefixes of full?

(defn all-prefixp (l full)
  (if (nlistp l)
      T
      (and (prefixp (car l) full)
           (all-prefixp (cdr l) full))))

(prove-lemma all-prefixp-all-prefixes (rewrite)
 (all-prefixp (all-prefixes tape) tape))

;; When are all the elements of a list accepting tapes for the 
;; automaton nfsa/cc?

(defn all-accepting (prefixes nfsa cc)
  (if (nlistp prefixes)
      nil
      (if (accept nfsa cc (car prefixes))
          (cons (car prefixes) (all-accepting (cdr prefixes) nfsa cc))
          (all-accepting (cdr prefixes) nfsa cc))))

(prove-lemma all-prefixp-all-accepting (rewrite)
 (implies (all-prefixp x y)
          (all-prefixp (all-accepting x nfsa cc) y)))

;; This function expresses the concept of longest element of a list  -  the
;; longest-to-date is kept around until the entire list has been seen. If I
;; find something longer, that is then the longest-to-date.

(defn longest1 (rest longest-to-date)
 (if (nlistp rest)
     longest-to-date
     (if (lessp (length longest-to-date)
                (length (car rest)))
         (longest1 (cdr rest) (car rest))
         (longest1 (cdr rest) longest-to-date))))

(prove-lemma prefixp-longest1 (rewrite)
 (implies (and (all-prefixp rest tape)
               (prefixp longest-to-date tape))
          (prefixp (longest1 rest longest-to-date) tape)))

(defn longest (l)
 (if (nlistp l)
     nil
     (longest1 l (car l))))

(prove-lemma prefixp-longest (rewrite)
 (implies (all-prefixp l tape)
          (prefixp (longest l) tape)))

;; This function delivers the longest of all the accepting prefixes of tape.

(defn lop (nfsa cc tape)
 (longest (all-accepting (all-prefixes tape) nfsa cc )))

;; ---------------------------------------------------------------------------
;; Theorem 1: Prefixp longest accepting prefix
;; ---------------------------------------------------------------------------

(prove-lemma all-prefixp-all-accepting-all-prefixes (rewrite)
 (all-prefixp (all-accepting (all-prefixes tape) nfsa cc) tape))

(prove-lemma prefixp-lop (rewrite)
 (prefixp (lop nfsa cc tape) tape)
  ((use (prefixp-longest (l (all-accepting (all-prefixes tape) nfsa cc))
                          (tape tape)))))

;; ---------------------------------------------------------------------------
;; Theorem 2: longest accepting prefix really accepts
;; ---------------------------------------------------------------------------
        
(defn accept-all (l nfsa cc)
 (if (nlistp l)
     T
     (and (accept nfsa cc (car l))
          (accept-all (cdr l) nfsa cc))))
     
(prove-lemma member-accept-all-accepts (rewrite)
 (implies (and (accept-all l nfsa cc)
               (member p l))
          (accept nfsa cc p)))

(prove-lemma accept-all-all-accepting (rewrite)
 (accept-all (all-accepting x nfsa cc) nfsa cc))

(prove-lemma member-longest1 (rewrite)
      (implies (not (equal (longest1 x z) z))
               (member (longest1 x z) x)))

(prove-lemma member-longest (rewrite)
 (implies (listp l)
           (member (longest l) l)))

;; This lemma never seemed to prove on a SPARC, but on a Pentium-90 it
;; zipped through in less time than you need to drink a cup of coffee!

(prove-lemma helper (rewrite)
 (implies (and (accept-all l nfsa cc) 
               (listp l))
          (accept nfsa cc (longest l))))

;; The theorem should have as its hypothesis something like
;; (or (accepts-longest-prefix ...)
;;     (accepts-no-prefix ...)
;; So I restate this to be: if the longest prefix is not nil, then it
;; accepts. If the longest prefix IS nil (for epsilon productions) then we
;; have the problem of deciding their acceptance.

(prove-lemma accepts-lop ()
 (implies (and (listp tape)
               (not (equal (lop nfsa cc tape) nil)))
          (accept nfsa cc (lop nfsa cc tape)))
          ((use (helper (l (all-accepting (all-prefixes tape) nfsa cc))
               (tape tape))))))

;; ---------------------------------------------------------------------------
;; Theorem 3: longest accepting prefix is really the longest one
;; ---------------------------------------------------------------------------

;; The definition of longest is twofold, it has to be a member and there can
;; be none larger.

(defn none-larger (x l)
 (if (nlistp l)
     T
     (if (lessp (length x) (length (car l)))
         F
         (none-larger x (cdr l)))))

(defn longestp (x l)
  (and (member x l)
       (none-larger x l)))

(prove-lemma not-lessp-length-longest1-other (rewrite)
 (not (lessp (length (longest1 v z))
                      (length z))))

(prove-lemma none-larger-longest1 (rewrite)
 (none-larger (longest1 v z) v))

;; This gets accepted using generalization THREE times!

(prove-lemma accepting-prefix-is-longest ()
 (implies (and (listp tape)
               (not (equal (lop nfsa cc tape) nil)))
          (longestp (lop nfsa cc tape) 
                    (all-accepting (all-prefixes tape) nfsa cc))))

;; ---------------------------------------------------------------------------
;; Now we have to make something useful out of the prefix: a token
;; ---------------------------------------------------------------------------

(defn longest-prefix-token (nfsa cc prefix)
 (mk-token (car (accepting-regular-expressions nfsa cc prefix))
              prefix))

;; Once a token has been made out of the longest prefix, I have to continue.

(defn remove-common-prefix (a b)
  (if (nlistp a)
      b
      (if (nlistp b)
          nil
          (if (equal (car a) (car b))
              (remove-common-prefix (cdr a) (cdr b))
              nil))))

;; This lemma is needed for the termination argument in split.

(prove-lemma remove-common-prefix-lessp (rewrite)
 (implies (and (prefixp a b)
               (listp a)
               (listp b))
          (equal (lessp (length (remove-common-prefix a b)) (length b))
                 T)))
(disable lop)

;; Now one must split off the longest prefix token. Note that Ie 
;; have to exclude epsilon prefixes or I have no termination argument!

(defn split (nfsa cc tape)
 (if (nlistp tape)
     tape
     (let ((prefix (lop nfsa cc tape)))
       (if (equal (length prefix) 0)
           (cons (mk-token 'lexicographic-error tape) nil)
           (cons (longest-prefix-token nfsa cc prefix)
                 (split nfsa cc (remove-common-prefix prefix tape))))))
 ((lessp (length tape))))
        
;; And the theorem for split is that it splits the tape, i.e. collecting up
;; the token-values gives us the tape again.

(defn collect-values (toklist)
 (if (nlistp toklist)
     toklist
     (if (not (tokenp (car toklist)))
         'not-a-token-list
         (append (token-value (car toklist))
                 (collect-values (cdr toklist))))))

(prove-lemma plistp-remove-common-prefix (rewrite)
 (implies (plistp tape)
          (plistp (remove-common-prefix a tape))))

(prove-lemma collect-values-cons (rewrite)
 (implies (tokenp a)
          (equal (collect-values (cons a b))
                 (append (token-value a) (collect-values b)))))

(prove-lemma append-remove-common-prefix (rewrite)
 (implies (prefixp a b)
          (equal (append a (remove-common-prefix a b))
                 b)))

; The nasty induction structure must be spoon-fed to the prover.

(defn split-splits-hint (nfsa cc tape)
  (if (or (nlistp tape)
          (nlistp (lop nfsa cc tape)))
      T
      (cons (lop nfsa cc tape)
            (split-splits-hint 
             nfsa cc (remove-common-prefix (lop nfsa cc tape) tape))))
 ((lessp (length tape))))

(prove-lemma split-splits (rewrite)
 (implies (plistp tape)
          (equal (collect-values (split nfsa cc tape))
                 tape))
  ((do-not-generalize T)
   (induct (split-splits-hint nfsa cc tape ))))

(deftheory scanning-defs
 (SPLIT-SPLITS-HINT COLLECT-VALUES REMOVE-COMMON-PREFIX LONGEST-PREFIX-TOKEN
  SPLIT LONGESTP NONE-LARGER ACCEPT-ALL LOP LONGEST LONGEST1 ALL-ACCEPTING
  ALL-PREFIXP ALL-PREFIXES CONSL PREFIXP ACCEPT ACCEPTING-REGULAR-EXPRESSIONS
  ACCEPT1 ALL-REGULAR-EXPRESSIONS ALL-REGULAR-EXPRESSIONS-FOR-STATE CC-NAME
  NEXT-STATES-LIST NEXT-STATES NDFSAP WF-TABLE TRANSITIONP NEXTS INPUT STATE
  MK-TRANSITION FSAP FSA* ))

(disable-theory scanning-defs)

(deftheory scanning-proofs 
 (SPLIT-SPLITS APPEND-REMOVE-COMMON-PREFIX
  COLLECT-VALUES-CONS PLISTP-REMOVE-COMMON-PREFIX REMOVE-COMMON-PREFIX-LESSP
  ACCEPTING-PREFIX-IS-LONGEST NONE-LARGER-LONGEST1
  NOT-LESSP-LENGTH-LONGEST1-OTHER ACCEPTS-LOP HELPER MEMBER-LONGEST
  MEMBER-LONGEST1 ACCEPT-ALL-ALL-ACCEPTING MEMBER-ACCEPT-ALL-ACCEPTS
  PREFIXP-LOP ALL-PREFIXP-ALL-ACCEPTING-ALL-PREFIXES PREFIXP-LONGEST
  PREFIXP-LONGEST1 ALL-PREFIXP-ALL-ACCEPTING ALL-PREFIXP-ALL-PREFIXES
  PREFIXP-TRANSITIVE ))

(disable-theory scanning-proofs)
