#lang racket/base
(require racket/list
         racket/match
         (prefix-in is: data/integer-set)
         "util.rkt")
  
(provide ->re build-epsilon build-zero build-char-set build-concat
         build-repeat build-or build-and build-neg
         epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
         char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
         orR-res andR-res negR-re
         re-nullable? re-index)
  
;; get-index : -> nat
(define get-index (make-counter))

;; An re is either
;; - (make-epsilonR bool nat)
;; - (make-zeroR bool nat)
;; - (make-char-setR bool nat char-set)
;; - (make-concatR bool nat re re)
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
;; - (make-orR bool nat (list-of re))    Must not directly contain any orRs
;; - (make-andR bool nat (list-of re))   Must not directly contain any andRs
;; - (make-negR bool nat re)
;;
;; Every re must have an index field globally different from all
;; other re index fields.
(define-struct re (nullable? index) #:inspector (make-inspector))
(define-struct (epsilonR re) () #:inspector (make-inspector))
(define-struct (zeroR re) () #:inspector (make-inspector))
(define-struct (char-setR re) (chars) #:inspector (make-inspector))
(define-struct (concatR re) (re1 re2) #:inspector (make-inspector))
(define-struct (repeatR re) (low high re) #:inspector (make-inspector))
(define-struct (orR re) (res) #:inspector (make-inspector))
(define-struct (andR re) (res) #:inspector (make-inspector))
(define-struct (negR re) (re) #:inspector (make-inspector))

;; e : re
;; The unique epsilon re
(define e (make-epsilonR #t (get-index)))

;; z : re
;; The unique zero re
(define z (make-zeroR #f (get-index)))


;; s-re = char                       constant
;;      | string                     constant (sequence of characters)
;;      | re                         a precompiled re
;;      | (repetition low high s-re) repetition between low and high times (inclusive)
;;      | (union s-re ...)           
;;      | (intersection s-re ...)    
;;      | (complement s-re)          
;;      | (concatenation s-re ...)   
;;      | (char-range rng rng)       match any character between two (inclusive)
;;      | (char-complement char-set) match any character not listed
;; low = natural-number
;; high = natural-number or +inf.0
;; rng = char or string with length 1
;; (concatenation) (repetition 0 0 x), and "" match the empty string.
;; (union) matches no strings.
;; (intersection) matches any string.
  
(define loc:make-range is:make-range)
(define loc:union is:union)
(define loc:split is:split)
(define loc:complement is:complement)
  
;; ->re : s-re cache -> re
(define (->re exp cache)
  (match exp
    [(? char?) (build-char-set (loc:make-range (char->integer exp)) cache)]
    [(? string?) (->re `(concatenation ,@(string->list exp)) cache)]
    [(? re?) exp]
    [`(repetition ,low ,high ,r)
     (build-repeat low high (->re r cache) cache)]
    [`(union ,rs ...)
     (build-or (flatten-res (map (λ (r) (->re r cache)) rs)
                            orR? orR-res loc:union cache)
               cache)]
    [`(intersection ,rs ...)
     (build-and (flatten-res (map (λ (r) (->re r cache)) rs)
                             andR? andR-res (λ (a b)
                                              (let-values (((i _ __) (loc:split a b))) i))
                             cache)
                cache)]
    [`(complement ,r) (build-neg (->re r cache) cache)]
    [`(concatenation ,rs ...)
     (foldr (λ (x y)
              (build-concat (->re x cache) y cache))
            e
            rs)]
    [`(char-range ,c1 ,c2)
     (let ([i1 (char->integer (if (string? c1) (string-ref c1 0) c1))]
           [i2 (char->integer (if (string? c2) (string-ref c2 0) c2))])
       (if (<= i1 i2)
           (build-char-set (loc:make-range i1 i2) cache)
           z))]
    [`(char-complement ,crs ...)
     (let ([cs (->re `(union ,@crs) cache)])
       (cond
         [(zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)]
         [(char-setR? cs)
          (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)]
         [else z]))]))
              

        

;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
;;              (char-set char-set -> char-set) cache -> (list-of re)
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
;; Flattens out the values of type?.  get-res only needs to function on things type? returns
;; true for.
(define (flatten-res l type? get-res combine cache)
  (let loop ([res l]
             ;; chars : (union #f char-set)
             [chars #f]
             [no-chars null])
    (cond
      [(null? res) 
       (if chars
           (cons (build-char-set chars cache) no-chars)
           no-chars)]
      [(char-setR? (car res))
       (if chars
           (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
           (loop (cdr res) (char-setR-chars (car res)) no-chars))]
      [(type? (car res))
       (loop (append (get-res (car res)) (cdr res)) chars no-chars)]
      [else (loop (cdr res) chars (cons (car res) no-chars))])))
    
;; build-epsilon : -> re
(define (build-epsilon) e)
  
(define (build-zero) z)
    
(define loc:integer-set-contents is:integer-set-contents)
  
;; build-char-set : char-set cache -> re
(define (build-char-set cs cache)
  (define l (loc:integer-set-contents cs))
  (cond
    [(null? l) z]
    [else
     (cache l
            (λ ()
              (make-char-setR #f (get-index) cs)))]))
  
  
  
;; build-concat : re re cache -> re
(define (build-concat r1 r2 cache)
  (cond
    [(eq? e r1) r2]
    [(eq? e r2) r1]
    [(or (eq? z r1) (eq? z r2)) z]
    [else
     (cache (cons 'concat (cons (re-index r1) (re-index r2)))
            (λ ()
              (make-concatR (and (re-nullable? r1) (re-nullable? r2))
                            (get-index)
                            r1 r2)))]))
  
;; build-repeat : nat nat-or-+inf.0 re cache -> re
(define (build-repeat low high r cache)
  (let ([low (if (< low 0) 0 low)])
    (cond
      [(eq? r e) e]
      [(and (= 0 low) (or (= 0 high) (eq? z r))) e]
      [(and (= 1 low) (= 1 high)) r]
      [(and (repeatR? r)
            (eqv? (repeatR-high r) +inf.0)
            (or (= 0 (repeatR-low r))
                (= 1 (repeatR-low r))))
       (build-repeat (* low (repeatR-low r))
                     +inf.0
                     (repeatR-re r)
                     cache)]
      [else
       (cache (cons 'repeat (cons low (cons high (re-index r))))
              (λ ()
                (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))])))
  
  
;; build-or : (list-of re) cache -> re
(define (build-or rs cache)
  (let ([rs 
         (filter
          (λ (x) (not (eq? x z)))
          (do-simple-equiv (replace rs orR? orR-res null) re-index))])
    (cond
      [(null? rs) z]
      [(null? (cdr rs)) (car rs)]
      [(memq (build-neg z cache) rs) (build-neg z cache)]
      [else
       (cache (cons 'or (map re-index rs))
              (λ ()
                (make-orR (ormap re-nullable? rs) (get-index) rs)))])))
  
;; build-and : (list-of re) cache -> re
(define (build-and rs cache)
  (let ([rs (do-simple-equiv (replace rs andR? andR-res null) re-index)])
    (cond
      [(null? rs) (build-neg z cache)]
      [(null? (cdr rs)) (car rs)]
      [(memq z rs) z]
      [else
       (cache (cons 'and (map re-index rs))
              (λ ()
                (make-andR (andmap re-nullable? rs) (get-index) rs)))])))
      
;; build-neg : re cache -> re
(define (build-neg r cache)
  (cond
    [(negR? r) (negR-re r)]
    [else
     (cache (cons 'neg (re-index r))
            (λ ()
              (make-negR (not (re-nullable? r)) (get-index) r)))]))
  
;; Tests for the build-functions
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r1 (build-char-set (is:make-range (char->integer #\1)) c))
             (r2 (build-char-set (is:make-range (char->integer #\2)) c))
             (r3 (build-char-set (is:make-range (char->integer #\3)) c))
             (rc (build-concat r1 r2 c))
             (rc2 (build-concat r2 r1 c))
             (rr (build-repeat 0 +inf.0 rc c))
             (ro (build-or `(,rr ,rc ,rr) c))
             (ro2 (build-or `(,rc ,rr ,z) c))
             (ro3 (build-or `(,rr ,rc) c))
             (ro4 (build-or `(,(build-or `(,r1 ,r2) c)
                              ,(build-or `(,r2 ,r3) c)) c))
             (ra (build-and `(,rr ,rc ,rr) c))
             (ra2 (build-and `(,rc ,rr) c))
             (ra3 (build-and `(,rr ,rc) c))
             (ra4 (build-and `(,(build-and `(,r3 ,r2) c)
                               ,(build-and `(,r2 ,r1) c)) c))
             (rn (build-neg z c))
             (rn2 (build-neg r1 c)))
               
            ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
            ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
            ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
            ((build-char-set (is:make-range) c) z)
            ((build-concat r1 e c) r1)
            ((build-concat e r1 c) r1)
            ((build-concat r1 z c) z)
            ((build-concat z r1 c) z)
            ((build-concat r1 r2 c) rc)
            ((concatR-re1 rc) r1)
            ((concatR-re2 rc) r2)
            ((concatR-re1 rc2) r2)
            ((concatR-re2 rc2) r1)
            (ro ro2)
            (ro ro3)
            (ro4 (build-or `(,r1 ,r2 ,r3) c))
            ((orR-res ro) (list rc rr))
            ((orR-res ro4) (list r1 r2 r3))
            ((build-or null c) z)
            ((build-or `(,r1 ,z) c) r1)
            ((build-repeat 0 +inf.0 rc c) rr)
            ((build-repeat 0 1 z c) e)
            ((build-repeat 0 0 rc c) e)
            ((build-repeat 0 +inf.0 z c) e)
            ((build-repeat -1 +inf.0 z c) e)
            ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
             (build-repeat 0 +inf.0 rc c))
            ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
             (build-repeat 0 +inf.0 rc c))
            ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
             (build-repeat 20 +inf.0 rc c))
            ((build-repeat 1 1 rc c) rc)
            ((repeatR-re rr) rc)
            (ra ra2)
            (ra ra3)
            (ra4 (build-and `(,r1 ,r2 ,r3) c))
            ((andR-res ra) (list rc rr))
            ((andR-res ra4) (list r1 r2 r3))
            ((build-and null c) (build-neg z c))
            ((build-and `(,r1 ,z) c) z)
            ((build-and `(,r1) c) r1)
            ((build-neg r1 c) (build-neg r1 c))
            ((build-neg (build-neg r1 c) c) r1)
            ((negR-re (build-neg r2 c)) r2)
            ((re-nullable? r1) #f)
            ((re-nullable? rc) #f)
            ((re-nullable? (build-concat rr rr c)) #t)
            ((re-nullable? rr) #t)
            ((re-nullable? (build-repeat 0 1 rc c)) #t)
            ((re-nullable? (build-repeat 1 2 rc c)) #f)
            ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
            ((re-nullable? ro) #t)
            ((re-nullable? (build-or `(,r1 ,r2) c)) #f)
            ((re-nullable? (build-and `(,r1 ,e) c)) #f)
            ((re-nullable? (build-and `(,rr ,e) c)) #t)
            ((re-nullable? (build-neg r1 c)) #t)
            ((re-nullable? (build-neg rr c)) #f))
              
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r1 (->re #\1 c))
             (r2 (->re #\2 c))
             (r3-5 (->re '(char-range #\3 #\5) c))
             (r4 (build-or `(,r1 ,r2) c))
             (r5 (->re `(union ,r3-5 #\7) c))
             (r6 (->re #\6 c)))
            ((flatten-res null orR? orR-res is:union c) null)
            ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1))))
            ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1) (char->integer #\2))))
            ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
                                                     orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1) (char->integer #\7))))
            ((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y)
                                                      (let-values (((i _ __)
                                                                    (is:split x y)))
                                                        i))
                          c)
             (list z)))
  
;; ->re
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r (->re #\a c))
             (rr (->re `(concatenation ,r ,r) c))
             (rrr (->re `(concatenation ,r ,rr) c))
             (rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
            ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
            ((->re "" c) e)
            ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
            ((->re r c) r)
            ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
            ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
            ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
            ((->re `(repetition 0 1 ,rrr*) c) rrr*)
            ((->re `(union (union (char-range #\a #\c)
                                  (char-complement (char-range #\000 #\110)
                                                   (char-range #\112 ,(integer->char max-char-num))))
                           (union (repetition 0 +inf.0 #\2))) c)
             (build-or (list (build-char-set (is:union (is:make-range 73)
                                                       (is:make-range 97 99))
                                             c)
                             (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
                       c))
            ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
            ((->re `(union ,r) c) r)
            ((->re `(union) c) z)
            ((->re `(intersection (intersection #\111 
                                                (char-complement (char-range #\000 #\110)
                                                                 (char-range #\112 ,(integer->char max-char-num))))
                                  (intersection (repetition 0 +inf.0 #\2))) c)
             (build-and (list (build-char-set (is:make-range 73) c)
                              (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
                        c))
            ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
                                                                       (char-range #\112 ,(integer->char max-char-num))))
                                  (intersection (repetition 0 +inf.0 #\2))) c)
             z)
            ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
            ((->re `(intersection ,r) c) r)
            ((->re `(intersection) c) (build-neg z c))
            ((->re `(complement ,r) c) (build-neg r c))
            ((->re `(concatenation) c) e)
            ((->re `(concatenation ,rrr*) c) rrr*)
            (rr (build-concat r r c))
            ((->re `(concatenation ,r ,rr ,rrr) c)
             (build-concat r (build-concat rr rrr c) c))
            ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
            ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
            ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
            ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
            ((->re `(char-range "9" "1") c) z)
            ((isc (char-setR-chars (->re `(char-complement) c)))
             (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
            ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
             (isc (is:make-range 0)))
            )
  
 
