;;;; common.lisp -- efficient implementations of mod32 arithmetic and macros

;;; Functions in this file are intended to be fast
(declaim (optimize (speed 3) (safety 0) (space 0) (debug 0)))

(in-package :crypto)

(defmacro defconst (name value)
  `(defconstant ,name
    (if (boundp ',name)
        (symbol-value ',name)
        ,value)))

;;; CMUCL and SBCL both have an internal type for this, but we'd like to
;;; be portable, so we define our own.

(deftype index () '(mod #.array-dimension-limit))
(deftype index+1 () `(mod ,(1+ array-dimension-limit)))


;;; extracting individual bytes from integers

;;; We used to declare these functions with much stricter types (e.g.
;;; (UNSIGNED-BYTE 32) as the lone argument), but we need to access
;;; bytes of both 32-bit and 64-bit words and the types would just get
;;; in our way.  We declare these functions as inline; a good Common
;;; Lisp compiler should be able to generate efficient code from the
;;; declarations at the point of the call.

;;; These functions are named according to big-endian conventions.  The
;;; comment is here because I always forget and need to be reminded.
#.(loop for i from 1 to 8
        collect (let ((name (intern (format nil "~:@(~:R~)-BYTE" i))))
                  `(progn
                    (declaim (inline ,name))
                    (declaim (ftype (function (unsigned-byte) (unsigned-byte 8)) ,name))
                    (defun ,name (ub)
                      (declare (type unsigned-byte ub))
                      (ldb (byte 8 ,(* 8 (1- i))) ub)))) into forms
        finally (return `(progn ,@forms)))


;;; fetching/storing appropriately-sized integers from octet vectors

(macrolet ((define-fetcher (bitsize &optional big-endian)
             (let ((name (intern (format nil "FETCH-UB~D-~:[LE~;BE~]"
                                         bitsize big-endian)))
                   (bytes (truncate bitsize 8)))
               `(progn
                 (declaim (inline ,name))
                 (defun ,name (buffer index)
                   (declare (type (simple-array (unsigned-byte 8) (*)) buffer))
                   (declare (type (integer 0 ,(- array-dimension-limit bytes)) index))
                   (logand ,(1- (ash 1 bitsize))
                           ,(loop for i from 0 below bytes
                                  collect (let* ((offset (if big-endian
                                                             i
                                                             (- bytes i 1)))
                                                 (shift (if big-endian
                                                            (* (- bytes i 1) 8)
                                                            (* offset 8))))
                                            `(ash (aref buffer (+ index ,offset)) ,shift)) into forms
                                  finally (return `(logior ,@forms))))))))
           (define-storer (bitsize &optional big-endian)
             (let ((name (intern (format nil "STORE-UB~D-~:[LE~;BE~]"
                                         bitsize big-endian)))
                   (bytes (truncate bitsize 8)))
               `(progn
                 (declaim (inline ,name))
                 (defun ,name (buffer index value)
                   (declare (type (simple-array (unsigned-byte 8) (*)) buffer))
                   (declare (type (integer 0 ,(- array-dimension-limit bytes)) index))
                   (declare (type (unsigned-byte ,bitsize) value))
                   ,@(loop for i from 1 to bytes
                           collect (let ((offset (if big-endian
                                                     (- bytes i)
                                                     (1- i))))
                                     `(setf (aref buffer (+ index ,offset))
                                       (,(intern (format nil "~:@(~:R~)-BYTE" i)) value))))
                   (values)))))
           (define-fetchers-and-storers (bitsize)
             `(progn
               (define-fetcher ,bitsize) (define-fetcher ,bitsize t)
               (define-storer ,bitsize) (define-storer ,bitsize t))))
  (define-fetchers-and-storers 16)
  (define-fetchers-and-storers 32)
  (define-fetchers-and-storers 64))

(defmacro with-words (((&rest word-vars) array initial-offset
                       &key (size 4) (big-endian t))
                      &body body)
  (let ((fetch-sym (intern (format nil "FETCH-UB~D-~:[LE~;BE~]"
                                   (* size 8) big-endian)))
        (store-sym (intern (format nil "STORE-UB~D-~:[LE~;BE~]"
                                   (* size 8) big-endian))))
    (loop for word-var in word-vars
          for offset from 0 by size
          collect `(,word-var (,fetch-sym ,array (+ ,initial-offset ,offset)))
          into let-bindings
          finally (return `(macrolet ((store-words (buffer buffer-offset &rest word-vars)
                                       (loop for word-var in word-vars
                                             for offset from 0 by ,size
                                             collect `(,',store-sym ,buffer (+ ,buffer-offset ,offset) ,word-var)
                                             into stores
                                             finally (return `(progn ,@stores))))
                                      (store-ub32 (buffer buffer-offset word)
                                       `(,',store-sym ,buffer ,buffer-offset ,word)))
                            (let ,let-bindings
                              (declare (type (unsigned-byte ,(* size 8)) ,@word-vars))
                              ,@body))))))


;;; efficient 32-bit arithmetic, which a lot of algorithms require

(declaim (inline mod32+)
	 (ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32+))
(defun mod32+ (a b)
  (declare (type (unsigned-byte 32) a b))
  (ldb (byte 32 0) (+ a b)))

#+cmu
(define-compiler-macro mod32+ (a b)
  `(ext:truly-the (unsigned-byte 32) (+ ,a ,b)))

#+sbcl
(define-compiler-macro mod32+ (a b)
  `(ldb (byte 32 0) (+ ,a ,b)))

;;; mostly needed for CAST*
(declaim (inline mod32-)
         (ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32))))

(defun mod32- (a b)
  (declare (type (unsigned-byte 32) a b))
  (ldb (byte 32 0) (- a b)))

#+cmu
(define-compiler-macro mod32- (a b)
  `(ext:truly-the (unsigned-byte 32) (- ,a ,b)))

#+sbcl
(define-compiler-macro mod32- (a b)
  `(ldb (byte 32 0) (- ,a ,b)))

;;; mostly needed for IDEA
(declaim (inline mod32*)
         (ftype (function ((unsigned-byte 16) (unsigned-byte 16)) (unsigned-byte 32))))

(defun mod32* (a b)
  (declare (type (unsigned-byte 16) a b))
  (ldb (byte 32 0) (* a b)))

#+cmu
(define-compiler-macro mod32* (a b)
  `(ext:truly-the (unsigned-byte 32) (* ,a ,b)))

#+sbcl
(define-compiler-macro mod32* (a b)
  `(ldb (byte 32 0) (* ,a ,b)))

(declaim (inline mod32ash)
         (ftype (function ((unsigned-byte 32) (integer -31 31)) (unsigned-byte 32)) mod32ash))

(defun mod32ash (num count)
  (declare (type (unsigned-byte 32) num))
  (declare (type (integer -31 31) count))
  (ldb (byte 32 0) (ash num count)))

#+sbcl
(define-compiler-macro mod32ash (num count)
  ;; work around SBCL optimizing bug as described by APD:
  ;;  http://www.caddr.com/macho/archives/sbcl-devel/2004-8/3877.html
  `(logand #xffffffff (ash ,num ,count)))

(declaim (inline mod32lognot)
         (ftype (function ((unsigned-byte 32)) (unsigned-byte 32))))

(defun mod32lognot (num)
  (ldb (byte 32 0) (lognot num)))

#+sbcl
(define-compiler-macro mod32lognot (num)
  `(ldb (byte 32 0) (lognot ,num)))

(declaim (inline rol32 ror32)
	 (ftype (function ((unsigned-byte 32) (unsigned-byte 5)) (unsigned-byte 32)) rol32 ror32))

(defun rol32 (a s)
  (declare (type (unsigned-byte 32) a) (type (unsigned-byte 5) s))
  #+cmu
  (kernel:32bit-logical-or #+little-endian (kernel:shift-towards-end a s)
			   #+big-endian (kernel:shift-towards-start a s)
			   (ash a (- s 32)))
  #+sbcl
  (sb-rotate-byte:rotate-byte s (byte 32 0) a)
  #-(or sbcl cmu)
  (logior (ldb (byte 32 0) (ash a s)) (ash a (- s 32))))

(defun ror32 (a s)
  (declare (type (unsigned-byte 32) a) (type (unsigned-byte 5) s))
  #+sbcl
  (sb-rotate-byte:rotate-byte (- s) (byte 32 0) a)
  #-sbcl
  (rol32 a (- 32 s)))


;;; efficient 8-byte -> 32-byte buffer copy routines, mostly used by
;;; the hash functions.  we provide big-endian and little-endian
;;; versions.

(declaim (inline fill-block-le-ub8 fill-block-be-ub8))

(declaim (inline copy-to-buffer))
(defun copy-to-buffer (from from-offset count buffer buffer-offset)
  "Copy a partial segment from input vector from starting at
from-offset and copying count elements into the 64 byte buffer
starting at buffer-offset."
  (declare (type (unsigned-byte 29) from-offset)
	   (type (integer 0 63) count buffer-offset)
	   (type (simple-array (unsigned-byte 8) (*)) from)
	   (type (simple-array (unsigned-byte 8) (64)) buffer))
  #+cmu
  (kernel:bit-bash-copy
   from (+ (* vm:vector-data-offset vm:word-bits) (* from-offset vm:byte-bits))
   buffer (+ (* vm:vector-data-offset vm:word-bits)
	     (* buffer-offset vm:byte-bits))
   (* count vm:byte-bits))
  #+sbcl
  (sb-kernel:ub8-bash-copy from from-offset buffer buffer-offset count)
  #-(or cmu sbcl)
  (loop for buffer-index of-type (integer 0 64) from buffer-offset
        for from-index of-type fixnum from from-offset
        below (+ from-offset count)
        do
        (setf (aref buffer buffer-index) (aref from from-index))))

(defun fill-block-ub8-le (block buffer offset)
  "Convert a complete 64 (UNSIGNED-BYTE 8) input BUFFER starting from
OFFSET into the given (UNSIGNED-BYTE 32) BLOCK."
  (declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
	   (type (simple-array (unsigned-byte 32) (16)) block)
	   (type (simple-array (unsigned-byte 8) (*)) buffer))
  #+(and :cmu :little-endian)
  (kernel:bit-bash-copy
   buffer (+ (* vm:vector-data-offset vm:word-bits) (* offset vm:byte-bits))
   block (* vm:vector-data-offset vm:word-bits)
   (* 64 vm:byte-bits))
  #+(and :sbcl :little-endian)
  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
  #-(or (and :sbcl :little-endian) (and :cmu :little-endian))
  (loop for i of-type (integer 0 16) from 0
	for j of-type (integer 0 #.array-dimension-limit)
	from offset to (+ offset 63) by 4
	do
	(setf (aref block i) (fetch-ub32-le buffer j))))

(defun fill-block-ub8-be (block buffer offset)
  "Convert a complete 64 (unsigned-byte 8) input vector segment
starting from offset into the given 16 word SHA1 block.  Calling this function
without subsequently calling EXPAND-BLOCK results in undefined behavior."
  (declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
	   (type (simple-array (unsigned-byte 32) (80)) block)
	   (type (simple-array (unsigned-byte 8) (*)) buffer))
  ;; convert to 32-bit words
  #+(and :cmu :big-endian)
  (kernel:bit-bash-copy
   buffer (+ (* vm:vector-data-offset vm:word-bits)
             (* offset vm:byte-bits))
   block (* vm:vector-data-offset vm:word-bits)
   (* 64 vm:byte-bits))
  #+(and :sbcl :big-endian)
  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
  #-(or (and :sbcl :big-endian) (and :cmu :big-endian))
  (loop for i of-type (integer 0 16) from 0
        for j of-type (integer 0 #.array-dimension-limit)
        from offset to (+ offset 63) by 4
        do (setf (aref block i) (fetch-ub32-be buffer j))))

;;; a few functions that are useful during compilation

(defun make-circular-list (&rest elements)
  (let ((list (copy-seq elements)))
    (setf (cdr (last list)) list)))

;;; SUBSEQ is defined to error on circular lists, so we define our own
(defun circular-list-subseq (list start end)
  (let* ((length (- end start))
         (subseq (make-list length)))
    (do ((i 0 (1+ i))
         (list (nthcdr start list) (cdr list))
         (xsubseq subseq (cdr xsubseq)))
        ((>= i length) subseq)
      (setf (first xsubseq) (first list)))))