/* parse.c - ACAP protocol parser
 * Rob Earhart
 * $Id: parse.c,v 1.8 1997/11/17 22:47:02 rob Exp $
 */
/***********************************************************
        Copyright 1997 by Carnegie Mellon University

                      All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of CMU not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

CMU DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.
******************************************************************/

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "acapint.h"

/* Nothing fancy - just a DFA chomping through states, with a variable
 * amount of lookahead.  Bleach.  OTOH, a good optimizing compiler
 * should be able to do miracles with this... */

/* Oh - I'm not using lex/flex/whatever for this because I really
 * don't want to deal with the broken API and the angst in trying
 * to get the thing to behave in a threaded style and the fact
 * that different versions of lex do things in different ways.
 * I fully support the idea of automated parser generators;
 * there just aren't any good ones.  (TODO :-) */

/* These make the code *much* easier to read - it's good to see
 * the logic behind what's going on without all the manipulation
 * cluttering things up. */

/* Macros - the REAL man's way of inlining code.  :-) */

/* GO(n, x): consume n octets of input and record x as the connection's
 * state (via SETUP), then jump to the label x in the enclosing function.
 * Wrapped in do/while(0) so that "GO(...);" is exactly one statement and
 * can safely be the body of an if that has an else. */
#define GO(n, x) do {				\
  SETUP(n, x);					\
  goto x;					\
} while (0)

/* ERROR(): begin error recovery by jumping to the ERR state without
 * consuming any input.  Wrapped in do/while(0) so that "ERROR();" is a
 * single statement and is safe directly under an if/else. */
#define ERROR() do {				\
  GO(0, ERR);					\
} while (0)

/* SETUP(n, x): consume n octets of input and store x in conn->state,
 * without jumping anywhere.  Use it when something still has to run
 * before control moves to the new state.  Wrapped in do/while(0) so
 * that "SETUP(...);" is a single statement. */
#define SETUP(n, x) do {			\
  GOBBLE(n);					\
  conn->state = x;				\
} while (0)

/* FINAL(n, t): used by the atom parser for a "finalization" response -
 * OK, NO, BAD, or BYE.  Stores t in conn->status, consumes n octets and
 * jumps to the FINALIZE state.  Wrapped in do/while(0) so that
 * "FINAL(...);" is a single statement. */
#define FINAL(n, t) do {			\
  conn->status = t;				\
  GO(n, FINALIZE);				\
} while (0)

/* DAT(n, t): used for data responses - ENTRY, MODTIME, etc.
 * Starts a fresh accumulator: conn->datan is reset to -1 and conn->data
 * becomes a one-element array holding only the NULL terminator.  Then
 * stores t in conn->status, consumes n octets and jumps to the DATA
 * state.  If the array cannot be allocated, error recovery begins.
 * The element size is taken from conn->data itself rather than from an
 * unrelated pointer type.  Wrapped in do/while(0) so that "DAT(...);"
 * is a single statement. */
#define DAT(n, t) do {					\
  conn->datan = -1;					\
  if (! (conn->data = malloc(sizeof *conn->data)))	\
    ERROR();						\
  conn->data[0] = NULL;					\
  conn->status = t;					\
  GO(n, DATA);						\
} while (0)

/* FREEDAT(): release the data accumulator; useful for error recovery.
 * Frees every string up to the NULL terminator, then the array itself,
 * and resets conn->data to NULL.  Does nothing when conn->data is
 * already NULL.  Wrapped in do/while(0) so that "FREEDAT();" is a
 * single statement. */
#define FREEDAT() do {				\
  if (conn->data) {				\
    acap_String **s;				\
    for (s = conn->data; *s; s++)		\
      free(*s);					\
    free(conn->data);				\
    conn->data = NULL;				\
  }						\
} while (0)

/* INCRDAT(): grow the accumulator by one slot.
 * Increments conn->datan and reallocates conn->data so that it holds
 * datan + 2 pointers, with the new last one set to NULL as terminator.
 * Slot conn->data[conn->datan] is left for the caller to fill in.
 * If realloc fails, conn->data is left untouched and error recovery
 * begins.  The element size comes from the array's own element type.
 * Wrapped in do/while(0) so that "INCRDAT();" is a single statement. */
#define INCRDAT() do {						\
  acap_String **nd;						\
  conn->datan++;						\
  nd = realloc(conn->data, sizeof(*nd) * (conn->datan + 2));	\
  if (! nd) ERROR();						\
  conn->data = nd;						\
  conn->data[conn->datan + 1] = NULL;				\
} while (0)

/* Input-buffer accessors.  All of them operate on a variable named
 * `conn` in the enclosing scope.
 *   HAVE(n)     - true when at least n unread octets are buffered
 *   GOBBLE(n)   - consume n octets by advancing the read position
 *   REMAINING() - number of unread octets (in_len - in_pos); this used
 *                 to be computed with the operands swapped, which gave
 *                 the negated count
 *   OCTET(n)    - the n'th unread octet, counting from 0 */
#define HAVE(n) (conn->in_pos <= conn->in_len - (n))
#define GOBBLE(n) (conn->in_pos += (n))
#define REMAINING() (conn->in_len - conn->in_pos)
#define OCTET(n) (conn->incoming[conn->in_pos + (n)])

/* States of the parser DFA, as stored in conn->state.  The same names
 * are also used as goto labels inside parse(). */
enum states {
  TAG      = 0,		/* reading the tag that starts a response line */
  LITBYTES = 1,		/* copying (or skipping) the octets of a literal */
  LITERAL  = 2,		/* reading the digits of a literal's size */
  ERR      = 3,		/* error recovery: skip ahead to the next CRLF */
  DATA     = 4,		/* collecting the strings of a data response */
  QSTRING  = 5,		/* inside a quoted string */
  FINALIZE = 6,		/* collecting the rest of a final response line */
  ATOM     = 7		/* deciding which response keyword follows */
};

/* Hand a completed final response line to the pending command callback.
 *
 * info is the malloc'd text of the line.  When a callback record is
 * registered in conn->cb, the record is freed and cleared first, and
 * only then is its `final` handler invoked with conn->status and info.
 * When no record is registered, info is simply freed. */
static void FinalCallback(acap_Connection conn, char *info) {
  acap_CmdCallback *handler;
  void *handlerData;

  if (! conn->cb) {
    free(info);			/* XXX */
    return;
  }

  /* Copy out what we need: the record is gone before the handler runs. */
  handler = conn->cb->final;
  handlerData = conn->cb->finalData;
  free(conn->cb);
  conn->cb = NULL;

  handler(conn, handlerData, conn->status, info);
}

/* Deliver the accumulated data strings of a completed data response.
 *
 * When a callback record is registered in conn->cb, ownership of the
 * NULL-terminated conn->data array passes to its `data` handler and
 * conn->data is cleared before the call.
 *
 * When no record is registered, the event is reported on stdout and the
 * accumulator is released here.  Otherwise it would stay attached to
 * the connection and be leaked as soon as the next data response
 * allocates a fresh one. */
static void DataCallback(acap_Connection conn) {
  if (conn->cb) {
    acap_String **data = conn->data;
    conn->data = NULL;		/* it's now the callback's data */
    conn->cb->data(conn, conn->cb->dataData, conn->status, data);
  } else {
    printf("Unexpected data callback %d triggered (XXX)\n",
	   conn->status);
    if (conn->data) {
      /* Nobody will take these strings; free them and the array. */
      acap_String **s;
      for (s = conn->data; *s; s++)
	free(*s);
      free(conn->data);
      conn->data = NULL;
    }
  }
}

/* parse - run the response DFA over the unread part of conn->incoming.
 *
 * Execution resumes in conn->state and moves from state to state with
 * the GO()/ERROR() macros until some state cannot continue.
 *
 * Returns 0 when more input is needed before a decision can be made,
 * and also right after a final response line has been handed to
 * FinalCallback().  Returns -1 when the buffer for that final line
 * cannot be allocated.  Input that has been dealt with is consumed by
 * advancing conn->in_pos. */
static int parse(acap_Connection conn) {
  /* mmmm, DFA... */
  while (1)
    switch (conn->state) {
    TAG:
    case TAG:
      {
	/* Attempt to slurp a tag */
	char *start, *p;
	if (! HAVE(1)) return 0;
	if (OCTET(0) == '*') {
	  /* "* " introduces a response with no callback attached */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == ' ') {
	    conn->cb = NULL;
	    GO(2, ATOM);
	  } else
	    ERROR();
	}
	if (OCTET(0) == '+') {
	  /* "+ " is a continuation; its data goes to conn->continuation */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == ' ') {
	    conn->cb = conn->continuation;
	    DAT(1, ACAP_CONTINUATION);
	  } else
	    ERROR();
	}
	/* Otherwise the tag is a run of hex digits (and 'x') ended by a
	 * space; its numeric value is used directly as the callback
	 * record pointer. */
	start = &OCTET(0);
	for (p = start; HAVE(p - start + 1) && *p != ' '; p++)
	  if (! isxdigit((unsigned char)*p) && *p != 'x')
	    ERROR();
	/* If the scan stopped because the input ran out, p is one past
	 * the buffered data and must not be looked at - wait for more. */
	if (! HAVE(p - start + 1))
	  return 0;
	*p = '\0';
	conn->cb = (void *)strtoul(start, &p, 16);
	if (*p) ERROR();
	GO(p - start + 1, ATOM);
      }
    LITBYTES:
    case LITBYTES:
      {
	/* conn->lit_size octets of literal are still expected.  With no
	 * accumulator (conn->data == NULL) they are only skipped. */
	if (HAVE(conn->lit_size)) {
	  if (conn->data) {
	    memcpy(conn->data[conn->datan]->str + conn->lit_off,
		   &OCTET(0),
		   conn->lit_size);
	    conn->data[conn->datan]->str[conn->lit_off + conn->lit_size] = '\0';
	    GO(conn->lit_size, DATA);
	  } else
	    GO(conn->lit_size, ERR);
	}
	/* Only part of the literal is here: take what there is and
	 * stay in this state for the rest. */
	if (conn->data) {
	    memcpy(conn->data[conn->datan]->str + conn->lit_off,
		   &OCTET(0),
		   REMAINING());
	  conn->lit_off += REMAINING();
	}
	conn->lit_size -= REMAINING();
	GOBBLE(REMAINING());
	return 0;
      }
    LITERAL:
    case LITERAL:
      {
	/* Accumulate the decimal size one digit at a time until the
	 * closing "}\r\n" is seen. */
	while (HAVE(3)) {
	  if (OCTET(0) == '}' && OCTET(1) == '\r' && OCTET(2) == '\n') {
	    if (conn->data) {
	      if (! (conn->data[conn->datan] = malloc(sizeof(acap_String) + conn->lit_size))) {
		FREEDAT();	/* we're now in error recovery */
	      } else {
		conn->data[conn->datan]->length = conn->lit_size;
	      }
	      conn->lit_off = 0;
	    }
	    GO(3, LITBYTES);
	  }
	  if (! isdigit((unsigned char)OCTET(0)))
	    ERROR();
	  /* XXX We should probably do an overflow check here.
	   * Although I'm really not sure what we'd do in that case -
	   * close the connection because it wouldn't even be worth
	   * trying to slurp that much data across?
	   * Good thing overflow checks are SO easy to do in C... grn. */
	  conn->lit_size *= 10;
	  conn->lit_size += OCTET(0) - '0';
	  GOBBLE(1);
	}
	return 0;		/* not enough data to decide what to do */
      }
    ERR:
    case ERR:
      {
	/* Drop whatever was accumulated and skip to the end of the
	 * line.  A literal met on the way is still stepped over octet
	 * for octet, so its contents are not mistaken for a CRLF. */
	FREEDAT();
	while(HAVE(2)) {
	  if (OCTET(0) == '{' && isdigit((unsigned char)OCTET(1))) {
	    conn->lit_size = 0;
	    GO(1, LITERAL);
	  }
	  if (conn->incoming[conn->in_pos] == '\r'
	      && conn->incoming[conn->in_pos + 1] == '\n') {
	    GO(2, TAG);
	  }
	  GOBBLE(1);
	}
	return 0;
      }
    DATA:
    case DATA:
      /* We're expecting some number of strings.
       * Hrm. */
      /* XXX We *really* need a better way to get data to callbacks -
       * throwing away parens like this is... bothersome. */
      while(HAVE(1) && OCTET(0) == ')')
	GOBBLE(1);

      if (! HAVE(2)) {
	return 0;
      } else if (OCTET(0) == '\r' && OCTET(1) == '\n') {
	/* End of data */
	SETUP(2, TAG);
	DataCallback(conn);
	GO(0, TAG);
      } else if (! HAVE(3)) {
	return 0;
      } else if (OCTET(0) == ' ' && OCTET(1) == '{'
		 || OCTET(0) == ' ' && OCTET(1) == '(' && OCTET(2) == '{') {
	/* Handle a literal */
	INCRDAT();
	conn->lit_size = 0;
	GO((OCTET(1) == '(' ? 3 : 2), LITERAL);
      } else if (OCTET(0) == ' ' && OCTET(1) == '"'
		 || OCTET(0) == ' ' && OCTET(1) == '(' && OCTET(2) == '"') {
	/* Handle a quoted string */
	INCRDAT();
	conn->data[conn->datan] = malloc(sizeof(acap_String) + ACAP_MAX_QSTRING_SIZE);
	if (! conn->data[conn->datan])
	  ERROR();
	conn->data[conn->datan]->length = 0;
	GO((OCTET(1) == '(' ? 3 : 2), QSTRING);
      } else
	ERROR();
    QSTRING:
    case QSTRING:
      {
	/* Copy characters up to the closing quote.  A backslash makes
	 * the following octet literal. */
	char c;
	while(HAVE(1)) {
	  if (OCTET(0) == '\\') {
	    if (! HAVE(2)) return 0;
	    c = OCTET(1);
	    GOBBLE(2);
	  } else if (OCTET(0) == '\"') {
	    conn->data[conn->datan]->str[conn->data[conn->datan]->length] = '\0';
	    GO(1, DATA);
	  } else {
	    c = OCTET(0);
	    GOBBLE(1);
	  }
	  /* Refuse to grow past the space allocated in the DATA state */
	  if (ACAP_MAX_QSTRING_SIZE <= conn->data[conn->datan]->length)
	    ERROR();
	  conn->data[conn->datan]->str[conn->data[conn->datan]->length++] = c;
	}
	return 0;
      }
    FINALIZE:
    case FINALIZE:
      {
	/* Slurp up everything until the next \r\n */
	int count;
	for (count = 0; HAVE(count + 2); count++)
	  if (OCTET(count) == '\r'
	      && OCTET(count + 1) == '\n') {
	    char *data = malloc(count+1);
	    if (data) {
	      strncpy(data, &OCTET(0), count);
	      data[count] = '\0';
	    }
	    /* The line is consumed even if it could not be copied */
	    SETUP(count + 2, TAG);
	    if (! data) return -1;
	    FinalCallback(conn, data);
	    return 0;		/* in case we need to start decrypting */
	  }
	return 0;		/* didn't have the whole line */
      }
    ATOM:
    case ATOM:
      {
	/* Hrm.  Okay, we want to attempt to make a decision
	 * as to what we're doing, or return 0 if there
	 * isn't enough data to decide.
	 * This is fairly nasty, but ought to optimize well.
	 * Thank god for forward-sexp and backward-sexp...
	 *
	 * Theoretically, we could also do all this with states
	 * in our main DFA, too, except that things would be even
	 * more cluttered (if that's even conceivable) and
	 * it'd be harder to add ASTRINGs later if it turns out that
	 * someday we need them. */
	if (! HAVE(1)) return 0;
	/* There's at least one character. */
	if (OCTET(0) == 'A' || OCTET(0) == 'a') {
	  /* Looking for ACAP */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'C' || OCTET(1) == 'c') {
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == 'A' || OCTET(2) == 'a') {
	      if (! HAVE(4)) return 0;
	      if (OCTET(3) == 'P' || OCTET(3) == 'p') {
		if (! HAVE(5)) return 0;
		if (OCTET(4) == ' ')
		  FINAL(5, ACAP_ACAP);
	      }
	    }
	  }
	} else if (OCTET(0) == 'B' || OCTET(0) == 'b') {
	  /* Looking for BAD or BYE */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'A' || OCTET(1) == 'a') {
	    /* Looking for BAD */
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == 'D' || OCTET(2) == 'd') {
	      if (! HAVE(4)) return 0;
	      if (OCTET(3) == ' ')
		FINAL(4, ACAP_BAD);
	    }
	  } else if (OCTET(1) == 'Y' || OCTET(1) == 'y') {
	    /* Looking for BYE */
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == 'E' || OCTET(2) == 'e') {
	      if (! HAVE(4)) return 0;
	      if (OCTET(3) == ' ')
		FINAL(4, ACAP_BYE);
	    }
	  }
	} else if (OCTET(0) == 'E' || OCTET(0) == 'e') {
	  /* Looking for ENTRY */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'N' || OCTET(1) == 'n') {
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == 'T' || OCTET(2) == 't') {
	      if (! HAVE(4)) return 0;
	      if (OCTET(3) == 'R' || OCTET(3) == 'r') {
		if (! HAVE(5)) return 0;
		if (OCTET(4) == 'Y' || OCTET(4) == 'y') {
		  if (! HAVE(6)) return 0;
		  if (OCTET(5) == ' ')
		    DAT(5, ACAP_RESPONSE_ENTRY);
		}
	      }
	    }
	  }
	} else if (OCTET(0) == 'O' || OCTET(0) == 'o') {
	  /* Looking for OK */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'K' || OCTET(1) == 'k') {
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == ' ')
	      FINAL(3, ACAP_OK);
	  }
	} else if (OCTET(0) == 'M' || OCTET(0) == 'm') {
	  /* Looking for MODTIME */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'O' || OCTET(1) == 'o') {
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == 'D' || OCTET(2) == 'd') {
	      if (! HAVE(4)) return 0;
	      if (OCTET(3) == 'T' || OCTET(3) == 't') {
		if (! HAVE(5)) return 0;
		if (OCTET(4) == 'I' || OCTET(4) == 'i') {
		  if (! HAVE(6)) return 0;
		  if (OCTET(5) == 'M' || OCTET(5) == 'm') {
		    if (! HAVE(7)) return 0;
		    if (OCTET(6) == 'E' || OCTET(6) == 'e') {
		      if (! HAVE(8)) return 0;
		      if (OCTET(7) == ' ')
			DAT(7, ACAP_RESPONSE_MODTIME);
		    }
		  }
		}
	      }
	    }
	  }
	} else if (OCTET(0) == 'N' || OCTET(0) == 'n') {
	  /* Looking for NO */
	  if (! HAVE(2)) return 0;
	  if (OCTET(1) == 'O' || OCTET(1) == 'o') {
	    if (! HAVE(3)) return 0;
	    if (OCTET(2) == ' ')
	      FINAL(3, ACAP_NO);
	  }
	}
	/* If we reached this point, it's because we didn't match
	 * at a place where we had data.  Since we're not even
	 * attempting to deal with astrings, we just ERROR(). */
	ERROR();
      }
    }
}

/* acap_Parse - pump data from the connection's read procedure through
 * the response parser.
 *
 * Returns 0 immediately when the connection is not open.  Otherwise it
 * alternates between parsing what is buffered and reading more until
 * either parse() returns non-zero (that value is returned) or the read
 * procedure returns 0, in which case the connection is marked closed
 * and 0 is returned. */
int acap_Parse(acap_Connection conn) {
  if (! conn->isopen) return 0;
  while(1) {
    /* parse as much as possible */
    int decrypting = conn->decryptproc ? 1 : 0;
    while (conn->in_len - conn->in_pos) {
      size_t before = (size_t)conn->in_pos;
      int res = parse(conn);
      if (res) return res;
      /* A decrypt procedure was installed during this pass: stop
       * parsing what is in the buffer. */
      if (! decrypting && conn->decryptproc)
	break;
      /* parse() returned without consuming anything, so what is left
       * is an incomplete item.  Calling it again on the same bytes
       * would spin forever; go read more instead. */
      if ((size_t)conn->in_pos == before)
	break;
    }

    /* save current contents: slide the unparsed tail to the front of
     * the buffer and rebase every offset accordingly */
    if (conn->in_pos)
      memmove(conn->incoming,
	      conn->incoming + conn->in_pos,
	      conn->sec_len - conn->in_pos);
    conn->in_len -= conn->in_pos;
    conn->sec_pos -= conn->in_pos;
    conn->sec_len -= conn->in_pos;
    conn->in_pos = 0;

    /* read as much as possible */
    {
      /* NOTE(review): the result is stored in a size_t, so a read
       * procedure that reports errors with a negative value would not
       * be caught by the test below - confirm its contract. */
      size_t n_read = 
	(conn->readproc)(conn->clientdata,
			 conn->incoming + conn->sec_len,
			 ACAP_DATA_BUF_SIZE - conn->sec_len);
      if (! n_read) {
	conn->isopen = 0;
	return 0;
      }
      conn->sec_len += n_read;
    }
    /* ... eventually, we'll need to decrypt.
     * We don't for now, though... XXX */
    conn->in_len = conn->sec_pos = conn->sec_len;
  }
}
