/*
 * regular expression pattern matching routines < rege.c >
 *
 * 正規表現によるパターンマッチングルーチン
 *
 * Copyright 1996,1998 (c) Taiji Yamada, AIHARA Electrical Engineering Co.,Ltd.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "rege.h"

/*
 * エラー処理
 */
static char *rege_errmsg[] = {
  "Success",
  "No match",
  "Invalid regular expression",
  "Not implemented",
  "Invalid character class name",
  "Trailing escape sequence",
  "Invalid back reference",
  "Unmatched left bracket",
  "Unmatched left parenthesis",
  "Unmatched left brace",
  "Invalid contents of braces",
  "Invalid range end",
  "Memory exhausted",
  "Invalid preceding regular expression for repetition",
  "Premature end of regular expression",
  "Regular expression too big",
  "Unmatched right parenthesis",
};
char *rege_error(rege_errcode_t errcode)
{
  return rege_errmsg[errcode];
}

#ifndef min
#define min(a,b) (((a)<(b))?(a):(b))
#endif
#ifndef max
#define max(a,b) (((a)>(b))?(a):(b))
#endif
#ifndef isodigit
#define isodigit(o) ('0'<=(o)&&(o)<='7')
#endif
#ifndef ctod
#define ctod(d) (isdigit(d)?((d)-'0'):0)
#endif
#ifndef ctox
#define ctox(x) (isdigit(x)?((x)-'0'):isupper(x)?((x)-'A'+10):((x)-'a'+10))
#endif
#ifndef ctoo
#define ctoo(o) (isdigit(o)?((o)-'0'):0)
#endif
#ifndef ctoC
#define ctoC(c) (isupper(c)?((c)-'A'+1):0)
#endif

/*
 * メモリ確保と解放
 */
#ifdef DEBUG
static size_t used_size = 0;
#endif
static void *rege_malloc(size_t size)
{
  void *p;

  if ((p=malloc(size)) == NULL)
    return NULL;
  memset(p, 0, size);
#ifdef DEBUG
  used_size += size;
#endif
  return p;
}

#ifdef DEBUG
#define rege_free_size(p,size) \
do {\
  used_size -= size;\
  free(p);\
} while(0)
#else
#define rege_free_size(p,size) free(p)
#endif


/*
 * 正規表現をパースして構文木を生成する
 */

/*
 * トークンを表す型
 */
typedef enum {
  TOKEN_CHAR,                   /* 通常の文字                 */
  TOKEN_GROUP_OPEN,             /* 正規表現のグループ化の開始 */
  TOKEN_GROUP_CLOSE,            /* 正規表現のグループ化の終了 */
  TOKEN_INTERVAL_ZERO_MORE,     /* 任意の数(0を含む)の並び    */
  TOKEN_INTERVAL_ONE_MORE,      /* 1回以上の並び              */
  TOKEN_INTERVAL_ZERO_ONE,      /* 0回か1回の並び             */
  TOKEN_INTERVAL_OPEN,          /* 繰り返しの指定開始         */
  TOKEN_INTERVAL_CLOSE,         /* 繰り返しの指定終了         */
  TOKEN_ANY_CHAR,               /* 改行を除く任意の1文字      */
  TOKEN_LIST_OPEN,              /* 文字リストの指定開始       */
  TOKEN_LIST_CLOSE,             /* 文字リストの指定終了       */
  TOKEN_LINE_BEGIN,             /* 行の先頭                   */
  TOKEN_LINE_END,               /* 行の末尾                   */
  TOKEN_ALTERNATION,            /* 選択                       */
  TOKEN_META_ESCAPE,            /* メタエスケープ             */
  TOKEN_END,                    /* トークンの終了             */
  TOKEN_LIST_NOT,               /* 文字リストの否定指定       */
  TOKEN_LIST_RANGE,             /* 文字リストの範囲指定       */
  TOKEN_LIST_ESCAPE,            /* 文字リストのメタエスケープ */
  TOKEN_INTERVAL_RANGE,         /* 繰り返しの範囲指定         */
  TOKEN_XCHAR_CLASS_OPEN,       /* 拡張キャラクタクラスの開始 */ /* ! */
  TOKEN_XCHAR_CLASS_CLOSE,      /* 拡張キャラクタクラスの終了 */ /* ! */
} token_t;
static char *token_chars[] = {
  "c",
#if (VERSION_DOWN == 1)
  "(", ")", "*", "+", "",  "",  "",  "",  "",  "",  "",  "",  "|", "",
#else
  "(", ")", "*", "+", "?", "{", "}", ".", "[", "]", "^", "$", "|", "\\",
#endif
  "",
  "^", "-", "\\", ",", "[:", ":]",
};
#define TOKEN_CHARS ((parser->token_chars)?parser->token_chars:token_chars)

#define CHAR_SPACE_SIZE 256 /* 文字空間のサイズ */
#if (VERSION_DOWN == 1)
#else
/*
 * 任意の一文字
 */
static char any_char[] = "^\n";
#define ANY_CHAR ((parser->any_char)?parser->any_char:any_char)

/*
 * エスケープシーケンスの種類を表す型
 */
typedef enum {
  ESCS_CTRL_CHAR,       /* 制御文字 */
  ESCS_CHAR_CLASS,      /* 文字クラス */
  ESCS_CHAR_CODE,       /* 文字コード */
  ESCS_SPEC_CHAR,       /* 特殊メタキャラクタ */
  ESCS_LITERAL_CHAR,    /* リテラル文字 */
} escs_kind_t;
typedef struct {
  escs_kind_t kind;     /* エスケープシーケンスの種類 */
  char i;               /* エスケープシーケンスの識別子 */
  char *desc;           /* desc は、
                           kind = ESCS_CTRL_CHAR の時、制御文字を表す文字
                           kind = ESCS_CHAR_CLASS の時、文字クラスを表す文字列
                           kind = ESCS_CHAR_CODE の時、文字コードを表す正規表現
                           kind = ESCS_SPEC_CHAR の時、意味なし
                           kind = ESCS_LITERAL_CHAR の時、意味なし */
} escs_t;
/*
 * エスケープシーケンスを表す型
 */
typedef enum {
  CTRL_BELL,            /* 警告文字 */
  CTRL_BS,              /* バックスペース */
  CTRL_FF,              /* フォームフィード */
  CTRL_CR,              /* 改行 */
  CTRL_LF,              /* 復帰 */
  CTRL_HTAB,            /* 水平タブ */
  CTRL_VTAB,            /* 垂直タブ */
  CLASS_SPACE,          /* 空白文字「\t\n\f\r 」 */
  CLASS_NOT_SPACE,      /* 空白文字以外 */
  CLASS_WORD,           /* 英数字と「_」 */             /* ! */
  CLASS_NOT_WORD,       /* 英数字,「_」以外の文字 */
  CLASS_DIGIT,          /* 数字 */
  CLASS_NOT_DIGIT,      /* 非数字 */
  CODE_CTRL,            /* 英字によるコントロール文字 */
  CODE_XDIGIT,          /* 2桁の16進数による文字 */
  CODE_ODIGIT,          /* 3桁の8進数による文字 */
  CODE_NULL,            /* ナル文字 */                  /* ! */
  BACK_REFERENCE,       /* 第nグループの内容 */         /* ! */
  NULL_WORD_BOUNDARY,   /* 単語境界の空文字 */          /* ! */
  NULL_WITHIN_WORD,     /* 単語境界以外の空文字 */      /* ! */
  NULL_WORD_OPEN,       /* 単語開始位置の空文字 */      /* ! */
  NULL_WORD_CLOSE,      /* 単語終了位置の空文字 */      /* ! */
  LITERAL_CHAR,         /* 任意のリテラル文字 */
} escs_set_t;
static escs_t escs_set[] = {
  { ESCS_CTRL_CHAR,    'a',  "\a",             },
  { ESCS_CTRL_CHAR,    'b',  "\b",             },
  { ESCS_CTRL_CHAR,    'f',  "\f",             },
  { ESCS_CTRL_CHAR,    'n',  "\n",             },
  { ESCS_CTRL_CHAR,    'r',  "\r",             },
  { ESCS_CTRL_CHAR,    't',  "\t",             },
  { ESCS_CTRL_CHAR,    'v',  "\v",             },
  { ESCS_CHAR_CLASS,   's',  "\t\n\f\r ",      },
  { ESCS_CHAR_CLASS,   'S',  "^\t\n\f\r ",     },
  { ESCS_CHAR_CLASS,   'w',  "0-9A-Z_a-z",     },
  { ESCS_CHAR_CLASS,   'W',  "^0-9A-Z_a-z",    },
  { ESCS_CHAR_CLASS,   'd',  "0-9",            },
  { ESCS_CHAR_CLASS,   'D',  "^0-9",           },
  { ESCS_CHAR_CODE,    'c',  "[A-Z]",          },
  { ESCS_CHAR_CODE,    'x',  "[0-9A-Fa-f]{2}", },
  { ESCS_CHAR_CODE,    '\0', "[0-8]{3}",       },
  { ESCS_CHAR_CODE,    '0',  "\0",             },
  { ESCS_SPEC_CHAR,    '\0', "",               },
  { ESCS_SPEC_CHAR,    'b',  "",               },
  { ESCS_SPEC_CHAR,    'B',  "",               },
  { ESCS_SPEC_CHAR,    '<',  "",               },
  { ESCS_SPEC_CHAR,    '>',  "",               },
  { ESCS_LITERAL_CHAR, '\0', "",               },
};

/*
 * 拡張キャラクタクラスの種類を表す型
 */
typedef enum {
  XCHAR_CLASS_ALNUM,    /* アルファベットと数字 */
  XCHAR_CLASS_ALPHA,    /* アルファベット */
  XCHAR_CLASS_CNTRL,    /* 制御文字 */
  XCHAR_CLASS_DIGIT,    /* 数字 */
  XCHAR_CLASS_GRAPH,    /* space 以外の print */
  XCHAR_CLASS_LOWER,    /* 小文字のアルファベット */
  XCHAR_CLASS_PRINT,    /* 印字可能文字(ASCII なら空白〜チルダ) */
  XCHAR_CLASS_PUNCT,    /* cntrl, alnum 以外の文字 */
  XCHAR_CLASS_SPACE,    /* フォームフィード、改行、復帰、水平タブ、空白 */
  XCHAR_CLASS_UPPER,    /* 大文字のアルファベット */
  XCHAR_CLASS_XDIGIT,   /* 16 進の数字 0-9, a-f, A-F */
} xchar_set_t;
typedef struct {
  char *name;
  int (*is)(int);
} xchar_t;
/*
 * 拡張キャラクタクラスを表す型
 */
xchar_t xchars[] = {
  { "alnum",    isalnum, },
  { "alpha",    isalpha, },
  { "cntrl",    iscntrl, },
  { "digit",    isdigit, },
  { "graph",    isgraph, },
  { "lower",    islower, },
  { "print",    isprint, },
  { "punct",    ispunct, },
  { "space",    isspace, },
  { "upper",    isupper, },
  { "xdigit",   isxdigit, },
};
#endif

/*
 * ノードの操作を表す型
 */
typedef enum {
  OP_CHAR,              /* 文字そのもの */
#if (VERSION_DOWN == 1)
#else
  OP_HEAD,              /* 行頭 */
  OP_TAIL,              /* 行末 */
#endif
  OP_CONCAT,            /* XY */
  OP_UNION,             /* X|Y */
  OP_CLOSURE,           /* X* */
  OP_EMPTY,             /* 空 */
} op_t;

/*
 * 構文木のノードを表す型
 */
typedef struct tree {
  op_t op;
  union {
    char c;
    struct {
      struct tree *left;
      struct tree *right;
    } x;
  } u;
} tree_t;

/*
 * パーサを表す型
 */
typedef struct {
  rege_syntax_t syntax;         /* 構文規則 */
  rege_errcode_t errcode;       /* エラーコード */
  char *trans_table;            /* 変換表 */
  tree_t *tree;                 /* 構文木 */
  token_t current_token;        /* 処理中のトークン */
  char token_char;              /* 文字値 */
  char *p;                      /* 解析する文字列へのポインタ */
#if (VERSION_DOWN == 1)
#else
  char *token_char_set;         /* 文字集合値 */
  char user_char_set[CHAR_SPACE_SIZE]; /* ユーザー文字集合値 */
  int intervals_open, intervals_close; /* 繰り返し範囲 */
#endif
  char **token_chars;           /* カスタマイズ可能なトークン */
  char *any_char;               /* カスタマイズ可能な任意の一文字 */
} rege_parser_t;

/*
 * トークンを一つ読み込む
 */
static void get_token(rege_parser_t *parser)
{
  token_t i;
  char *c;

#if (VERSION_DOWN == 1)
  parser->current_token = TOKEN_CHAR;
  for (i=TOKEN_CHAR+1; i<TOKEN_END; i++) {
    int l;
    if (*parser->p == '\0') {
      parser->current_token = TOKEN_END;
      return;
    }
    if ((l=strlen(TOKEN_CHARS[i])) && strncmp(parser->p,TOKEN_CHARS[i],l)==0) {
      parser->current_token = i;
      break;
    }
  }
  c = parser->p;
  parser->p += strlen(TOKEN_CHARS[parser->current_token]);
  if (parser->current_token == TOKEN_CHAR)
    parser->token_char = *c;
  return;
#else
  parser->current_token = TOKEN_CHAR;
  for (i=TOKEN_CHAR+1; i<TOKEN_END; i++) {
    int l;
    if (*parser->p == '\0') {
      parser->current_token = TOKEN_END;
      return;
    }
    if ((l=strlen(TOKEN_CHARS[i])) && strncmp(parser->p,TOKEN_CHARS[i],l)==0) {
      parser->current_token = i;
      break;
    }
  }
  c = parser->p;
  parser->p += strlen(TOKEN_CHARS[parser->current_token]);
  switch (parser->current_token) {
  case TOKEN_CHAR:
    parser->token_char = *c;
    break;
  case TOKEN_GROUP_OPEN: break;
  case TOKEN_GROUP_CLOSE: break;
  case TOKEN_INTERVAL_ZERO_MORE: break;
  case TOKEN_INTERVAL_ONE_MORE: break;
  case TOKEN_INTERVAL_ZERO_ONE: break;
  case TOKEN_INTERVAL_OPEN:
    if (*parser->p != '\0') {
      int *i = &parser->intervals_open;
      parser->intervals_open = 0;
      parser->intervals_close = 0;
      c = parser->p++;
      while (*c != '\0') {
        int l;
        if ((l=strlen(TOKEN_CHARS[TOKEN_INTERVAL_CLOSE]))
            && strncmp(c,TOKEN_CHARS[TOKEN_INTERVAL_CLOSE],l)==0) {
          if (i == &parser->intervals_open)
            parser->intervals_close = parser->intervals_open;
          parser->current_token = TOKEN_INTERVAL_CLOSE;
          parser->p += (l-1); /* ! */
          break;
        } else if ((l=strlen(TOKEN_CHARS[TOKEN_INTERVAL_RANGE]))
                   && strncmp(c,TOKEN_CHARS[TOKEN_INTERVAL_RANGE],l)==0) {
          i = &parser->intervals_close;
          c = parser->p;
          parser->p += l;
        } else if (isdigit(*c)) {
          *i = *i*10 + ctod(*c);
          c = parser->p++;
        } else {
          parser->errcode = REGE_BADBR;
          c = parser->p++;
        }
      }
    }
    if (parser->current_token != TOKEN_INTERVAL_CLOSE) {
      parser->current_token = TOKEN_END;
      parser->errcode = REGE_EBRACE;
    }    
    break;
  case TOKEN_INTERVAL_CLOSE: break;
  case TOKEN_ANY_CHAR:
    parser->current_token = TOKEN_LIST_CLOSE;
    parser->token_char_set = ANY_CHAR;
    break;
  case TOKEN_LIST_OPEN:
    if (*parser->p != '\0') {
      int l, l2;
      int i = 0;
      c = parser->p++;
      if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_CLOSE]))
          && strncmp(c,TOKEN_CHARS[TOKEN_LIST_CLOSE],l)==0) {
        strncpy(&parser->user_char_set[i],TOKEN_CHARS[TOKEN_LIST_CLOSE],l);
        i += l;
        c = parser->p;
        parser->p += l;
      }
      if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_NOT]))
          && strncmp(c,TOKEN_CHARS[TOKEN_LIST_NOT],l)==0
          && (l2=strlen(TOKEN_CHARS[TOKEN_LIST_CLOSE]))
          && strncmp(parser->p,TOKEN_CHARS[TOKEN_LIST_CLOSE],l2)==0) {
        strncpy(&parser->user_char_set[i],TOKEN_CHARS[TOKEN_LIST_NOT],l);
        i += l;
        c = parser->p;
        parser->p += l;
        strncpy(&parser->user_char_set[i],TOKEN_CHARS[TOKEN_LIST_CLOSE],l2);
        i += l2;
        c = parser->p;
        parser->p += l2;
      }
      while (*c != '\0') {
        if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_CLOSE]))
            && strncmp(c,TOKEN_CHARS[TOKEN_LIST_CLOSE],l)==0) {
          parser->user_char_set[i+1] = '\0';
          parser->current_token = TOKEN_LIST_CLOSE;
          parser->token_char_set = parser->user_char_set;
          parser->p += (l-1); /* ! */
          break;
        } else
          parser->user_char_set[i++] = *c;
        c = parser->p++;
      }
    }
    if (parser->current_token != TOKEN_LIST_CLOSE) {
      parser->current_token = TOKEN_END;
      parser->errcode = REGE_EBRACK;
    }
    break;
  case TOKEN_LIST_CLOSE:
    break;
  case TOKEN_LINE_BEGIN:
    parser->token_char = *c;
    break;
  case TOKEN_LINE_END:
    parser->token_char = *c;
    break;
  case TOKEN_ALTERNATION: break;
  case TOKEN_META_ESCAPE:
    if (*parser->p != '\0') {
      escs_set_t t;
      c = parser->p++;
      t = CTRL_BELL;
      while (t<=LITERAL_CHAR) {
        switch (escs_set[t].kind) {
        case ESCS_CTRL_CHAR:
          if (*c == escs_set[t].i) {
            parser->current_token = TOKEN_CHAR;
            parser->token_char = escs_set[t].desc[0];
            t = LITERAL_CHAR;
          }
          break;
        case ESCS_CHAR_CLASS:
          if (*c == escs_set[t].i) {
            parser->current_token = TOKEN_LIST_CLOSE;
            parser->token_char_set = escs_set[t].desc;
            t = LITERAL_CHAR;
          }
          break;
        case ESCS_CHAR_CODE:
          switch (t) {
          case CODE_CTRL:
            if (*c == escs_set[t].i && isupper(parser->p[0])) {
              parser->current_token = TOKEN_CHAR;
              parser->token_char = ctoC(parser->p[0]);
              parser->p++;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_XDIGIT:
            if (*c == escs_set[t].i
                && isxdigit(parser->p[0]) && isxdigit(parser->p[1])) {
              parser->current_token = TOKEN_CHAR;
              parser->token_char = ctox(parser->p[0])*16+ctox(parser->p[1]);
              parser->p += 2;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_ODIGIT:
            if (isodigit(*c)
                && isodigit(parser->p[0]) && isodigit(parser->p[1])) {
              parser->current_token = TOKEN_CHAR;
              parser->token_char = ctoo(*c)*64+ctoo(parser->p[0])*8+ctoo(parser->p[1]);
              parser->p += 2;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_NULL:
          default:
            if (*c == escs_set[t].i) {
              parser->current_token = TOKEN_CHAR;
              parser->token_char = '\0';
              t = LITERAL_CHAR;
            }
            break;
          }
          break;
        case ESCS_SPEC_CHAR: /* ! */
          break;
        case ESCS_LITERAL_CHAR:
          parser->current_token = TOKEN_CHAR;
          parser->token_char = *c;
          t = LITERAL_CHAR;
          break;
        }
        t++;
      }
    } else {
      parser->current_token = TOKEN_END;
      parser->errcode = REGE_EESCAPE;
    }
    break;
  case TOKEN_END: default: break;
  }
  return;
#endif
}

/*
 * パーサの初期化
 */
static void initialize_regexp_parser(char *pattern, rege_parser_t *parser)
{
  parser->p = pattern;
  get_token(parser);
}

/*
 * 構文木のノード作成
 */
static tree_t *make_tree_node(rege_parser_t *parser, op_t op, tree_t *left, tree_t *right)
{
  tree_t *p;

  if ((p=(tree_t*)rege_malloc(sizeof(tree_t))) == NULL) {
    parser->errcode = REGE_ESPACE;
    return NULL;
  }
  p->op = op;
  p->u.x.left = left;
  p->u.x.right = right;
  return p;
}

/*
 * 構文木の葉の作成
 */
static tree_t *make_atom(rege_parser_t *parser, char c)
{
  tree_t *p;

  p = make_tree_node(parser, OP_CHAR, NULL, NULL);
  p->u.c = c;
  return p;
}

#if (VERSION_DOWN == 1)
#else
/*
 * 行頭を表す構文木の葉を返す
 */
static tree_t *make_head(rege_parser_t *parser, char c)
{
  tree_t *p;

  p = make_tree_node(parser, OP_HEAD, NULL, NULL);
  p->u.c = c;
  return p;
}

/*
 * 行末を表す構文木の葉を返す
 */
static tree_t *make_tail(rege_parser_t *parser, char c)
{
  tree_t *p;

  p = make_tree_node(parser, OP_TAIL, NULL, NULL);
  p->u.c = c;
  return p;
}

/*
 * 文字集合の場合の構文木の葉の作成
 */
static tree_t *make_atoms(rege_parser_t *parser, char *p)
{
  tree_t *x = NULL;
  unsigned char *c, table[CHAR_SPACE_SIZE/8], b, a;
  int neg_flag = 0, l;

  for (b=0; b<CHAR_SPACE_SIZE/8; b++)
    table[b] = 0;
  c = p++;
  if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_NOT]))
      && strncmp(c,TOKEN_CHARS[TOKEN_LIST_NOT],l)==0) {
    neg_flag = !0;
    if (!(parser->syntax & REGE_HAT_LISTS_NEWLINE))
      table[('\n')/8] |= (1<<('\n'%8));
    c = p;
    p += l;
  }
  if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_RANGE]))
      && strncmp(c,TOKEN_CHARS[TOKEN_LIST_RANGE],l)==0) {
    table[(b=*c)/8] |= (1<<(b%8));
    c = p;
    p += l;
  }
  while (*c != '\0') {
    if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_ESCAPE]))
        && strncmp(c,TOKEN_CHARS[TOKEN_LIST_ESCAPE],l)==0) {
      escs_set_t t;
      c = p;
      p += l;
      t = CTRL_BELL;
      while (t<=LITERAL_CHAR) {
        switch (escs_set[t].kind) {
        case ESCS_CTRL_CHAR:
          if (*c == (unsigned char)escs_set[t].i) {
            a = escs_set[t].desc[0];
            table[(b=a)/8] |= (1<<(b%8));
            t = LITERAL_CHAR;
          }
          break;
        case ESCS_CHAR_CLASS:
          if (*c == (unsigned char)escs_set[t].i) {
            char *p = escs_set[t].desc;
            unsigned char *c, tbl[CHAR_SPACE_SIZE/8], b, a;
            int neg_flag = 0, l;

            for (b=0; b<CHAR_SPACE_SIZE/8; b++)
              tbl[b] = 0;
            c = p++;
            if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_NOT]))
                && strncmp(c,TOKEN_CHARS[TOKEN_LIST_NOT],l)==0) {
              neg_flag = !0;
              c = p;
              p += l;
            }
            while (*c != '\0') {
              if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_RANGE])) /* ! */
                  && strncmp(c,TOKEN_CHARS[TOKEN_LIST_RANGE],l)==0) {
                if (b<=(unsigned char)*p)
                  for (a=b+1; a<=(unsigned char)*p; a++)
                    tbl[a/8] |= (1<<(a%8));
                else if (*p == '\0')
                  table[TOKEN_CHARS[TOKEN_LIST_RANGE][0]/8]
                    |= (1<<(TOKEN_CHARS[TOKEN_LIST_RANGE][0]%8));
                else
                  if (!(parser->syntax & REGE_IGNORE_EMPTY_RANGES))
                    parser->errcode = REGE_ERANGE;      
                c = p;
                p += l;
              } else
                tbl[(b=*c)/8] |= (1<<(b%8));
              c = p++;
            }
            if (neg_flag)
              for (a=0; a<CHAR_SPACE_SIZE-1; a++)
                tbl[a/8] ^= (1<<(a%8));
            for (a=0; a<CHAR_SPACE_SIZE-1; a++)
              if (tbl[a/8]&(1<<(a%8)))
                table[a/8] |= (1<<(a%8));
            t = LITERAL_CHAR;
          }
          break;
        case ESCS_CHAR_CODE:
          switch (t) {
          case CODE_CTRL:
            if (*c == (unsigned char)escs_set[t].i && isupper(p[0])) {
              a = ctoC(p[0]);
              table[(b=a)/8] |= (1<<(b%8));
              p++;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_XDIGIT:
            if (*c == (unsigned char)escs_set[t].i
                && isxdigit(p[0]) && isxdigit(p[1])) {
              a = ctox(p[0])*16+ctox(p[1]);
              table[(b=a)/8] |= (1<<(b%8));
              p += 2;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_ODIGIT:
            if (isodigit(*c)
                && isodigit(p[0]) && isodigit(p[1])) {
              a = ctoo(*c)*64+ctoo(p[0])*8+ctoo(p[1]);
              table[(b=a)/8] |= (1<<(b%8));
              p += 2;
              t = LITERAL_CHAR;
            }
            break;
          case CODE_NULL:
          default:
            if (*c == (unsigned char)escs_set[t].i) {
              parser->current_token = TOKEN_CHAR;
              a = '\0';
              table[(b=a)/8] |= (1<<(b%8));
              t = LITERAL_CHAR;
            }
            break;
          }
          break;
        case ESCS_SPEC_CHAR:
        case ESCS_LITERAL_CHAR:
          table[(b=*c)/8] |= (1<<(b%8));
          t = LITERAL_CHAR;
          break;
        }
        t++;
      }
    } else if ((l=strlen(TOKEN_CHARS[TOKEN_LIST_RANGE]))
               && strncmp(c,TOKEN_CHARS[TOKEN_LIST_RANGE],l)==0) { /* ! */
      if (b<=(unsigned char)*p)
        for (a=b+1; a<=(unsigned char)*p; a++)
          table[a/8] |= (1<<(a%8));
      else if (*p == '\0')
        table[TOKEN_CHARS[TOKEN_LIST_RANGE][0]/8]
          |= (1<<(TOKEN_CHARS[TOKEN_LIST_RANGE][0]%8));
      else
        if (!(parser->syntax & REGE_IGNORE_EMPTY_RANGES))
          parser->errcode = REGE_ERANGE;
      c = p;
      p += l;
    } else
      table[(b=*c)/8] |= (1<<(b%8));
    c = p++;
  }
  if (neg_flag)
    for (a=0; a<CHAR_SPACE_SIZE-1; a++)
      table[a/8] ^= (1<<(a%8));
  for (a=0; a<CHAR_SPACE_SIZE-1; a++)
    if (table[a/8]&(1<<(a%8)))
      if (x == NULL)
        x = make_atom(parser, a);
      else
        x = make_tree_node(parser, OP_UNION, x, make_atom(parser, a));
  return x;
}
#endif

/*
 * 選択 X|Y について regexp をパースして得られた構文木を返す
 */
static tree_t *regexp(rege_parser_t *parser)
{
  tree_t *x;
  static tree_t *term(rege_parser_t *parser);

  x = term(parser);
  while (parser->current_token == TOKEN_ALTERNATION) {
    get_token(parser);
    x = make_tree_node(parser, OP_UNION, x, term(parser));
  }
  return x;
}

/*
 * 連結 XY について term をパースして得られた構文木を返す
 */
static tree_t *term(rege_parser_t *parser)
{
  tree_t *x;
  static tree_t *factor(rege_parser_t *parser);

  if (parser->current_token == TOKEN_ALTERNATION
      || parser->current_token == TOKEN_GROUP_CLOSE
      || parser->current_token == TOKEN_END)
    x = make_tree_node(parser, OP_EMPTY, NULL, NULL);
  else {
    x =  factor(parser);
    while (parser->current_token != TOKEN_ALTERNATION
           && parser->current_token != TOKEN_GROUP_CLOSE
           && parser->current_token != TOKEN_END)
      x = make_tree_node(parser, OP_CONCAT, x, factor(parser));
  }
  return x;
}

/*
 * 繰り返し X*, X+, X?, X{n,m} について factor をパースして得られた構文木を返す
 */
static tree_t *factor(rege_parser_t *parser)
{
  tree_t *x;
  static tree_t *primary(rege_parser_t *parser);

  x = primary(parser);
  if (parser->current_token == TOKEN_INTERVAL_ZERO_MORE) {
    x = make_tree_node(parser, OP_CLOSURE, x, NULL);
    get_token(parser);
  } else if (parser->current_token == TOKEN_INTERVAL_ONE_MORE) {
    x = make_tree_node(parser, OP_CONCAT, x,
                       make_tree_node(parser, OP_CLOSURE, x, NULL));
    get_token(parser);
  } else if (parser->current_token == TOKEN_INTERVAL_ZERO_ONE) {
    x = make_tree_node(parser, OP_UNION, x,
                       make_tree_node(parser, OP_EMPTY, NULL, NULL));
    get_token(parser);
#if (VERSION_DOWN == 1)
#else
  } else if (parser->current_token == TOKEN_INTERVAL_CLOSE) {
    tree_t *p;
    int i;
    if (parser->intervals_close == 0)
      p = make_tree_node(parser, OP_CLOSURE, x, NULL);
    else
      p = make_tree_node(parser, OP_EMPTY, NULL, NULL);
    for (i=parser->intervals_close; i>parser->intervals_open; i--)
      p = make_tree_node(parser, OP_CONCAT, p,
                         make_tree_node(parser, OP_UNION, x,
                                        make_tree_node(parser, OP_EMPTY, NULL, NULL)));
    for (i=parser->intervals_open; i>0; i--)
      p = make_tree_node(parser, OP_CONCAT, x, p);
    if (parser->intervals_open == 0)
      p = make_tree_node(parser, OP_UNION, p,
                         make_tree_node(parser, OP_EMPTY, NULL, NULL));
    x = p;
    get_token(parser);
#endif
  }
  return x;
}

/*
 * 文字そのものまたは (X) について primary をパースして得られた構文木を返す
 */
static tree_t *primary(rege_parser_t *parser)
{
  tree_t *x;

  if (parser->current_token == TOKEN_CHAR) {
    x = make_atom(parser, parser->token_char);
    get_token(parser);
#if (VERSION_DOWN == 1)
#else
  } else if (parser->current_token == TOKEN_LINE_BEGIN) {
    x = make_head(parser, parser->token_char);
    get_token(parser);
  } else if (parser->current_token == TOKEN_LINE_END) {
    x = make_tail(parser, parser->token_char);
    get_token(parser);
  } else if (parser->current_token == TOKEN_LIST_CLOSE) {
    x = make_atoms(parser, parser->token_char_set);
    get_token(parser);
#endif
  } else if (parser->current_token == TOKEN_GROUP_OPEN) {
    get_token(parser);
    x = regexp(parser);
    if (parser->current_token != TOKEN_GROUP_CLOSE)
      parser->errcode = REGE_EPAREN;
    get_token(parser);
  } else
    parser->errcode = REGE_BADPAT;
  return x;
}

#ifdef DEBUG
static void dump_tree(tree_t *p)
{
  switch (p->op) {
  case OP_CHAR:
#if (VERSION_DOWN == 1)
#else
  case OP_HEAD:
  case OP_TAIL:
#endif
    fprintf(stderr, "\"%c\"", p->u.c);
    break;
  case OP_CONCAT:
    fprintf(stderr, "(concat ");
    dump_tree(p->u.x.left);
    fprintf(stderr, " .");
    dump_tree(p->u.x.right);
    fprintf(stderr, ")");
    break;
  case OP_UNION:
    fprintf(stderr, "(or ");
    dump_tree(p->u.x.left);
    fprintf(stderr, " .");
    dump_tree(p->u.x.right);
    fprintf(stderr, ")");
    break;
  case OP_CLOSURE:
    fprintf(stderr, "(closure ");
    dump_tree(p->u.x.left);
    fprintf(stderr, ")");
    break;
  case OP_EMPTY:
    fprintf(stderr, "EMPTY");
    break;
  default:
    break;
  }
}
#endif

/*
 * 構文木を解放する
 */
static void free_tree(tree_t *p)
{
  switch (p->op) {
  case OP_CONCAT:
  case OP_UNION:
    if (p->u.x.right) free_tree(p->u.x.right);
    p->u.x.right = NULL;
  case OP_CLOSURE:
    if (p->u.x.left) free_tree(p->u.x.left);
    p->u.x.left = NULL;
    break;
  default:
    break;
  }
  rege_free_size(p, sizeof(tree_t));
}

/*
 * 正規表現をパースして、対応する構文木を含むパーサ型を生成
 */
static rege_parser_t *parse_regexp(char *pattern, rege_syntax_t syntax, char *trans_table)
{
  rege_parser_t *parser;

  if ((parser=(rege_parser_t*)rege_malloc(sizeof(rege_parser_t))) == NULL)
    return NULL;
  parser->syntax = syntax;
  parser->errcode = REGE_NOERROR;
  parser->trans_table = trans_table;

  if (syntax & (REGE_BS_BRACES|REGE_BS_PARENS|REGE_BS_PLUS_QM|REGE_BS_VBAR|
                REGE_LIMITED_OPS|REGE_NEWLINE_ALT|
                REGE_NO_INTERVALS|
                REGE_NO_XCHAR_CLASSES)) {
    parser->token_chars = (char**)malloc(sizeof(token_chars));
    memcpy(parser->token_chars, token_chars, sizeof(token_chars));
  }
  if (syntax & REGE_BS_BRACES) {
    parser->token_chars[TOKEN_INTERVAL_OPEN] = "\\{";
    parser->token_chars[TOKEN_INTERVAL_CLOSE] = "\\}";
  }
  if (syntax & REGE_BS_PARENS) {
    parser->token_chars[TOKEN_GROUP_OPEN] = "\\(";
    parser->token_chars[TOKEN_GROUP_CLOSE] = "\\)";
  }
  if (syntax & REGE_BS_PLUS_QM) {
    parser->token_chars[TOKEN_INTERVAL_ONE_MORE] = "\\+";
    parser->token_chars[TOKEN_INTERVAL_ZERO_ONE] = "\\?";
  }
  if (syntax & REGE_BS_VBAR) {
    parser->token_chars[TOKEN_ALTERNATION] = "\\|";
  }
  if (syntax & REGE_LIMITED_OPS) {
    parser->token_chars[TOKEN_INTERVAL_ONE_MORE] = "";
    parser->token_chars[TOKEN_INTERVAL_ZERO_ONE] = "";
    parser->token_chars[TOKEN_ALTERNATION] = "";
  }
  if (syntax & REGE_NEWLINE_ALT) {
    parser->token_chars[TOKEN_ALTERNATION] = "\n";
  }
  if (syntax & REGE_NO_INTERVALS) {
    parser->token_chars[TOKEN_INTERVAL_OPEN] = "";
    parser->token_chars[TOKEN_INTERVAL_CLOSE] = "";
  }
  if (syntax & REGE_DOT_NEWLINE && syntax & REGE_DOT_NOT_NULL) {
    static char any_char[] = "\x01-\xfe";
    parser->any_char = any_char;
  } else if (syntax & REGE_DOT_NEWLINE) {
    static char any_char[] = "\\0-\xfe";
    parser->any_char = any_char;
  } else if (syntax & REGE_DOT_NOT_NULL) { /* ! */
    static char any_char[] = "\x01-\x0c\x0e-\xfe";
    parser->any_char = any_char;
  }
  if (trans_table == NULL && syntax & REGE_IGNORE_CASE) {
    static char trans_table[CHAR_SPACE_SIZE];
    static int i = 0;
    if (i != 0)
      for (i=0; i<CHAR_SPACE_SIZE; i++)
        trans_table[i] = isupper(i)?tolower(i):i;
    parser->trans_table = trans_table;
  } else if (trans_table != NULL && syntax & REGE_IGNORE_CASE) {
    int i;
    for (i=0; i<CHAR_SPACE_SIZE; i++)
      trans_table[i]
        = isupper(trans_table[i])?tolower(trans_table[i]):trans_table[i];
  }

  initialize_regexp_parser(pattern, parser);
  parser->tree = regexp(parser);
  if (parser->current_token != TOKEN_END)
    parser->errcode = REGE_EEND;
#ifdef DEBUG
  fprintf(stderr, "-- tree\n");
  dump_tree(parser->tree);
  fprintf(stderr, "\n");
#endif  
  return parser;
}

/*
 * パーサを解放する
 */
static void free_parser(rege_parser_t *parser)
{
  free_tree(parser->tree);
  rege_free_size(parser, sizeof(rege_parser_t));
}

/*
 * 構文木から NFA を生成
 */
#define EPSILON_TRANS -1 /* ε遷移 */

/*
 * NFA における遷移を表す型
 */
typedef struct nlist {
  char c;
  int to;
  struct nlist *next;
} nlist_t;

/*
 * NFA を表す型
 */
typedef struct {
  rege_syntax_t syntax;         /* 構文規則 */
  rege_errcode_t errcode;       /* エラーコード */
  char *trans_table;            /* 変換表 */
#if (VERSION_DOWN == 1)
#define NFA_STATE_MAX 128       /* NFA の状態数の上限 */
#else
#define NFA_STATE_MAX 256       /* NFA の状態数の上限 */
#endif
  nlist_t *states[NFA_STATE_MAX]; /* NFA の状態遷移表 */
  int entry;                    /* NFA の初期状態 */
  int exit;                     /* NFA の終了状態 */
  int nstate;                   /* NFA の状態数 */
#if (VERSION_DOWN == 1)
#else
  int head_flag;                /* 行頭マッチを含むか */
  int tail_flag;                /* 行末マッチを含むか */
#endif
} rege_nfa_t;

/*
 * ノードに番号を割り当てる
 */
static int gen_node(rege_nfa_t *nfa)
{
  if (nfa->nstate >= NFA_STATE_MAX) {
    nfa->errcode = REGE_ESIZE;
    return 0;
  }
  return nfa->nstate++;
}

/*
 * NFA に状態遷移を追加する
 */
static void add_transition(rege_nfa_t *nfa, int from, int to, char c)
{
  nlist_t *p;

  if ((p=(nlist_t*)rege_malloc(sizeof(nlist_t))) == NULL) {
    nfa->errcode = REGE_ESPACE;
    return;
  }
  p->c = c;
  p->to = to;
  p->next = nfa->states[from];
  nfa->states[from] = p;
}

/*
 * 構文木 tree に対する NFA を生成
 */
static void gen_nfa(rege_nfa_t *nfa, tree_t *tree, int entry, int way_out)
{
  int a1, a2;

  switch (tree->op) {
  case OP_CHAR:
    add_transition(nfa, entry, way_out, tree->u.c);
    break;
#if (VERSION_DOWN == 1)
#else
  case OP_HEAD:
    if (entry == nfa->entry) {
      nfa->head_flag = !0;
      add_transition(nfa, entry, way_out, EPSILON_TRANS);
    }/* else
      add_transition(nfa, entry, way_out, tree->u.c);*/
    break;
  case OP_TAIL:
    if (way_out == nfa->exit) {
      nfa->tail_flag = !0;
      add_transition(nfa, entry, way_out, EPSILON_TRANS);
    }/* else
      add_transition(nfa, entry, way_out, tree->u.c);*/
    break;
#endif
  case OP_EMPTY:
    add_transition(nfa, entry, way_out, EPSILON_TRANS);
    break;
  case OP_UNION:
    gen_nfa(nfa, tree->u.x.left, entry, way_out);
    gen_nfa(nfa, tree->u.x.right, entry, way_out);
    break;
  case OP_CLOSURE:
    a1 = gen_node(nfa);
    a2 = gen_node(nfa);
    add_transition(nfa, entry, a1, EPSILON_TRANS);
    gen_nfa(nfa, tree->u.x.left, a1, a2);
    add_transition(nfa, a2, a1, EPSILON_TRANS);
    add_transition(nfa, a1, way_out, EPSILON_TRANS);
    break;
  case OP_CONCAT:
    a1 = gen_node(nfa);
    gen_nfa(nfa, tree->u.x.left, entry, a1);
    gen_nfa(nfa, tree->u.x.right, a1, way_out);
    break;
  default:
    nfa->errcode = REGE_BADPAT;
    return;
  }
}

#ifdef DEBUG
/*
 * NFA を表示する
 */
static void dump_nfa(rege_nfa_t *nfa)
{
  int i;
  nlist_t *p;

  for (i=0; i<nfa->nstate; i++)
    if (nfa->states[i] != NULL) {
      fprintf(stderr, "state %3d: ", i);
      for (p=nfa->states[i]; p!=NULL; p=p->next)
        fprintf(stderr, "(%c . %d) ", (p->c==EPSILON_TRANS)?'?':p->c,p->to);
      fprintf(stderr, "\n");
    }
}
#endif

/*
 * NFA を解放する
 */
static void free_nfa(rege_nfa_t *nfa)
{
  int i;
  nlist_t *p, *q;

  for (i=0; i<nfa->nstate; i++)
    if (nfa->states[i] != NULL) {
      p = nfa->states[i];
      while (p != NULL) {
        q = p;
        p = p->next;
        rege_free_size(q, sizeof(nlist_t));
      }
    }
  rege_free_size(nfa, sizeof(rege_nfa_t));
}

/*
 * 構文木に対する NFA を生成
 */
static rege_nfa_t *build_nfa(rege_parser_t *parser)
{
  rege_nfa_t *nfa;

  if ((nfa=(rege_nfa_t*)rege_malloc(sizeof(rege_nfa_t))) == NULL)
    return NULL;
  nfa->syntax = parser->syntax;
  nfa->errcode = REGE_NOERROR;
  nfa->trans_table = parser->trans_table;

  nfa->nstate = 0;
  nfa->entry = gen_node(nfa);
  nfa->exit = gen_node(nfa);
#if (VERSION_DOWN == 1)
#else
  nfa->head_flag = 0;
  nfa->tail_flag = 0;
#endif
  gen_nfa(nfa, parser->tree, nfa->entry, nfa->exit);
#ifdef DEBUG
  fprintf(stderr, "-- NFA\n");
  dump_nfa(nfa);
#endif
  return nfa;
}

/*
 * NFA を DFA に変換する
 */
#define NFA_VECTOR_SIZE (NFA_STATE_MAX/8) /* 状態集合に必要なバイト数 */

/*
 * NFA 状態集合を表す型
 */
typedef struct {
  unsigned char vec[NFA_VECTOR_SIZE];
} nfa_state_set_t;

/*
 * NFA 状態集合 state に状態 s が含まれているか
 */
#define check_nfa_state(state,s) ((state)->vec[(s)/8]&(1<<((s)%8)))

/*
 * 遷移可能な NFA 状態のリストを表す型
 */
typedef struct dlist {
  char c;
  nfa_state_set_t to;
  struct dlist *next;
} dlist_t;

/*
 * 遷移先のリストを表す型
 */
typedef struct dslist {
  char c;
  struct dfa_state *to;
  struct dslist *next;
} dslist_t;

/*
 * DFA 状態を表す型
 */
typedef struct dfa_state {
  nfa_state_set_t *state;
  int visited;
  int accepted;
  dslist_t *next;
  dlist_t *reach;
} dfa_state_t;

/*
 * DFA を表す型
 */
typedef struct {
  rege_syntax_t syntax;         /* 構文規則 */
  rege_errcode_t errcode;       /* エラーコード */
  char *trans_table;            /* 変換表 */
#define DFA_STATE_MAX 100       /* DFA の状態数の上限 */
  dfa_state_t states[DFA_STATE_MAX]; /* DFA の状態遷移表 */
  dfa_state_t *initial_state;   /* DFA の初期状態 */
  int nstate;                   /* DFA の状態数 */
#if (VERSION_DOWN == 1)
#else
  int head_flag;                /* 行頭マッチを含むか */
  int tail_flag;                /* 行末マッチを含むか */
#endif
  nfa_state_set_t *nfa_initial_state;
} rege_dfa_t;

#ifdef DEBUG
/*
 * NFA 状態集合を表示
 */
static void dump_nfa_state_set(rege_nfa_t *nfa, nfa_state_set_t *p)
{
  int i;

  for (i=0; i<nfa->nstate; i++)
    if (check_nfa_state(p, i))
      fprintf(stderr, "%d ", i);
}

/*
 * DFA を表示する
 */
static void dump_dfa(rege_nfa_t *nfa, rege_dfa_t *dfa)
{
  dslist_t *l;
  int i;

  for (i=0; i<dfa->nstate; i++) {
    fprintf(stderr, "%2d%c: ", i, dfa->states[i].accepted?'A':' ');
    for (l=dfa->states[i].next; l!=NULL; l=l->next)
      fprintf(stderr, "%c=>%d ", l->c, l->to-dfa->states);
    fprintf(stderr, "\n");
  }
  for (i=0; i<dfa->nstate; i++) {
    fprintf(stderr, "state %2d%c = {", i, dfa->states[i].accepted?'A':' ');
    dump_nfa_state_set(nfa, dfa->states[i].state);
    fprintf(stderr, "}\n");
  }
}
#endif

/*
 * DFA を解放する
 */
static void free_dfa(rege_dfa_t *dfa)
{
  int i;
  {
    dslist_t *p, *q;
    
    for (i=0; i<dfa->nstate; i++) {
      p = dfa->states[i].next;
      while (p != NULL) {
        q = p;
        p = p->next;
        rege_free_size(q, sizeof(dslist_t));
      }
    }
  }
  {
    dlist_t *p, *q;
    
    for (i=0; i<dfa->nstate; i++) {
      p = dfa->states[i].reach;
      while (p != NULL) {
        q = p;
        p = p->next;
        rege_free_size(q, sizeof(dlist_t));
      }
    }
  }
  rege_free_size(dfa->nfa_initial_state, sizeof(nfa_state_set_t));
  rege_free_size(dfa, sizeof(rege_dfa_t));
}

/*
 * NFA 状態集合 state に NFA 状態 s を追加する
 */
static void add_nfa_state(nfa_state_set_t *state, int s)
{
  state->vec[s/8] |= (1<<(s%8));
}

/*
 * NFA 状態集合 state に NFA 状態 s を追加し、
 * NFA 状態からε遷移で移動できる NFA 状態も追加する
 */
static void mark_empty_transition(rege_nfa_t *nfa, nfa_state_set_t *state, int s)
{
  nlist_t *p;

  add_nfa_state(state, s);
  for (p=nfa->states[s]; p!=NULL; p=p->next)
    if (p->c == EPSILON_TRANS && !check_nfa_state(state, p->to))
      mark_empty_transition(nfa, state, p->to);
}

/*
 * NFA 状態集合 state に対して ε-closure 操作を実行し、
 * ε遷移で遷移可能なすべての NFA 状態を追加する
 */
static void collect_empty_transition(rege_nfa_t *nfa, nfa_state_set_t *state)
{
  int i;

  for (i=0; i<nfa->nstate; i++)
    if (check_nfa_state(state, i))
      mark_empty_transition(nfa, state, i);
}

/*
 * 2つの NFA 状態集合 a と b が等しいか調べる
 */
static int equal_nfa_state_set(nfa_state_set_t *a, nfa_state_set_t *b)
{
  int i;

  for (i=0; i<NFA_VECTOR_SIZE; i++)
    if (a->vec[i] != b->vec[i])
      return 0;
  return !0;
}

/*
 * NFA 状態集合 s を DFA に登録して、DFA 状態へのポインタを返し、
 * s が終了状態を含んでいれば accepted フラグをセットする
 */
static dfa_state_t *register_dfa_state(rege_nfa_t *nfa, rege_dfa_t *dfa, nfa_state_set_t *s)
{
  int i;

  for (i=0; i<dfa->nstate; i++)
    if (equal_nfa_state_set(dfa->states[i].state, s))
      return &dfa->states[i];
  if (dfa->nstate >= DFA_STATE_MAX) {
    dfa->errcode = REGE_ESIZE;
    return NULL;
  }
  
  dfa->states[dfa->nstate].state = s;
  dfa->states[dfa->nstate].visited = 0;
  dfa->states[dfa->nstate].accepted = check_nfa_state(s, nfa->exit)?(!0):0;
  dfa->states[dfa->nstate].next = NULL;
  return &dfa->states[dfa->nstate++];
}

/*
 * 処理済みでない DFA 状態を探す
 */
static dfa_state_t *fetch_unvisited_dfa_state(rege_dfa_t *dfa)
{
  int i;

  for (i=0; i<dfa->nstate; i++)
    if (!dfa->states[i].visited)
      return &dfa->states[i];
  return NULL;
}

/*
 * DFA 状態 dstate から遷移可能な NFA 状態を探してリストにして返す
 */
static dlist_t *compute_reachable_nfa_state(rege_nfa_t *nfa, rege_dfa_t *dfa, dfa_state_t *dstate)
{
  int i;
  nlist_t *p;
  dlist_t *result = NULL, *a, *b;
  nfa_state_set_t *state = dstate->state;

  for (i=0; i<nfa->nstate; i++)
    if (check_nfa_state(state, i))
      for (p=nfa->states[i]; p!=NULL; p=p->next)
        if (p->c != EPSILON_TRANS) { /* ε遷移は除外 */
          for (a=result; a!=NULL; a=a->next)
            if (a->c == p->c) {
              add_nfa_state(&a->to, p->to);
              goto added;
            }
          if ((b=(dlist_t*)rege_malloc(sizeof(dlist_t))) == NULL) {
            dfa->errcode = REGE_ESPACE;
            return NULL;
          }
          b->c = p->c;
          add_nfa_state(&b->to, p->to);
          b->next = result;
          result = b;
        added:
          ;
        }
  return result;
}

/*
 * NFA を等価な DFA へ変換する
 */
static rege_dfa_t *convert_nfa_to_dfa(rege_nfa_t *nfa)
{
  rege_dfa_t *dfa;
  dfa_state_t *t;
  dlist_t *x;
  dslist_t *p;

  if ((dfa=(rege_dfa_t*)rege_malloc(sizeof(rege_dfa_t))) == NULL)
    return NULL;
  dfa->syntax = nfa->syntax;
  dfa->errcode = REGE_NOERROR;
  dfa->trans_table = nfa->trans_table;

  /* DFA の初期状態を登録する */
  if ((dfa->nfa_initial_state=(nfa_state_set_t*)rege_malloc(sizeof(nfa_state_set_t))) == NULL) {
    dfa->errcode = REGE_ESPACE;
    return dfa;
  }
  add_nfa_state(dfa->nfa_initial_state, nfa->entry);
  collect_empty_transition(nfa, dfa->nfa_initial_state);
  dfa->nstate = 0;
  dfa->initial_state = register_dfa_state(nfa, dfa, dfa->nfa_initial_state);
#if (VERSION_DOWN == 1)
#else
  dfa->head_flag = nfa->head_flag;
  dfa->tail_flag = nfa->tail_flag;
#endif

  while ((t=fetch_unvisited_dfa_state(dfa)) != NULL) {
    t->visited = !0;
    for (t->reach=x=compute_reachable_nfa_state(nfa, dfa, t); x!=NULL; x=x->next) {
      collect_empty_transition(nfa, &x->to);
      if ((p=(dslist_t*)rege_malloc(sizeof(dslist_t))) == NULL) {
        dfa->errcode = REGE_ESPACE;
        return dfa;
      }
      p->c = x->c;

      p->to = register_dfa_state(nfa, dfa, &x->to);
      p->next = t->next;
      t->next = p;
    }
  }   

#ifdef DEBUG
  fprintf(stderr, "-- DFA\n");
  dump_dfa(nfa, dfa);
#endif
  return dfa;
}

/*
 * DFA でパターンマッチングを行なう
 */

/*
 * 状態 state から文字 c で状態を遷移させる
 */
static dfa_state_t *next_dfa_state(rege_dfa_t *dfa, dfa_state_t *state, char c)
{
  dslist_t *p;

  for (p=state->next; p!=NULL; p=p->next)
    if (dfa->trans_table == NULL) {
      if (c == p->c)
        return p->to;
    } else
      if (dfa->trans_table[c] == dfa->trans_table[p->c])
        return p->to;
  return NULL;
}

/*
 * 正規表現をコンパイル
 */
rege_t *rege_compile(char *pattern, rege_syntax_t syntax, char *trans_table)
{
  rege_parser_t *parser;
  rege_nfa_t *nfa;
  rege_dfa_t *dfa;
  rege_t *rege;
#define error_return(p) \
  do {\
    if (!p) {\
      rege_free_size(rege, sizeof(rege_t));\
      return NULL;\
    }\
    if (p->errcode != REGE_NOERROR) {\
      rege->errcode = p->errcode;\
      free_##p(p);\
      return rege;\
    }\
  } while (0)

  if ((rege=(rege_t*)rege_malloc(sizeof(rege_t))) == NULL)
    return NULL;
  rege->syntax = syntax;
  rege->errcode = REGE_NOERROR;
  rege->trans_table = trans_table;
  rege->buffer = NULL;

  parser = parse_regexp(pattern, syntax, trans_table);
  error_return(parser);
  nfa = build_nfa(parser);
  free_parser(parser);
  error_return(nfa);
  dfa = convert_nfa_to_dfa(nfa);
  free_nfa(nfa);
  error_return(dfa);
  rege->buffer = (void*)dfa;
  return rege;
}

/*
 * DFA で文字列 string に対してパターンマッチングを行なう
 */
void rege_match(rege_t *rege, char *string, size_t size, char **from, char **to)
{
  char *start;
  char *p, *max_match;
  dfa_state_t *state;
  rege_dfa_t *dfa;

  if (rege == NULL || rege->buffer == NULL) {
    if (from) *from = NULL;
    return;
  }
  dfa = (rege_dfa_t*)rege->buffer;
  if (dfa->errcode == REGE_NOMATCH)
    dfa->errcode = REGE_NOERROR;
  if (rege->trans_table == NULL && rege->syntax & REGE_IGNORE_CASE) {
    static char trans_table[CHAR_SPACE_SIZE];
    static int i = 0;
    if (i != 0)
      for (i=0; i<CHAR_SPACE_SIZE; i++)
        trans_table[i] = isupper(i)?tolower(i):i;
    dfa->trans_table = rege->trans_table = trans_table;
  }
#if (VERSION_DOWN == 1)
#else
  if (rege->syntax & REGE_NO_MATCH_BOL && dfa->head_flag) {
    if (from) *from = NULL;
    rege->errcode = REGE_NOMATCH;
    return;
  }
  if (rege->syntax & REGE_NO_MATCH_EOL && dfa->tail_flag) {
    if (from) *from = NULL;
    rege->errcode = REGE_NOMATCH;
    return;
  }
#endif
  for (start=string; start-string<size; start++) { /* *start!='\0'; */
    state = dfa->initial_state;
    max_match = NULL;
    p = start;
    while (state != NULL) {
#if (VERSION_DOWN == 1)
      if (state->accepted)
        max_match = p;
#else
      if (!dfa->tail_flag) {
        if (state->accepted)
          max_match = p;
      } else
        if ((*p == '\n'||*p == '\0') && state->accepted)
          max_match = p;
#endif
      if (p-string==size) /* *p == '\0' */
        break;
      state = next_dfa_state(dfa, state, *p++);
    }
#if (VERSION_DOWN == 1)
    if (max_match != NULL && max_match != start) {
      if (from) *from = start;
      if (to) *to = max_match;
      return;
    }
#else
    if (max_match != NULL) {
      if (from) *from = start;
      if (to) *to = max_match;
      return;
    }
    if (dfa->head_flag)
      break;
#endif
  }
  if (from) *from = NULL;
  rege->errcode = REGE_NOMATCH;
}

/*
 * DFA で文字列 string に対してパターンマッチングを行なう '^', '$' は無視
 */
void rege_next_match(rege_t *rege, char *string, size_t size, char **from, char **to)
{
  rege->syntax |= (REGE_NO_MATCH_BOL|REGE_NO_MATCH_EOL);
  rege_match(rege, string, size, from, to);
  rege->syntax &= ~(REGE_NO_MATCH_BOL|REGE_NO_MATCH_EOL);
}

/*
 * 正規表現のコンパイルのための型を解放する
 */
void rege_free(rege_t *rege)
{
  rege_dfa_t *dfa = (rege_dfa_t*)rege->buffer;

#ifdef DEBUG
  fprintf(stderr, "%d\n", used_size);
#endif
  free_dfa(dfa);
  rege_free_size(rege, sizeof(rege_t));
#ifdef DEBUG
  fprintf(stderr, "%d\n", used_size);
#endif
}
