/**
 * # CHAPTER #
 * ============================================================================
 * BONSAI-related functions
 * ============================================================================
 */

#include <musashi.h>
#include <bonsai.h>
#include <usint.h>
#include <exHash.h>
#include <condition.h>
#include <priorityQueue.h>

/* ############ Global variables ##############*/
/* Option parameters defined in another translation unit */
extern MssOptFLG optSEQ; /* .set is stored as Regexp.type (0:substring, 1:subsequence) */
extern MssOptINT optEND; /* .val is the largest endRng tried for seed patterns */
extern MssOptINT optBGN; /* .val is the largest bgnRng tried for seed patterns */
extern MssOptINT optSED; /* .val is passed to mssInitRand() */
extern MssOptINT optCAN; /* .val is the maximum number of kept pattern candidates */

extern int ClassSize; /* presumably the number of classes; only tested against 2 here */

int usedRandSeed; /* value returned by mssInitRand(); not read in this file, presumably used elsewhere for reporting */

/*
 * # STRUCT #
 * Priority queue that keeps the best pattern candidates found so far.
 */
struct RegBest{
  struct PQnode *pqNode; /* head node of the priority queue (entries hang off ->left) */
  int    cnt;            /* number of patterns inserted by setRegBest */
  int    maxCnt;         /* maximum number of patterns to keep (optCAN) */
  double maxVal;         /* objVal of the worst registered pattern as last recorded */
                         /* by setRegBest; initialised to 9999 in setRegTbl */
};

/**
 * # SECTION #
 * ----------------------------------------------------------------------------
 * Alphabet / index related functions
 * ----------------------------------------------------------------------------
 */

/**
 * # FUNCTION #
 * Check whether an initial index string is usable.
 * "Usable" means every index value 1..cnt occurs at least once in idx.
 *
 * idx : zero-terminated index string
 * cnt : number of distinct index values that must appear
 * returns 1 if all values 1..cnt are present, otherwise 0
 *
 * The string is scanned directly instead of marking a fixed char[256]
 * table: that table overflowed for values above 255 and its -1 sentinel
 * never compared equal on platforms where plain char is unsigned.
 */
static int goodInitIdx( usint *idx, int cnt){
  int len;
  int want;
  int pos;

  len=strlenUSI(idx);
  for(want=1; want<=cnt; want++){
    for(pos=0; pos<len; pos++){
      if( *(idx+pos)==want ) break;
    }
    if(pos>=len) return(0); /* value `want` is never used */
  }
  return(1);
}

/**
 * # FUNCTION #
 * Check whether an index string is usable.
 * Here "usable" means the string is not made of one repeated value:
 * returns 1 as soon as an element differs from the first one, else 0
 * (so strings of length 0 or 1 yield 0).
 */
static int goodIdx(usint *idx){
  usint first=*idx;
  int   len=strlenUSI(idx);
  int   pos;

  for(pos=1; pos<len; pos++){
    if( *(idx+pos)!=first ) return(1);
  }
  return(0);
}

/**
 * # FUNCTION #
 * Build an index list holding a single identity index: for every pattern
 * item the i-th alphabet symbol is mapped to index value i+1, so the index
 * has as many values as the alphabet.
 * The random generator is still initialised and its seed recorded in
 * usedRandSeed.
 * Returns a newly allocated struct Idxs with cnt==1.
 */
struct Idxs *initIdxs0(struct Data *dat){
  struct Idxs *idxs;
  usint *cur;
  int patNo;
  int pos;
  int alpSiz;

  usedRandSeed=mssInitRand(optSED.val);

  /* container with exactly one index set */
  idxs=mssMalloc(sizeof(struct Idxs),"initIdxs01");
  idxs->idx=mssMalloc(sizeof(usint **)*1,"initIdxs02");
  idxs->idx[0]=mssMalloc(sizeof(usint *)*dat->patCnt,"initIdxs03");
  idxs->cnt=1;
  idxs->patCnt=dat->patCnt;

  for(patNo=0; patNo<dat->patCnt; patNo++){
    alpSiz=dat->pat[patNo].map->alpSiz;
    cur=mssCalloc(sizeof(usint)*(alpSiz+1),"initIdxs04");
    idxs->idx[0][patNo]=cur;

    /* identity mapping 1,2,...,alpSiz followed by the terminator */
    for(pos=0; pos<alpSiz; pos++){
      cur[pos]=1+pos;
    }
    cur[pos]=0;
  }
  return(idxs);
}

/**
 * # FUNCTION #
 * Build an index list holding a single, randomly chosen initial index.
 *
 * For a numeric pattern item (numPat != 0) idxSiz-1 distinct boundary
 * positions are drawn and the index value increases by one at each
 * boundary, giving a non-decreasing index. For other items each alphabet
 * symbol gets a random value in 1..idxSiz, redrawn until goodInitIdx()
 * accepts the string.
 *
 * The random generator is initialised here and its seed is recorded in
 * usedRandSeed. Returns a newly allocated struct Idxs with cnt==1.
 *
 * Fixes relative to the previous version:
 *  - rand()/unit can reach idxSiz (rand() close to RAND_MAX), which gave
 *    the out-of-range value idxSiz+1; the value is now clamped.
 *  - the boundary position could equal alpSiz, i.e. the terminator slot,
 *    leaving the string unterminated and losing a boundary; clamped to
 *    alpSiz-1 and the terminator is written explicitly.
 *  - division by zero for alpSiz==1 (or a zero divisor) is avoided.
 *
 * NOTE(review): the non-numeric branch never terminates if idxSiz is
 * larger than alpSiz, as before - confirm callers guarantee this.
 */
struct Idxs *initIdxs(struct Data *dat){
  int i,j,k;
  int size;    /* alphabet size of the current pattern item */
  int idxSiz;  /* index size of the current pattern item */
  struct Idxs *idxs;
  int unit;
  int val;
  usint num;
  usint *str;
  int numCnt;
  int numMax;
  int digits;

  usedRandSeed=mssInitRand(optSED.val);

  idxs=mssMalloc(sizeof(struct Idxs),"initIdxs1");
  idxs->idx=mssMalloc(sizeof(usint **)*1,"initIdxs2");
  for(i=0; i<1; i++){
    *(idxs->idx+i)=mssMalloc(sizeof(usint *)*dat->patCnt,"initIdxs3");
  }
  idxs->cnt=1;
  idxs->patCnt=dat->patCnt;

  for(j=0; j<dat->patCnt; j++){
    size  =(dat->pat+j)->map->alpSiz;
    idxSiz=(dat->pat+j)->map->idxSiz;
    *(*(idxs->idx+0)+j)=mssCalloc(sizeof(usint)*(size+1),"initIdxs4");
    str=*(*(idxs->idx+0)+j);

    /* numeric pattern item */
    if((dat->pat+j)->numPat){
      /* boundaries may only sit on positions 1..size-1 */
      numMax=idxSiz-1;
      if(numMax>size-1) numMax=size-1;

      unit=(size>1) ? RAND_MAX/(size-1) : 0;
      if(unit<1) unit=1;

      numCnt=0;
      while(numCnt<numMax){
        digits=rand()/unit+1;
        if(digits>size-1) digits=size-1; /* rand() near RAND_MAX overshoots */
        if( *(str+digits) != 0 ) continue; /* position already taken */
        *(str+digits) = 9999;              /* temporary boundary mark */
        numCnt++;
      }
      num=1;
      for(i=0; i<size; i++){
        if(*(str+i)==9999) num+=1;
        *(str+i)=num;
      }
      if(size>=0) *(str+size)=0;

    /* non-numeric pattern item */
    }else{
      unit=(idxSiz>0) ? RAND_MAX/idxSiz : 0;
      if(unit<1) unit=1;

      while(1){
        for(k=0; k<size; k++){
          val=1+rand()/unit;
          if(val>idxSiz) val=idxSiz; /* rand() near RAND_MAX overshoots */
          *(str+k) = val;
        }
        *(str+k) = 0;
        if( goodInitIdx(str, idxSiz) )break;
      }
    }
  }
  return(idxs);
}

/**
 * # FUNCTION #
 * Release one index set: the cnt index strings referenced by add,
 * followed by the pointer array add itself.
 */
static void freeIdxsSub(usint **add, int cnt){
  int n=0;

  while(n<cnt){
    mssFree(add[n]);
    n++;
  }
  mssFree(add);
}

/**
 * # FUNCTION #
 * Release a whole index list: every registered index set (idxs->cnt of
 * them), the array of sets, and finally the container itself.
 */
void freeIdxs(struct Idxs *idxs){
  int setNo=0;

  while(setNo<idxs->cnt){
    freeIdxsSub( idxs->idx[setNo], idxs->patCnt );
    setNo++;
  }
  mssFree(idxs->idx);
  mssFree(idxs);
}

/**
 * # FUNCTION #
 * ǥåΰγ
 */
static struct Idxs *malNextIdxs(struct Idxs *idxs, struct Data *dat){
  int i;

  /*ΰ*/
  idxs->idx=mssRealloc(idxs->idx,sizeof(usint **)*(idxs->cnt+1),"malIdxs1");
  *(idxs->idx+idxs->cnt)=mssCalloc(sizeof(usint *)*dat->patCnt,"malIdxs2");

  /*öȤalpha-index򥳥ԡ*/
  for(i=0; i<dat->patCnt; i++){
    *(*(idxs->idx+idxs->cnt)+i)
      =mssCalloc(sizeof(usint)*((dat->pat+i)->map->alpSiz+1),"malIdxs3");
    strcpyUSI( *(*(idxs->idx+idxs->cnt)+i), (dat->pat+i)->map->idx );
  }
  return(idxs);
}

/**
 * # FUNCTION #
 * ߤΥǥåŤѹǥåΥꥹȤ
 */
struct Idxs *setNextIdxs(struct Data *dat){
  int i,j,k;
  struct Idxs *idxs;
  int numPat;
  char chr;
  char prv;
  char nxt;

  idxs=mssCalloc(sizeof(struct Idxs),"nextIdxs1");
  idxs->cnt=0;
  idxs->patCnt=dat->patCnt;

  /*ǥåѹܤǲ*/
  for(i=0; i<dat->patCnt; i++){
    numPat=(dat->pat+i)->numPat; /*ͥѥ󤫤ɤ*/
    for(j=0; j<(dat->pat+i)->map->alpSiz; j++){ /*alphaĹ*/
      chr=*((dat->pat+i)->map->idx+j); /*оݤʸ*/
      if(!numPat){
        for(k=1; k<=(dat->pat+i)->map->idxSiz; k++){ /*indexμ*/
          if(k==chr) continue;
          idxs=malNextIdxs(idxs,dat);
          *(*(*(idxs->idx+idxs->cnt)+i)+j)=k;
          if( !goodIdx( *(*(idxs->idx+idxs->cnt)+i) ) ){
            freeIdxsSub( *(idxs->idx+idxs->cnt),dat->patCnt );
            continue; /*idxs->cnt򥫥ȥåפʤΤϿʤƱ*/
          }
          idxs->cnt++;
        }
      }else{
        if(j==0) prv=0;
        else     prv=*((dat->pat+i)->map->idx+j-1);
        if(j==(dat->pat+i)->map->alpSiz-1) nxt=0;
        else nxt=*((dat->pat+i)->map->idx+j+1);
        if(prv!=chr && prv!=0){
          idxs=malNextIdxs(idxs,dat);
          *(*(*(idxs->idx+idxs->cnt)+i)+j)=prv;
          if( !goodIdx( *(*(idxs->idx+idxs->cnt)+i) ) ){
            freeIdxsSub( *(idxs->idx+idxs->cnt),dat->patCnt );
            continue; /*idxs->cnt򥫥ȥåפʤΤϿʤƱ*/
          }
          idxs->cnt++;
        }
        if(nxt!=chr && nxt!=0){
          idxs=malNextIdxs(idxs,dat);
          *(*(*(idxs->idx+idxs->cnt)+i)+j)=nxt;
          if( !goodIdx( *(*(idxs->idx+idxs->cnt)+i) ) ){
            freeIdxsSub( *(idxs->idx+idxs->cnt),dat->patCnt );
            continue; /*idxs->cnt򥫥ȥåפʤΤϿʤƱ*/
          }
          idxs->cnt++;
        }
      }
    }
  }
  return(idxs);
}

/**
 * # FUNCTION #
 * Copy one index set (one index string per pattern item) into the maps
 * of the dat structure.
 */
void cpyIdx(struct Data *dat, usint **idx){
  int patNo=0;

  while(patNo<dat->patCnt){
    strcpyUSI( dat->pat[patNo].map->idx, idx[patNo] );
    patNo++;
  }
}

/**
 * # FUNCTION #
 * Return the index value that map assigns to the alphabet symbol alpha.
 * A symbol that is not part of the map is treated as an internal error:
 * a message is shown and the program exits.
 */
char alp2idx(struct Map *map, usint alpha){
  int pos=0;

  while(pos<map->alpSiz){
    if(map->alp[pos]==alpha) return (map->idx[pos]);
    pos++;
  }
  mssShowErrMsg("internal error2");
  exit(mssErrorNoDefault);
  return(1); /* not reached */
}

/**
 * # FUNCTION #
 * Convert the zero-terminated alphabet string alpha into the matching
 * index string and store it, zero-terminated, in index.
 * NOTE(review): index must be large enough; no length is checked here.
 */
void alpStr2idxStr(usint index[],usint *alpha, struct Map *map){
  usint *src;
  usint *dst=index;

  for(src=alpha; *src!=0; src++){
    *dst++=alp2idx(map,*src);
  }
  *dst=0;
}


/**
 * # FUNCTION #
 * dat¤ΤΥǡ򥤥ǥå
 */
void datIndexing( struct Data *dat ){

  usint          alpha;            /*ե٥åʸ*/
  usint          index[MaxPatLen]; /*ǥå줿ʸ*/
  usint         *str;              /*ѥȤΰŪݥ*/
  int i,j,k;

  for(i=0; i<dat->patCnt; i++){ /*ѥܿ*/
    freeStrListUSI((dat->pat+i)->patIdx);
    /*index줿ǡѿ*/
    (dat->pat+i)->patIdx=initStrListUSI();
  }

  for(i=0; i<dat->patCnt; i++){ /*ѥܿ*/
    for(j=0; j<dat->cnt; j++){  /*ǡ*/
      k=0;                      /*ǥåθʸ*/
      /*iܹܤjܥǡʸ*/
      str=getStrListUSI((dat->pat+i)->patAlp,j);
      while(*str!='\0'){ /*NULLͤξϤʤ֥졼*/
        alpha=*(str++);
        index[k++]=alp2idx((dat->pat+i)->map,alpha);
      }
      index[k]=0; /*ߥ͡*/
      putStrListUSI( (dat->pat+i)->patIdx, index); /*ѥγǼ*/
    }
  }
}

/**
 * # FUNCTION #
 * ѥ°Ȥơ֥
 */
void mkAtt( struct Data *dat ){

  int i,j,k;

  freeAtt(dat);

  /*ѥ°ΰγ*/
  for(i=0; i<dat->patCnt; i++){
    (dat->pat+i)->attCnt = (dat->pat+i)->regTbl->cnt;

    for(j=0; j<(dat->pat+i)->attCnt; j++){
      (dat->pat+i)->att[j]= mssCalloc(sizeof(char)*dat->cnt,"AttTblAtt");
    }
  }

  /*ɽ°*/
  for(i=0; i<dat->patCnt; i++){ /*ѥܿ*/
    for(j=0; j<dat->cnt; j++){ /*ץǡ*/
      for(k=0; k<(dat->pat+i)->attCnt; k++){ /*ɽ*/
      *((dat->pat+i)->att[k]+j) =
         (char)regCmp( getStrListUSI((dat->pat+i)->patIdx,j),
                     (dat->pat+i)->regTbl->reg+k );
      }
    }
  }
}

/**
 * # FUNCTION #
 * Release the attribute table columns of every pattern item.
 *
 * Each freed column pointer is cleared and attCnt is reset to 0 so that
 * calling freeAtt() again before the table is rebuilt (mkAtt) does not
 * free the same memory twice.
 */
void freeAtt( struct Data *dat ){
  int i,j;

  for(i=0; i<dat->patCnt; i++){
    for(j=0; j<(dat->pat+i)->attCnt; j++){
      mssFree((dat->pat+i)->att[j]);
      (dat->pat+i)->att[j]=NULL;
    }
    (dat->pat+i)->attCnt=0;
  }
}

/**
 * # SECTION #
 * ----------------------------------------------------------------------------
 * Pattern related functions
 * ----------------------------------------------------------------------------
 */

/**
 * # FUNCTION #
 * Copy a Regexp structure: the pattern string plus type, head/tail range
 * and objective value.
 */
void cpReg(struct Regexp *to, struct Regexp *from){

  /* scalar members */
  to->objVal=from->objVal;
  to->endRng=from->endRng;
  to->bgnRng=from->bgnRng;
  to->type  =from->type;

  /* pattern string */
  strcpyUSI(to->str,from->str);
}

/**
 * # FUNCTION #
 * Check whether reg occurs in str starting at one of the last n possible
 * start positions (i.e. near the end of str).
 * Returns 1 if such an occurrence exists, otherwise 0.
 */
static int atEndRng(usint *str,usint *reg,int n){
  int strLen=strlenUSI(str);
  int regLen=strlenUSI(reg);
  int stop  =strLen-regLen+1; /* one past the last possible start position */
  int pos   =stop-n;          /* first start position to test */

  if(pos<0) pos=0;
  for(; pos<stop; pos++){
    if(0==strncmpUSI(str+pos,reg,regLen)) return(1);
  }
  return(0);
}

/**
 * # FUNCTION #
 * Substring match (1: match, 0: no match).
 * With bgnRng set, the first occurrence must start within the first
 * bgnRng positions; with endRng set, an occurrence must exist within the
 * range checked by atEndRng().
 */
int regCmpStr(usint *str,struct Regexp *reg){
  usint *hit=strstrUSI(str,reg->str); /* first occurrence, if any */

  if(hit==NULL) return(0);

  /* head restriction */
  if(reg->bgnRng!=0 && hit-str >= reg->bgnRng) return(0);

  /* tail restriction */
  if(reg->endRng!=0 && 0==atEndRng(str,reg->str,reg->endRng)) return(0);

  return(1);
}

/**
 * # FUNCTION #
 * Subsequence match (1: match, 0: no match).
 *
 * The pattern must occur in str as a subsequence. In addition:
 *  - bgnRng != 0: the first pattern symbol must appear among the first
 *    bgnRng symbols of str;
 *  - endRng != 0: the last pattern symbol must appear among the last
 *    endRng symbols of str.
 *
 * Fix relative to the previous version: both range scans are now limited
 * to the length of str. Before, a range larger than the string read past
 * the terminator (head) or before the start of the buffer (tail).
 */
int regCmpSeq(usint *str,struct Regexp *reg){
  usint *strPnt=str;
  usint *regPnt=reg->str;
  usint  lastChr;
  int    strLen;
  int    regLen;
  int    flg;
  int    i;

  /* walk the pattern, always searching forward in str */
  while(*regPnt!=0 && *strPnt!=0){
    if(NULL==(strPnt=strchrUSI(strPnt,*regPnt))){
      return(0);
    }
    strPnt++;
    regPnt++;
  }

  /* str ran out before the whole pattern was consumed */
  if(*regPnt!=0) return(0);

  strLen=strlenUSI(str);
  regLen=strlenUSI(reg->str);

  /* head restriction */
  if(reg->bgnRng!=0){
    flg=0;
    for(i=0; i<reg->bgnRng && i<strLen; i++){
      if(*reg->str == *(str+i)){ flg=1; break; }
    }
    if(!flg) return(0);
  }

  /* tail restriction */
  if(reg->endRng!=0){
    flg=0;
    if(regLen>0){
      lastChr=*(reg->str+regLen-1);
      for(i=0; i<reg->endRng && i<strLen; i++){
        if(lastChr == *(str+strLen-1-i)){ flg=1; break; }
      }
    }
    if(!flg) return(0);
  }

  return(1);
}

/**
 * # FUNCTION #
 * Test whether a pattern matches the given string (1: match, 0: no match).
 * Dispatches on reg->type: 0 = substring, 1 = subsequence; any other type
 * never matches. Head/tail ranges are handled by the callee.
 */
int regCmp(usint *str, struct Regexp *reg){

  if(reg->type==0) return( regCmpStr(str,reg) ); /* substring   */
  if(reg->type==1) return( regCmpSeq(str,reg) ); /* subsequence */
  return(0);
}

/**
 * # SECTION #
 * ----------------------------------------------------------------------------
 * Fast algorithm for enumerating patterns
 *  algorithm by
 *    M. Hirao, H.Hoshino, A.Shinohara, M.Takeda, and S.Arikawa,
 *    "A Practical Algorithm to Find the Best Subsequence Patterns"
 * ----------------------------------------------------------------------------
 */

/**
 * # FUNCTION #
 * Helper for calUpperBound: set the matched counts of class 0 and 1 to
 * m0 / m1 (the unmatched counts become total - matched), recompute the
 * table and return the resulting splitAfter value.
 */
static double calSplitAt(struct CndCnt *cnt, int m0, int m1, int tot0, int tot1){
  cnt->mmCnt[0] = m0; cnt->uuCnt[0] = tot0-cnt->mmCnt[0];
  cnt->mmCnt[1] = m1; cnt->uuCnt[1] = tot1-cnt->mmCnt[1];
  calCndCnt(cnt);
  calCndCntSplit(cnt);
  return(cnt->splitAfter);
}

/**
 * # FUNCTION #
 * Compute the upper bound from the per-class match/unmatch table: the
 * smallest splitAfter over the four corner points
 * f(0,y), f(x,0), f(0,0) and f(x,y), where (x,y) are the current matched
 * counts of class 0 and 1. On return cnt holds the f(x,y) state again.
 * Only the first two classes are considered.
 */
static double calUpperBound( struct CndCnt *cnt){

  double best;
  double cur;
  int cornerX[4];
  int cornerY[4];
  int xMax,yMax;
  int n;

  /* evaluation order matters: the last corner restores the original counts */
  cornerX[0]=0;             cornerY[0]=cnt->mmCnt[1];
  cornerX[1]=cnt->mmCnt[0]; cornerY[1]=0;
  cornerX[2]=0;             cornerY[2]=0;
  cornerX[3]=cnt->mmCnt[0]; cornerY[3]=cnt->mmCnt[1];

  xMax=cnt->mmCnt[0]+cnt->uuCnt[0];
  yMax=cnt->mmCnt[1]+cnt->uuCnt[1];

  best=calSplitAt(cnt,cornerX[0],cornerY[0],xMax,yMax);
  for(n=1; n<4; n++){
    cur=calSplitAt(cnt,cornerX[n],cornerY[n],xMax,yMax);
    if(best>cur) best=cur;
  }
  return(best);
}

/**
 * # FUNCTION #
 * ѥΥǡФGINIupperBound
 * ѥ󤬰ޥåʤ1,Ǥʤ0֤
 */
static int regCnt(
  struct PQkey   *pqKey,     /*ѥ*/
  struct StrListUSI  *patIdx,/*index줿training dataΥѥܥǡ*/
  struct Class   *cls,       /*饹ܥǡ*/
  struct Cost    *cost){     /*ȥǡ*/

  struct CndCnt  cnt; /*饹̥ޥåޥåɽ*/

  int i;

  /*cntν*/
  iniCndCnt(&cnt,cost);

  /*ǡƤѥƤϤᡢCndCnt¤Τ˷򥻥åȤ*/
  for(i=0; i<patIdx->lineCnt; i++){

    /*ιԤޥåƤ*/
    if(regCmp(getStrListUSI(patIdx,i),&pqKey->reg)){
      cnt.mCnt[(int)*(cls->chr+i)]+=1;

    /*ιԤޥåʤ*/
    }else{
      cnt.uCnt[(int)*(cls->chr+i)]+=1;
    }
  }

  /**/
  calCndCnt(&cnt);

  /*Giniη׻*/
  calCndCntSplit(&cnt);
  pqKey->reg.objVal = cnt.splitAfter;

  /*upperBoundη׻*/
  pqKey->upperBound = calUpperBound(&cnt);

  if(cnt.mtCnt==0) return(1);
  else             return(0);
}

/**
 * # FUNCTION #
 * Register a pattern in the best-pattern queue.
 *
 *  - While the queue holds fewer than maxCnt entries the pattern is
 *    simply inserted.
 *  - Once full, the pattern replaces the current worst entry only when
 *    its objVal is smaller than regBest->maxVal.
 *
 * regBest->maxVal is the objVal of the worst entry of a full queue; it is
 * what callers compare candidates and upper bounds against.
 *
 * Fix relative to the previous version: maxVal is now also refreshed at
 * the moment the queue becomes full. Before, it kept its initial
 * sentinel until the first replacement, so the first pattern arriving
 * after the queue filled up evicted the worst entry even when it was
 * itself worse.
 */
static void setRegBest(struct PQkey *pqKey, struct RegBest *regBest){
  struct PQnode *worst;

  worst=PQworstMember(regBest->pqNode->left);

  /* nothing registered yet, or the queue is not full: just insert */
  if( worst == NULL || regBest->cnt <regBest->maxCnt){
    PQinsert(pqKey, regBest->pqNode->left);
    regBest->cnt++;

    /* queue just became full: from now on maxVal is the real threshold */
    if(regBest->cnt >= regBest->maxCnt){
      worst=PQworstMember(regBest->pqNode->left);
      if(worst!=NULL) regBest->maxVal=worst->key.reg.objVal;
    }

  /* better than the current worst: replace it */
  } else if ( pqKey->reg.objVal < regBest->maxVal ) {
    PQdeleteNode(worst);
    PQinsert(pqKey, regBest->pqNode->left);

    /* refresh the threshold used for comparisons with upper bounds */
    worst=PQworstMember(regBest->pqNode->left);
    regBest->maxVal=worst->key.reg.objVal;
  }
}

/* number of patterns copied so far by regPQ2TblSub (reset by regPQ2Tbl) */
static int RegPQ2Tbl_Cnt;

/**
 * # FUNCTION #
 * Walk the queue tree in order (left subtree, node, right subtree) and
 * copy every internal node's pattern into regTbl->reg, advancing
 * RegPQ2Tbl_Cnt for each copied pattern.
 */
static void regPQ2TblSub(
  struct PQnode *pqNode,
  struct RegTbl *regTbl){

  /* external (leaf) nodes carry no pattern */
  if(PQisExternalNode(pqNode)) return;

  regPQ2TblSub(pqNode->left,regTbl);

  cpReg(regTbl->reg+RegPQ2Tbl_Cnt,&pqNode->key.reg);
  RegPQ2Tbl_Cnt++;

  regPQ2TblSub(pqNode->right,regTbl);
}

/**
 * # FUNCTION #
 * Convert the patterns registered in the regBest queue into a newly
 * allocated pattern table. Room is reserved for regBest->maxCnt patterns;
 * the table's cnt is the number actually copied.
 */
static struct RegTbl *regPQ2Tbl( struct RegBest *regBest ){

  struct RegTbl *tbl=mssMalloc(sizeof(struct RegTbl), "regPQ2Tbl");

  tbl->reg=mssCalloc(sizeof(struct Regexp) * regBest->maxCnt, "regPQ2Tbl");

  /* the recursive walk counts through the file-scope counter */
  RegPQ2Tbl_Cnt=0;
  regPQ2TblSub(regBest->pqNode->left,tbl);
  tbl->cnt=RegPQ2Tbl_Cnt;

  return(tbl);
}


/**
 * # FUNCTION #
 * Check whether a pattern is excluded by the forbidden list.
 * Every proper suffix of the pattern string is looked up; e.g. for
 * "abcd" the strings "bcd", "cd" and "d" are tested.
 * Returns 1 if any of them is in the list, otherwise 0.
 *
 * Patterns with a head (bgnRng) or tail (endRng) restriction are never
 * judged by the list and always yield 0.
 *
 * Fix relative to the previous version: the guard tested bgnRng twice,
 * so tail-restricted patterns were wrongly checked against the list.
 */
static int isForbid(struct Regexp *reg,struct cell **forbidden){
  int i;
  int len;
  usint *pos;

  if(reg->bgnRng!=0 || reg->endRng!=0) return(0);

  len=strlenUSI(reg->str);
  pos=reg->str;
  for(i=1; i<len; i++){
    pos++;  /* drop one more leading symbol */

    if(EHmember(pos,forbidden)) return(1);
  }
  return(0);
}

/**
 * # FUNCTION #
 * Register a hopeless pattern in the forbidden list. The pattern string
 * (reg->str) is what gets stored in the hash list.
 *
 * Patterns with a head (bgnRng) or tail (endRng) restriction are not
 * registered: the list is keyed by the string alone, so an entry from a
 * restricted pattern would also exclude unrestricted ones.
 *
 * Fix relative to the previous version: the guard tested bgnRng twice,
 * so tail-restricted patterns were wrongly registered.
 */
static void putForbid(struct Regexp *reg,struct cell **forbidden){

  if(reg->bgnRng!=0 || reg->endRng!=0) return;

  EHinsert(reg->str,forbidden);
}

/**
 * # FUNCTION #
 * Run the following tests, in this order, on a candidate pattern (pqKey)
 * and register it in pqTop, regBest and/or the forbidden list:
 * 1. a proper suffix is in the forbidden list    -> do nothing
 * 2. the pattern matches no row                  -> forbidden list
 * 3. objVal is better than regBest->maxVal       -> regBest, and pqTop
 *                                                   unless at max length
 * 4. the upper bound is worse than maxVal        -> forbidden list
 * 5. the pattern has reached the maximum length  -> do nothing
 * 6. otherwise                                   -> pqTop
 */
static void putPFB(
  struct PQkey *pqKey,       /* candidate pattern */
  struct cell  *forbidden[], /* forbidden list (hash) */
  struct Data *dat,
  struct Cost *cost,
  struct Pattern *pat,
  struct PQnode *pqTop,      /* queue of patterns still to be extended */
  struct RegBest *regBest){  /* best-n list */
  double bound; /* upper bound used for the forbidden-list decision */

  /* (1) */
  if(isForbid(&pqKey->reg,forbidden)) return;

  /* compute split value and upper bound; (2) no match at all */
  if(regCnt(pqKey,pat->patIdx,dat->cls,cost)){
    putForbid(&pqKey->reg,forbidden);
    return;
  }

  /* (3) good enough for the best-n list */
  if(pqKey->reg.objVal < regBest->maxVal){
    setRegBest(pqKey, regBest);
    if(strlenUSI(pqKey->reg.str)<MaxRegLen){
      PQinsert(pqKey,pqTop->left);
    }
    return;
  }

  /* The computed upper bound is only used when ClassSize is 2.      */
  /* Otherwise 0 is used, so test (4) never forbids the pattern.     */
  bound = (ClassSize==2) ? pqKey->upperBound : 0;

  /* (4) */
  if(bound > regBest->maxVal){
    putForbid(&pqKey->reg,forbidden);
    return;
  }

  /* (5) / (6): keep for extension unless already at maximum length */
  if(strlenUSI(pqKey->reg.str)<MaxRegLen){
    PQinsert(pqKey,pqTop->left);
  }
}

/**
 * # FUNCTION #
 * Append the single symbol chr to the zero-terminated string str.
 * NOTE(review): the caller must guarantee room for one more symbol.
 */
static void addStrChr(usint *str,usint chr){
  usint *tail=str+strlenUSI(str);

  tail[0]=chr;
  tail[1]=0;
}

/**
 * # FUNCTION #
 * Helper for setRegTbl: create an empty priority queue, i.e. a head node
 * that is its own parent with a fresh node as its left child.
 */
static struct PQnode *newEmptyPQ(void){
  struct PQnode *head=PQmakeNode();

  head->parent       = head;
  head->right        = NULL;
  head->rank         = 0;
  head->left         = PQmakeNode();
  head->left->parent = head;
  return(head);
}

/**
 * # FUNCTION #
 * Search the pattern candidates of every pattern item and store them in
 * (dat->pat+patNo)->regTbl. Previously stored tables are released first.
 *
 * Per pattern item three structures are used:
 *   pqTop     : priority queue of patterns that may still be extended
 *   regBest   : the best optCAN.val patterns found so far
 *   forbidden : hash list of patterns not worth extending
 */
void setRegTbl(struct Data *dat, struct Cost *cost){

  struct cell    *forbidden[EHbucket];
  struct RegBest  regBest;
  struct PQnode  *pqTop;
  struct PQkey    pqKey;
  struct Regexp   reg;
  struct Pattern *pat;
  double upperBound;
  int patNo;
  int bucket;
  int chr;
  int rng;

  freeRegTbl(dat);
  for(patNo=0; patNo<dat->patCnt; patNo++){
    pat=dat->pat+patNo;

    /* empty forbidden list */
    for(bucket=0; bucket<EHbucket; bucket++) forbidden[bucket]=NULL;

    /* empty best-n list */
    regBest.pqNode = newEmptyPQ();
    regBest.cnt    = 0;
    regBest.maxVal = 9999;
    regBest.maxCnt = optCAN.val;

    /* empty work queue; the search runs until it is drained */
    pqTop = newEmptyPQ();

    /* seed the search with every one-symbol pattern */
    for(chr=1; chr<=pat->map->idxSiz; chr++){
      /* plain pattern */
      pqKey.reg.str[0]=chr;
      pqKey.reg.str[1]=0;
      pqKey.reg.bgnRng=0;
      pqKey.reg.endRng=0;
      pqKey.reg.type  =optSEQ.set;
      putPFB(&pqKey,forbidden,dat,cost,pat,pqTop,&regBest);

      /* head-restricted variants */
      for(rng=1; rng<=optBGN.val; rng++){
        pqKey.reg.bgnRng=rng;
        putPFB(&pqKey,forbidden,dat,cost,pat,pqTop,&regBest);
      }
      pqKey.reg.bgnRng=0;

      /* tail-restricted variants */
      for(rng=1; rng<=optEND.val; rng++){
        pqKey.reg.endRng=rng;
        putPFB(&pqKey,forbidden,dat,cost,pat,pqTop,&regBest);
      }
      pqKey.reg.endRng=0;

      /* variants restricted at both head and tail are not generated */
    }
    pqKey.reg.bgnRng=0;
    pqKey.reg.endRng=0;

    /* extend queued patterns until the work queue is empty */
    while(PQisEmpty(pqTop->left) != 1){
      /* take the next pattern together with its upper bound */
      PQpop(&reg,&upperBound,pqTop->left);

      /* only extend when the bound can still reach the best-n list */
      if(!(upperBound <= regBest.maxVal)) continue;

      /* try every one-symbol extension */
      for(chr=1; chr<=pat->map->idxSiz; chr++){
        cpReg(&pqKey.reg,&reg);
        addStrChr(pqKey.reg.str,chr);
        putPFB(&pqKey,forbidden,dat,cost,pat,pqTop,&regBest);
      }
    }

    /* turn the best-n queue into the pattern table */
    pat->regTbl=regPQ2Tbl(&regBest);

    /* release the working structures */
    PQfree(pqTop);
    PQfree(regBest.pqNode);
    EHfree(forbidden);

  }/* for patNo */
}

/**
 * # FUNCTION #
 * Release the pattern table of every pattern item.
 *
 * Items without a table are skipped, and each released pointer is reset
 * to NULL so a repeated call is harmless.
 *
 * Fix relative to the previous version: the function returned at the
 * first item without a table, leaving the tables of all later items
 * allocated, and it left dangling pointers behind after freeing.
 */
void freeRegTbl(struct Data *dat){
  int i;
  for(i=0; i<dat->patCnt; i++){
    if( (dat->pat+i)->regTbl == NULL) continue;
    mssFree((dat->pat+i)->regTbl->reg);
    mssFree((dat->pat+i)->regTbl);
    (dat->pat+i)->regTbl=NULL;
  }
}

/**
 * # SECTION #
 * ----------------------------------------------------------------------------
 * Output functions
 * ----------------------------------------------------------------------------
 */

/**
 * # FUNCTION #
 * Write a pattern to fpw in the form
 *   substring|subsequence "[^[bgnRng]]v1 v2 ...[$[endRng]]"
 * where v1 v2 ... are the index values of the pattern string separated
 * by single spaces.
 */
void prnRegexp(struct Regexp *reg, struct mssFPW *fpw){
  usint *cur;

  mssWriteStr( reg->type==0 ? "substring " : "subsequence ", fpw);

  mssWriteStr("\"",fpw);

  /* head restriction */
  if(reg->bgnRng!=0){
    mssWriteStr("^[",fpw);
    mssWriteInt(reg->bgnRng,fpw);
    mssWriteStr("]",fpw);
  }

  /* pattern symbols, space separated */
  for(cur=reg->str; *cur!=0; cur++){
    mssWriteInt(*cur,fpw);
    if(cur[1]!=0) mssWriteStr(" ",fpw);
  }

  /* tail restriction */
  if(reg->endRng!=0){
    mssWriteStr("$[",fpw);
    mssWriteInt(reg->endRng,fpw);
    mssWriteStr("]",fpw);
  }

  mssWriteStr("\"",fpw);
}
