#include "lexer.h"

// Destructor. The body is intentionally empty: the stream pointed to by `in`
// is never deleted or closed here, so it stays under the caller's control.
Lexer::~Lexer()
{
}

// Creates a lexer that reads from `script`.
// Only the address of the stream is stored, so `script` must outlive the lexer.
// Position tracking starts at row 1, column 1 and no "#define" is pending.
Lexer::Lexer( istream& script ){
  bPendingDefine = false;

  in      = &script;
  row     = 1;
  col     = 1;
  prevCol = 1;
}


int Lexer::getRow(){
  return row;
}

int Lexer::getCol(){
  return col;
}

int Lexer::getChar(){
  int i=in->get();
  if( i == '\n' ){
    row++;
    prevCol=col;
    col=1;
  }
  else{
    col++;
  }
  return i;
}

// Undoes the most recent getChar().
//
// ch must be the value that getChar() returned; it is only used to reverse
// the row/col bookkeeping (a '\n' steps back one row and restores the saved
// column, anything else steps back one column).
//
// When the stream is already at end of input there is no character to put
// back. istream::unget() is skipped in that case because, since C++11, it
// clears eofbit before doing anything else, which would make the in->eof()
// tests used throughout the lexer miss the end of the input.
void Lexer::ungetChar(int ch){
  if( ch == '\n' ){
    row--;
    col=prevCol;
  }
  else{
    col--;
  }

  if( in->eof() ){
    return; // the last read failed at end of input: keep eofbit intact
  }

  in->unget();
}


// Extracts a Number from the stream with operator>> and stores its textual
// form (as produced by operator<<) in s.
//
// Returns:
//   tokLabel  - a number immediately followed by ':' (the ':' is consumed)
//   tokNumber - a number followed by anything else, or by end of input
//   tokEof    - extraction failed because the input ended
//   tokError  - extraction failed for any other reason
//
// Success is tested with fail() rather than good(): extracting a number that
// is the very last thing in the input sets eofbit although the extraction
// succeeded, and good() would wrongly report that number as end of input.
//
// Note: the extraction bypasses getChar(), so col is not advanced for the
// characters consumed by operator>>.
int Lexer::getNumber(string& s){
  Number n;
  (*in)>>n;

  if( in->fail() ){
    if( in->eof() ){
      return tokEof;
    }
    return tokError;
  }

  ostringstream os;
  os << n;
  s = os.str();

  if( in->eof() ){
    return tokNumber; // nothing left to peek at, so it can not be a label
  }

  int ch = getChar();
  if( ch == ':' ){
    return tokLabel;
  }

  ungetChar( ch ); // read one too much
  return tokNumber;
}


// Reads an identifier and appends it to s.
//
// An identifier starts with a letter, continues with letters, digits or '_'
// and may end with a single '$' (to be more BASIC compliant).
// Returns tokId on success. Returns tokError when the first character is not
// a letter; that character has been consumed and is not put back.
//
// The character is kept as the int returned by getChar() because the <cctype>
// classifiers are only defined for unsigned char values and EOF; passing a
// plain (possibly negative) char is undefined behaviour.
int Lexer::getName(string& s){
  int look=getChar();
  if( !isalpha(look) ){
    return tokError;
  }

  // A failed read yields EOF, which none of the tests below accept, so the
  // loop also stops at end of input.
  while( isalpha(look) || isdigit(look) || look == '_' ){
    s+=static_cast<char>(look);
    look=getChar();
  }

  if( look == '$' ) s+=static_cast<char>(look);  //we also allow names ending with $
  else ungetChar(look);                          //read one too much

  return tokId;
}


// Turns an identifier token into a keyword token when its text is a reserved
// word. Tokens that are not tokId, and identifiers that match no keyword,
// are left untouched. The comparison is case sensitive.
void Lexer::checkKeywords(token& t){
  if( t.type != tokId ) return;

  struct KeywordEntry {
    const char* word;
    int         type;
  };

  static const KeywordEntry keywords[] = {
    { "begin",     tokBegin     },
    { "end",       tokEnd       },
    { "while",     tokWhile     },
    { "if",        tokIf        },
    { "else",      tokElse      },
    { "print",     tokPrint     },
    { "input",     tokInput     },
    { "for",       tokFor       },
    { "to",        tokTo        },
    { "step",      tokStep      },
    { "and",       tokAnd       },
    { "or",        tokOr        },
    { "not",       tokNot       },
    { "return",    tokReturn    },
    { "break",     tokBreak     },
    { "run",       tokRun       },
    { "write",     tokWrite     }, // will become deprecated!
    { "left",      tokLeft      },
    { "mid",       tokMid       },
    { "right",     tokRight     },
    { "len",       tokLen       },
    { "asc",       tokAsc       },
    { "chr",       tokChr       },
    { "ran",       tokRan       },
    { "read",      tokRead      },
    { "restore",   tokRestore   },
    { "data",      tokData      },
    { "foreach",   tokForEach   },
    { "in",        tokIn        },
    { "seperated", tokSeperated },
    { "by",        tokBy        },
    { "function",  tokFunction  },
    { "sin",       tokSin       },
    { "cos",       tokCos       },
    { "exp",       tokExp       },
    { "log",       tokLog       },
    { "mod",       tokMod       },
    { "sqrt",      tokSqrt      },
    { "str",       tokStr       },
    { "val",       tokVal       }
  };

  const KeywordEntry* const last = keywords + sizeof(keywords) / sizeof(keywords[0]);
  for( const KeywordEntry* entry = keywords; entry != last; ++entry ){
    if( t.str == entry->word ){
      t.type = entry->type;
      return;
    }
  }
  // no match: t.type stays tokId
}

void Lexer::skipComment(){
  char look=getChar();
  while ( !in->eof() && look == '#' ){
    look=getChar();
    
    //check for define
    if( isalpha(look) ){
        token t;
        ungetChar(look);
        t.type=getName(t.str);
        if( t.str == "define" ){
          bPendingDefine = true;
          return;
        }
    }
    
    while( !in->eof() && look!='\n' ){
      look=getChar();
    }
    skipWhite();
    look=getChar();
  }
  ungetChar(look);
}

// Consumes whitespace and leaves the stream at the first non-whitespace
// character.
//
// The character is kept as the int returned by getChar(): isspace() is only
// defined for unsigned char values and EOF, so a plain (possibly negative)
// char must not be passed to it. isspace(EOF) is false, which also ends the
// loop when the input is exhausted.
void Lexer::skipWhite(){
  int look=getChar();
  while( isspace(look) ){
    look=getChar();
  }
  ungetChar(look); //read one too much
}


// Reads the body of a string constant; the opening '"' has already been
// consumed by the caller. Reading stops at the closing '"' or at end of input.
//
// Escape handling:
//   \n \t \f  -> the corresponding control character
//   \"        -> a double quote
//   \\        -> kept as two backslashes
//   \<other>  -> kept verbatim (backslash followed by the character)
//
// On return t.str holds the collected text. t.type is tokString, or tokEof
// when the input ended before the closing quote was found.
void Lexer::getStringConstant(token& t){
  string text;

  for( int ch=getChar(); ch != '"' && !in->eof(); ch=getChar() ){
    if( ch != '\\' ){
      // ordinary character
      text += static_cast<char>(ch);
      continue;
    }

    // escape sequence: look at the character after the backslash
    ch=getChar();
    if(      ch == 'n'  ){ text += '\n'; }
    else if( ch == 't'  ){ text += '\t'; }
    else if( ch == 'f'  ){ text += '\f'; }
    else if( ch == '"'  ){ text += '"';  }
    else if( ch == '\\' ){ text += '\\'; text += '\\'; }
    else{
      //everything allowed :)
      text += '\\';
      text += static_cast<char>(ch);
    }
  }

  t.str=text;
  if( in->eof() ){
    t.type=tokEof;
  }
  else{
    t.type=tokString;
  }
}


// Returns the next token from the input.
//
// Whitespace and '#' comments are skipped first. "#define NAME VALUE" lines
// are recorded in defineMap and produce no token of their own. An identifier
// that matches a recorded define is replaced by the first token of its value.
//
// Token types:
//   - names: tokId, or a keyword type assigned by checkKeywords()
//   - numbers: whatever getNumber() returns
//   - "$#" -> tokArgCount; '$' followed by a number -> tokInputArg
//   - ">=" "<=" "!=" "==" -> tokGe/tokLe/tokNe/tokEq; a lone '>' '<' '='
//     -> tokGt/tokLt/tokAssign; a lone '!' -> '!'
//   - '"' starts a string constant (see getStringConstant())
//   - any other character is returned as its own character code
//   - tokEof when no further character can be read
token Lexer::lex(){
  token t;

  // Handle every pending "#define" before producing a real token.
  for(;;){
    t.str="";
    t.type=tokEof;

    skipWhite();
    skipComment();
    skipWhite();

    if( !bPendingDefine ) break;

    // getName() only fails when the name does not start with a letter.
    t.type=getName(t.str);
    if( t.type != tokId ){
      cerr << "Define contains a keyword" << endl;
      t.type=tokError;
      return t;
    }

    skipWhite();

    // The value is the rest of the line. The stream is tested after each
    // read, so a failed read is never appended to the value and the loop
    // always stops when the input runs out.
    string defineValue;
    for(;;){
      const int ch=getChar();
      if( in->fail() || ch == '\n' ) break;
      defineValue+=static_cast<char>(ch);
    }
    defineMap[ t.str ] = defineValue;

    bPendingDefine=false;

    #ifdef _DEBUG_
      cout << "define name=" << t.str << ", value='" << defineValue << "'" << endl;
    #endif
    //we handled the define and added it to our map, just lex again
  }

  char look=getChar();

  // fail() is set by every unsuccessful read. eofbit alone is not reliable
  // here because istream::unget() clears it again (since C++11).
  if( in->fail() ){
    t.type=tokEof;

    #ifdef _DEBUG_
      cout << "token type = " << t.type << ", value = " << t.str << endl;
    #endif
    return t;
  }

  // <cctype> classifiers need an unsigned char value; a plain char may be negative.
  const unsigned char lookClass = static_cast<unsigned char>(look);

  if( isalpha(lookClass) ){
    ungetChar(look);
    t.type=getName(t.str);
    checkKeywords(t);

    if( t.type == tokId ){
      if( defineMap.find( t.str ) != defineMap.end() ){ //this word was not an id but something defined
        //the name has been taken off the input stream; lex the define's value
        //with a temporary lexer and return its first token (same result as a
        //text replace in a pre-lexing phase)
        string defineVal = defineMap[ t.str ];
        #ifdef _DEBUG_
          cout << "lexing define value='" << defineVal <<"' into our lexer stream"<<endl;
        #endif
        istringstream defIn( " "+defineVal+"\n" );
        Lexer defLex( defIn );
        return defLex.lex(); //we limit ourselves to 1 token per define here...
      }//found a define
    }
  }
  else if( isdigit(lookClass) ){
    ungetChar(look);
    t.type=getNumber(t.str);
  }
  else{
    // Look-ahead for the two-character tokens. When it does not match it is
    // put back with its own value, so ungetChar() can undo the row/col
    // changes correctly (this matters when the look-ahead is '\n').
    int next=0;

    switch(look){
      case '$': next=getChar();
                if( next == '#' ){
                  t.type = tokArgCount;
                }
                else{
                  ungetChar( next );
                  // getNumber() fills t.str; a malformed number is reported
                  // instead of being hidden behind tokInputArg
                  if( getNumber( t.str ) == tokError ){
                    t.type = tokError;
                  }
                  else{
                    t.type = tokInputArg;
                  }
                }
                break;

      case '>': next=getChar();
                if( next == '=' ){
                  t.type=tokGe;
                }
                else{
                  ungetChar(next);
                  t.type=tokGt;
                }
                break;

      case '<': next=getChar();
                if( next == '=' ){
                  t.type=tokLe;
                }
                else{
                  ungetChar(next);
                  t.type=tokLt;
                }
                break;

      case '!': next=getChar();
                if( next == '=' ){
                  t.type=tokNe;
                }
                else{
                  ungetChar(next);
                  t.type='!';
                }
                break;

      case '=': next=getChar();
                if( next == '=' ){
                  t.type=tokEq;
                }
                else{
                  ungetChar(next);
                  t.type=tokAssign;
                }
                break;

      case '"': getStringConstant( t ); break;

      default : t.type=look;   break;
    }
  }

  #ifdef _DEBUG_
    cout << "token type = " << t.type << ", value = " << t.str << endl;
  #endif

  return t;
}


