// Entry point: tokenizes a complete source string.
// Resets the shared cursor state (SWYM.source, SWYM.sourcePos, SWYM.c) and then
// calls SWYM.GenerateNextToken repeatedly until it returns a truthy "done" value.
// Returns the array that GenerateNextToken was given to fill.
SWYM.Tokenize = function(readsource)
{
	SWYM.source = readsource;
	SWYM.sourcePos = 0;
	SWYM.c = SWYM.source[0];

	var tokens = [];
	var finished = false;
	while( !finished )
	{
		finished = SWYM.GenerateNextToken(tokens);
	}
	return tokens;
}

//=============================================================

// Moves the cursor forward by `step` characters (by one when step is omitted or 0)
// and refreshes SWYM.c with the character now under the cursor.
// SWYM.c becomes undefined once the cursor is past the end of the source.
SWYM.NextChar = function(step)
{
	var distance = step || 1;
	SWYM.sourcePos += distance;
	SWYM.c = SWYM.source[SWYM.sourcePos];
}

//=============================================================

// Looks ahead without moving the cursor: returns the character `offset` positions
// after the current one (one position ahead when offset is omitted or 0).
SWYM.PeekNext = function(offset)
{
	var distance = offset || 1;
	return SWYM.source[SWYM.sourcePos + distance];
}

// True when c is an ASCII letter. When c is omitted (or otherwise falsy) the
// current character SWYM.c is tested instead.
SWYM.IsLetter = function(c)
{
	var ch = c || SWYM.c;
	var isLower = ch >= 'a' && ch <= 'z';
	var isUpper = ch >= 'A' && ch <= 'Z';
	return isLower || isUpper;
}
// True when c is one of '0'..'9'. When c is omitted (or otherwise falsy) the
// current character SWYM.c is tested instead.
SWYM.IsDigit = function(c)
{
	var ch = c || SWYM.c;
	var atLeastZero = ch >= '0';
	return atLeastZero && ch <= '9';
}

// Characters treated as whitespace: space, newline, tab, carriage return.
SWYM.whitespaceString = " \n\t\r";

// True when c (or the current character SWYM.c, if c is omitted/falsy)
// appears in SWYM.whitespaceString.
SWYM.IsWhitespace = function(c)
{
	var ch = c || SWYM.c;
	return SWYM.whitespaceString.indexOf(ch) !== -1;
}

// Characters that may begin an operator.
SWYM.symbolString = "!$%^|&*-=+;:@~,<>/?±";

// True when c (or the current character SWYM.c, if c is omitted/falsy)
// appears in SWYM.symbolString.
SWYM.IsSymbol = function(c)
{
	var ch = c || SWYM.c;
	return SWYM.symbolString.indexOf(ch) !== -1;
}

// Symbols that can appear in the body of an operator, but not at the start.
SWYM.extendedSymbolString = "_.";

// True when c (or the current character SWYM.c, if c is omitted/falsy) may appear
// inside an operator: either an ordinary symbol or one of the extended symbols.
SWYM.IsExtendedSymbol = function(c)
{
	if( SWYM.IsSymbol(c) )
		return true;

	var ch = c || SWYM.c;
	return SWYM.extendedSymbolString.indexOf(ch) >= 0;
}

// Opening brackets and their closing partners, listed in matching order.
SWYM.openBracketString = "([{";
SWYM.closeBracketString = ")]}";

// Each test below checks c, or the current character SWYM.c when c is omitted/falsy.

// True for '(' '[' '{'.
SWYM.IsOpenBracket = function(c)
{
	var ch = c || SWYM.c;
	return SWYM.openBracketString.indexOf(ch) !== -1;
}

// True for ')' ']' '}'.
SWYM.IsCloseBracket = function(c)
{
	var ch = c || SWYM.c;
	return SWYM.closeBracketString.indexOf(ch) !== -1;
}

// True for any opening or closing bracket.
SWYM.IsBracket = function(c)
{
	if( SWYM.IsOpenBracket(c) )
		return true;
	return SWYM.IsCloseBracket(c);
}

//=============================================================

// If the cursor is on a letter or underscore, advances it past the whole word
// (letters, digits and underscores). Otherwise the cursor is left where it is.
SWYM.FindEndOfWord = function()
{
	var startsWord = SWYM.IsLetter() || SWYM.c === '_';
	if ( !startsWord )
		return;

	SWYM.NextChar();
	while( SWYM.IsLetter() || SWYM.IsDigit() || SWYM.c === '_' )
	{
		SWYM.NextChar();
	}
}

//=============================================================

// Advances the cursor past any run of whitespace, // line comments and
// /* block comments */, leaving SWYM.c on the first character that belongs to a token
// (or undefined at end of input).
//
// Side effects on the shared tokenizer state:
//  - SWYM.followsWhitespace is set to true if anything at all was skipped. This is the
//    flag SWYM.NewToken copies onto the tokens it creates.
//  - SWYM.followsNewline is set to true if a newline was skipped, including newlines
//    inside a block comment and the newline that terminates a line comment.
// Neither flag is cleared here; the caller resets them before each token.
//
// An unterminated block comment is reported through SWYM.LogError at the position of
// its opening slash.
SWYM.SkipWhitespace = function()
{
	while(true)
	{
		while( SWYM.IsWhitespace() )
		{
			if( SWYM.c === '\n' )
				SWYM.followsNewline = true;

			SWYM.followsWhitespace = true;
			SWYM.NextChar();
		}
		
		if ( SWYM.c !== '/' )
			return; // finished skipping whitespace

		var c2 = SWYM.PeekNext();
		if ( c2 === '/' )
		{
			// line comment: step onto the second slash, then run to end of line / end of input
			SWYM.NextChar();
			do { SWYM.NextChar(); }
			while( SWYM.c !== '\n' && SWYM.c !== undefined );

			// the newline itself is consumed by the whitespace loop on the next pass
			if( SWYM.c === '\n' )
				SWYM.followsNewline = true;

			SWYM.followsWhitespace = true;
		}
		else if ( c2 === '*' )
		{
			// block comment
			var commentStartPos = SWYM.sourcePos;

			// step onto the '*' of the opener; the loop advances before testing, so the
			// opener's own '*' can never be mistaken for the start of the closer (as in "/*/").
			SWYM.NextChar();
			while( true )
			{
				SWYM.NextChar();
				if ( SWYM.c === '*' && SWYM.PeekNext() === '/' )
				{
					SWYM.NextChar(2);
					break;
				}
				else if ( SWYM.c === '\n' )
				{
					SWYM.followsNewline = true;
				}
				else if ( SWYM.c === undefined )
				{
					SWYM.LogError(commentStartPos, "Block comment /* has no matching * /");
					break;
				}
			}

			// a block comment separates tokens exactly like whitespace does
			SWYM.followsWhitespace = true;
		}
		else
		{
			return; // a lone '/' is a token, not a comment
		}

		// a comment was skipped: loop round to see if we can skip more whitespace
	}
}

//=============================================================

// Returns a slice of the source text:
//   GetSource()            - the whole source
//   GetSource(start)       - from start up to the current cursor position
//   GetSource(start, end)  - from start up to (not including) end
// null and undefined both count as "not supplied".
SWYM.GetSource = function(start, end)
{
	var hasStart = start != null;
	var hasEnd = end != null;

	if ( !hasStart && !hasEnd )
		return SWYM.source;

	var stop = hasEnd ? end : SWYM.sourcePos;
	return SWYM.source.substring(start, stop);
}

// Shared token buffer, initially empty.
// NOTE(review): nothing in this part of the tokenizer reads or writes it - presumably
// it is used elsewhere; confirm before removing.
SWYM.TokenBuffer = [];

//=============================================================

// Reads the next token at the cursor and appends it (or, for strings and hyphenated
// identifiers, several tokens) to tokenlist.
//
// Returns true to signal "end": either the end of the source was reached, or an
// operator could not be understood, or a sub-tokenizer (string / char / identifier)
// reported a fatal error. Otherwise returns undefined.
//
// Resets SWYM.followsWhitespace and SWYM.followsNewline, then lets SWYM.SkipWhitespace
// set them again, so both flags describe the gap immediately before this token.
SWYM.GenerateNextToken = function(tokenlist)
{
	SWYM.followsWhitespace = false;
	SWYM.followsNewline = false;
	
	SWYM.SkipWhitespace();

	if ( SWYM.c === '"' )
	{
		// it's a quoted string
		return SWYM.GenerateString(tokenlist);
	}
	else if (SWYM.c === "'" )
	{
		// it's a quoted char
		return SWYM.GenerateChar(tokenlist);
	}
	else if ( SWYM.IsSymbol() || (SWYM.c === '.' && SWYM.PeekNext() === '.') )
	{
		// read an operator. Operators start either with a symbol, or with '..',
		// and continue with symbols, '_' and/or '.'s.
		var opStartPos = SWYM.sourcePos;

		// find the longest run of operator characters starting here
		var peekoffs = 0;
		do
		{
			peekoffs++;
			var opchar = SWYM.PeekNext(peekoffs);
		}
		while( opchar && SWYM.IsExtendedSymbol(opchar) );

		var allsymbols = SWYM.GetSource(opStartPos, SWYM.sourcePos+peekoffs);

		// The operator table stores plus-or-minus under the ASCII key "+_" rather than
		// the unicode character. The character is written as an escape so the comparison
		// does not depend on the encoding this file happens to be loaded with.
		var plusMinus = "\u00B1";

		// maximal munch: try the whole run first, then progressively shorter prefixes
		do
		{
			var possibleOpText = SWYM.GetSource(opStartPos, SWYM.sourcePos+peekoffs);
			if( possibleOpText === plusMinus )
				possibleOpText = "+_";
			var op = SWYM.operators[possibleOpText];
		}
		while(!op && --peekoffs > 0);

		if ( op )
		{
			// Spacing picks between variants of the same operator text:
			//   "a -b"  (space before, none after) -> "prefix-"  if such an operator exists
			//   "a! b"  (none before, space after) -> "postfix!" if such an operator exists
			// Note that IsWhitespace falls back to the current character when the peeked
			// character is undefined, so an operator at end of input counts as "not
			// followed by whitespace".
			var followedByWhitespace = SWYM.IsWhitespace(SWYM.PeekNext(peekoffs));
			var altText;
			if ( SWYM.followsWhitespace && !followedByWhitespace )
				altText = "prefix"+possibleOpText;
			else if ( !SWYM.followsWhitespace && followedByWhitespace )
				altText = "postfix"+possibleOpText;

			if ( altText )
			{
				var altOp = SWYM.operators[altText];
				if ( altOp )
				{
					possibleOpText = altText;
					op = altOp;
				}
			}

			// matched an operator
			var newToken = SWYM.NewToken("op", opStartPos, possibleOpText);

			if( SWYM.followsNewline )
				newToken.followsNewline = true;

			tokenlist.push(newToken);

			SWYM.NextChar(peekoffs); // chomp the appropriate number of characters
			return;
		}
		else
		{
			// falls through to the "end" return below: tokenizing stops here
			SWYM.LogError(opStartPos, "Don't understand operator '"+allsymbols+"'");
		}
	}
	else if ( SWYM.IsLetter() || SWYM.IsDigit() || SWYM.c === '.' || SWYM.c === '#' )
	{
		// it's an identifier or function name
		return SWYM.GenerateIdentifier(tokenlist);
	}
	else if ( SWYM.IsBracket() )
	{
		// read bracket: the bracket character itself is the token type
		var bracket = SWYM.c;
		var tokenPos = SWYM.sourcePos;
		SWYM.NextChar();

		tokenlist.push(SWYM.NewToken(bracket, tokenPos));
		return;
	}
	else if ( SWYM.c !== undefined )
	{
		SWYM.LogError(SWYM.sourcePos, "Unrecognized character '"+SWYM.c+"'");
		// skip the offending character and keep trying to tokenize the rest of the file
		SWYM.NextChar();
		return SWYM.GenerateNextToken(tokenlist);
	}

	// end of file
	return true;
}

//=============================================================

// Reads a decimal number starting at the cursor and pushes one "literal" token whose
// text is the characters consumed and whose value is the parsed number.
//
// The character under the cursor is always consumed as the first digit, so callers
// are expected to invoke this only when SWYM.c is a digit.
// A fractional part is read only when a '.' is immediately followed by a digit;
// passing a truthy nofloatingpoint disables the fractional part entirely.
SWYM.GenerateNumberLiteral = function(tokenlist, nofloatingpoint)
{
	var startPos = SWYM.sourcePos;

	// integer part
	var value = 0;
	for(;;)
	{
		value = value*10 + Number(SWYM.c);
		SWYM.NextChar();
		if( !SWYM.IsDigit() )
			break;
	}

	var hasFraction = SWYM.c === '.' && SWYM.IsDigit(SWYM.PeekNext()) && !nofloatingpoint;
	if ( hasFraction )
	{
		// fractional part, accumulated as fractionDigits / scale
		var fractionDigits = 0;
		var scale = 1;
		SWYM.NextChar(); // step over the '.'

		for(;;)
		{
			fractionDigits = fractionDigits*10 + Number(SWYM.c);
			scale *= 10;
			SWYM.NextChar();
			if( !SWYM.IsDigit() )
				break;
		}

		value += fractionDigits/scale;
	}

	var literalText = SWYM.GetSource(startPos);
	tokenlist.push(SWYM.NewToken("literal", startPos, literalText, value));
}

//=============================================================

// Reads an identifier at the cursor and pushes the resulting token(s) onto tokenlist.
// Called when the cursor is on a letter, a digit, '.' or '#'.
//
// Handles, in order:
//  - dotted function names: ".foo", ".#foo", ".1st"; ".{" becomes the function ".{}"
//    and ".(" / ".[" become the function ".at" (only the dot is consumed in those cases);
//  - "etc.." and its variants (etc..<  etc..>  etc../  each optionally followed by '='),
//    which are emitted as a single "name" token;
//  - hyphenated names such as if-exists-Tail(x). These are split into their component
//    names, separated by either "openbracket" tokens with text "hyphen" (when the chain
//    is a hyphenated function call) or ordinary "-" operator tokens.
//
// Component names are emitted through SWYM.GenerateNameLiteral.
//
// Returns true (the tokenizer's "stop" signal) after logging an error for an invalid
// function name; otherwise returns undefined.
SWYM.GenerateIdentifier = function(tokenlist)
{
	// read an identifier. (".foo" is an identifier for a function.)
	var startsWithDot = SWYM.c === '.';
	var chunkStartPos = SWYM.sourcePos;

	// handle function-modifiers, and the first character of the identifier
	if ( startsWithDot )
	{
		var nextC = SWYM.PeekNext();

		if( nextC === "#" )
		{
			//looks like we're referring to a numeric function.
			// # is only valid at the start of the identifier, so skip it before processing the rest of the text.
			SWYM.NextChar();
		}
		else if ( SWYM.IsSymbol(nextC) )
		{
			//TODO - handle function modifiers, e.g. bob.!hasAxe
		}
		else if ( nextC === "{" )
		{
			//sneaky trick: when you declare and call a lambda immediately (e.g. x.{...}),
			// we interpret it as calling a function called '.{}' with args (x, {...}).
			tokenlist.push(SWYM.NewToken("(fn)", SWYM.sourcePos, ".{}"));
			SWYM.NextChar();
			return;
		}
		else if ( nextC === "[" || nextC === "(" )
		{
			//similarly, .(x) and .[x,y,z] convert to .at(x) and .at[x,y,z]
			tokenlist.push(SWYM.NewToken("(fn)", SWYM.sourcePos, ".at"));
			SWYM.NextChar();
			return;
		}
		else if ( !SWYM.IsLetter(nextC) && !SWYM.IsDigit(nextC) )
		{
			// this isn't a valid function name.
			// (a digit here, as in .1st, is fine: GenerateNameLiteral detects the leading digit itself.)
			SWYM.LogError(SWYM.sourcePos, "Invalid function name ."+nextC);
			return true;
		}
	}
	
	// consume the first character unconditionally, then the rest of the name
	do
	{
		SWYM.NextChar();
	}
	while( SWYM.IsLetter() || SWYM.IsDigit() || SWYM.c === '_' );

	// special case: 'etc' and related operators ( etc..  etc..<  etc..> ) are emitted as a single "name" token.
	if( SWYM.GetSource(chunkStartPos) === "etc" && SWYM.c === '.' && SWYM.PeekNext() === '.')
	{
		SWYM.NextChar();
		SWYM.NextChar();
		if( SWYM.c === '<' || SWYM.c === '>' || SWYM.c === '/')
			SWYM.NextChar();
		if( SWYM.c === '=' )
			SWYM.NextChar();
		
		tokenlist.push(SWYM.NewToken("name", chunkStartPos, SWYM.GetSource(chunkStartPos)));
		return;
	}

	// handle hyphenated functions (e.g. treat if-exists-Tail(x){ return(); } as if(exists(Tail(x))){ return(); } )
	if ( SWYM.c !== '-' )
	{
		// ordinary name; an immediately following open bracket makes it a function call
		SWYM.GenerateNameLiteral(chunkStartPos, SWYM.IsOpenBracket(), tokenlist);
		return;
	}

	// --- pass 1: look ahead to find where the hyphenated chain ends ---

	var firstMinusSignPos = SWYM.sourcePos;
	// end of the last complete "-word" seen so far; this is where we back up to
	var minusBeforeWordPos = SWYM.sourcePos;

	// trace until we find a character that's not a minus sign, nor part of a name. Typically an open bracket.
	var wordContainsLetter = false;
	var numHyphens = 1;

	while(true)
	{
		SWYM.NextChar();
	
		if( SWYM.c === '-' )
			numHyphens++;
		
		if ( SWYM.IsLetter() || SWYM.c === '_' )
			wordContainsLetter = true;

		if ( !(SWYM.IsLetter() || SWYM.IsDigit() || SWYM.c === '_') )
		{
			if ( wordContainsLetter )
			{
				// ok, finished reading a valid hyphenated word
				minusBeforeWordPos = SWYM.sourcePos;
			}
			
			// . is only legal in a hyphenated string if immediately following a hyphen
			// NOTE(review): the '.' clause can never change the outcome, because c === '.'
			// already satisfies c !== '-'. Left as written.
			if( SWYM.c !== '-' || !wordContainsLetter || (SWYM.c === '.' && minusBeforeWordPos !== SWYM.sourcePos-1) ) 
			{
				// end of the hyphenated string, back up to the last valid bit we read
				SWYM.sourcePos = minusBeforeWordPos;
				SWYM.c = SWYM.source[SWYM.sourcePos];
				break;
			}
			else
			{
				// prepare to read more
				wordContainsLetter = false;
			}
		}
	}

	if ( SWYM.sourcePos === firstMinusSignPos )
	{
		// didn't find anything interesting: the '-' is left for the operator tokenizer
		SWYM.GenerateNameLiteral(chunkStartPos, false, tokenlist);
		return;
	}

	// --- pass 2: go back and process the chain again, with this information ---

	// a trailing hyphen (e.g. the last '-' in a-b-(...) ) belongs to the chain
	if( SWYM.c === '-' )
		SWYM.NextChar();
	var hyphenEndPos = SWYM.sourcePos;

	// to be a hyphenated function, the next character must either be an open bracket - i.e. a-b(... or a-b[... or a-b{...
	// or else it must contain at least two minus signs - i.e. a-b-c.
	var isHyphenated = ( SWYM.c === '(' || SWYM.c === '[' || SWYM.c === '{' || numHyphens >= 2 );
	
	SWYM.sourcePos = firstMinusSignPos;
	SWYM.c = SWYM.source[SWYM.sourcePos];
	SWYM.GenerateNameLiteral(chunkStartPos, isHyphenated, tokenlist);

	//process the remaining names and minus signs
	do
	{
		// the cursor is on a hyphen here; the separator token is positioned at that hyphen
		var hyphenPos = SWYM.sourcePos;
		if ( isHyphenated )
			tokenlist.push(SWYM.NewToken("openbracket", hyphenPos, "hyphen"));
		else
			tokenlist.push(SWYM.NewToken("op", hyphenPos, "-"));
		
		SWYM.NextChar();
		
		if( SWYM.sourcePos >= hyphenEndPos )
			break;

		var subnameStartPos = SWYM.sourcePos;
		
		if( SWYM.c === '.' )
			SWYM.NextChar();
		
		do{ SWYM.NextChar(); }
		while( SWYM.IsLetter() || SWYM.IsDigit() || SWYM.c === '_' );

		SWYM.GenerateNameLiteral(subnameStartPos, isHyphenated, tokenlist);
	}
	while( SWYM.sourcePos < hyphenEndPos );
}


// Emits the token for one name whose text runs from chunkStartPos up to the cursor.
//
//  - A name that starts with a digit, has no leading dot and is not a function call is
//    really a number: the cursor is rewound and SWYM.GenerateNumberLiteral takes over
//    (so the cursor ends wherever the number ends).
//  - Any other name starting with a digit (1st(x) or x.1st) is a numeric function. The
//    digits are parsed into a number token which is attached to the emitted token as
//    .numToken; the token text is "#" + suffix, with a leading st/nd/rd suffix
//    normalised to "th". Dotted forms produce a "(fn)" token with a leading ".".
//  - A dotted name produces a "(fn)" token, flagged with whether it began ".#".
//  - A name found in SWYM.operators produces an "op" token.
//  - Everything else produces a "name" token.
SWYM.GenerateNameLiteral = function(chunkStartPos, isFunctionCall, tokenlist)
{
	var dotted = SWYM.source[chunkStartPos] === '.';
	var leadChar = SWYM.source[dotted ? chunkStartPos+1 : chunkStartPos];
	var numeric = SWYM.IsDigit( leadChar );
	var endPos = SWYM.sourcePos;

	if ( numeric )
	{
		if ( !dotted && !isFunctionCall )
		{
			// if an identifier starts with a digit, and there's no
			// following bracket, then it's just a number.
			SWYM.sourcePos = chunkStartPos;
			SWYM.c = SWYM.source[SWYM.sourcePos];
			return SWYM.GenerateNumberLiteral(tokenlist);
		}

		// it's a numeric function: 1st(x) or x.1st which we process into x.nth(1).
		// Rewind to the first digit, parse the number into a scratch list, then restore the cursor.
		SWYM.sourcePos = dotted ? chunkStartPos+1 : chunkStartPos;
		SWYM.c = SWYM.source[SWYM.sourcePos];

		var numberTokens = [];
		SWYM.GenerateNumberLiteral(numberTokens);
		var suffix = SWYM.GetSource(SWYM.sourcePos, endPos);

		SWYM.sourcePos = endPos;
		SWYM.c = SWYM.source[SWYM.sourcePos];

		// replace #st #nd #rd -> #th
		var ordinalName = /^(st|nd|rd)/.test(suffix) ? "#th"+suffix.slice(2) : "#"+suffix;

		var ordinalToken = dotted
			? SWYM.NewToken("(fn)", chunkStartPos, "."+ordinalName)
			: SWYM.NewToken("name", chunkStartPos, ordinalName);
		ordinalToken.numToken = numberTokens[0];
		tokenlist.push(ordinalToken);
		return;
	}

	var nameText = SWYM.GetSource(chunkStartPos);

	if ( dotted )
	{
		var fnToken = SWYM.NewToken("(fn)", chunkStartPos, nameText);
		fnToken.startsWithHash = (leadChar === '#');
		tokenlist.push(fnToken);
		return;
	}

	var tokenType = SWYM.operators[nameText] ? "op" : "name";
	tokenlist.push(SWYM.NewToken(tokenType, chunkStartPos, nameText));
}

//=============================================================

// Reads a single-quoted character constant starting at the cursor (which is on the
// opening quote) and pushes one "literal" token whose value is the character.
//
// Escapes: \n, \t and \r map to newline, tab and carriage return; a backslash
// followed by any other character yields that character unchanged.
//
// Returns true (the tokenizer's "stop" signal) after logging an error when the
// constant contains a line break, runs into the end of the input, or is not closed
// by a quote straight after its one character. Returns undefined on success.
SWYM.GenerateChar = function(tokenlist)
{
	// quoted char
	var tokenStartPos = SWYM.sourcePos;
	SWYM.NextChar();
	
	var parsedChar;

	if ( SWYM.c === undefined )
	{
		SWYM.LogError(tokenStartPos, "Unexpected end-of-file in character constant.");
		return true;
	}
	else if ( SWYM.c === "\n" )
	{
		SWYM.LogError(tokenStartPos, "Unexpected line-break in character constant.");
		return true;
	}
	else if ( SWYM.c === '\\' )
	{
		// handle escaped characters
		SWYM.NextChar();
		if ( SWYM.c === undefined )
		{
			SWYM.LogError(tokenStartPos, "Unexpected end-of-file in character constant.");
			return true;
		}

		switch( SWYM.c )
		{
			case 'n':	parsedChar = '\n';	break;
			case 't':	parsedChar = '\t';	break;
			case 'r':	parsedChar = '\r';	break;
			default:
				parsedChar = SWYM.c;
				break;
		}
		SWYM.NextChar();
	}
	else
	{
		parsedChar = SWYM.c;
		SWYM.NextChar();
	}
	
	// the character must be followed immediately by the closing quote
	if( SWYM.c === undefined )
	{
		SWYM.LogError(tokenStartPos, "Unexpected end-of-file in character constant.");
		return true;
	}
	if( SWYM.c !== "'" )
	{
		SWYM.LogError(tokenStartPos, "Too many characters in character constant.");
		return true;
	}
	SWYM.NextChar();
	
	tokenlist.push(SWYM.NewToken("literal", tokenStartPos, "'"+parsedChar+"'", parsedChar));
}

//=============================================================

// Reads a double-quoted string starting at the cursor (which is on the opening quote)
// and pushes the tokens that represent it onto tokenlist.
//
// A plain string becomes a single "literal" token. A string containing interpolations
// becomes a chain of segments joined by "(str++)" operator tokens, e.g.
//   "a $x b"      ->  literal "a "  (str++)  ( x )  (str++)  literal " b"
//   "n = $(1+2)"  ->  literal "n = "  (str++)  ( 1 + 2 )
// Two interpolation forms are recognised after a '$':
//   $( ... )  $[ ... ]  ${ ... }  - tokenized with SWYM.GenerateNextToken up to the
//                                   matching close bracket;
//   $name or $name.member         - wrapped in a synthetic ( ... ) pair.
// A string that starts with an interpolation gets an empty leading literal.
//
// Escapes: \n, \t and \r map to newline, tab and carriage return; a backslash
// followed by any other character (including '$' and '"') yields that character.
//
// Returns true (the tokenizer's "stop" signal) after logging an error for a line break
// or end of input inside the string or inside an interpolation. Otherwise undefined.
SWYM.GenerateString = function(tokenlist)
{
	// quoted string
	var parsedString = "";
	var stringOpenQuotePos = SWYM.sourcePos;
	var stringStartPos = SWYM.sourcePos;	// start of the literal segment being accumulated
	var needsAdd = false;					// true once an interpolation has been emitted
	var firstSegment = true;
	SWYM.NextChar();
	
	if( SWYM.c === '"' )
	{
		//empty string
		tokenlist.push(SWYM.NewToken("literal", stringStartPos, "", ""));
		SWYM.NextChar();
		return;
	}

	while( SWYM.c !== '"' )
	{
		if ( SWYM.c === undefined )
		{
			SWYM.LogError(stringOpenQuotePos, "Unexpected end-of-file in string");
			return true;
		}
		else if ( SWYM.c === "\n" )
		{
			SWYM.LogError(stringOpenQuotePos, "Unexpected line-break in string. (For a multiline string, use \"\"\"triple quotes\"\"\".)");
			return true;
		}
		else if ( SWYM.c === '\\' )
		{
			// handle escaped characters
			SWYM.NextChar();
			switch( SWYM.c )
			{
				case 'n':	parsedString = parsedString.concat('\n');	break;
				case 't':	parsedString = parsedString.concat('\t');	break;
				case 'r':	parsedString = parsedString.concat('\r');	break;
				default:
					parsedString = parsedString.concat(SWYM.c);
					break;
			}
			SWYM.NextChar();
		}
		else if ( SWYM.c !== '$' )
		{
			// default case, just add the character
			parsedString = parsedString.concat(SWYM.c);
			SWYM.NextChar();
		}
		else
		{
			// string interpolation, e.g. "hello $person.name, how are you?" or "hello $(<whatever expression>), how are you?"
			var interppos = SWYM.sourcePos;
			var done = false;

			// join onto the previous interpolation
			if (needsAdd)
				tokenlist.push(SWYM.NewToken("op", interppos, "(str++)"));
			
			// flush the literal text accumulated so far (always, for the very first segment)
			if ( parsedString.length > 0 || firstSegment)
			{
				tokenlist.push(SWYM.NewToken("literal", stringStartPos, parsedString, parsedString));
				tokenlist.push(SWYM.NewToken("op", interppos, "(str++)"));
			}
			SWYM.NextChar();

			if ( SWYM.c === '(' || SWYM.c === '{' || SWYM.c === '[')
			{
				// read until the corresponding close bracket
				var bracketdepth = 1;
				var openbracket = SWYM.c;

				var closebracket;
				if (openbracket === '{')
					closebracket = '}';
				else if (openbracket === '[')
					closebracket = ']';
				else
					closebracket = ')';

				var openBracketToken = SWYM.NewToken(openbracket, SWYM.sourcePos);
				tokenlist.push(openBracketToken);
				
				SWYM.NextChar();

				while (!done)
				{
					var firstNewToken = tokenlist.length;
					done = SWYM.GenerateNextToken(tokenlist);

					// Count every token this call produced, not just the last one. A single
					// call can emit several tokens: a nested string literal pushes its own
					// interpolation brackets, which are balanced and so cancel out here,
					// whereas looking only at the last token would mistake the nested
					// string's final close bracket for one of ours.
					for ( var i = firstNewToken; i < tokenlist.length; i++ )
					{
						var generated = tokenlist[i];
						if ( !generated )
							continue;

						if ( generated.type === "openbracket" && generated.text === openbracket )
							bracketdepth++;
						else if ( generated.type === "closebracket" && generated.text === closebracket )
							bracketdepth--;
					}
						
					if ( bracketdepth == 0 )
						break;
				}
				
				if ( done )
				{
					SWYM.LogError(interppos, "Unexpected end of file during $"+openbracket+"..."+closebracket+" string interpolation");
					return true;
				}
			}
			else if (SWYM.IsLetter() || SWYM.c === '_' || SWYM.c === '.')
			{
				// $foo.bar form: wrap the member chain in a synthetic pair of parentheses
				tokenlist.push(SWYM.NewToken("(", SWYM.sourcePos));

				do
				{
					done = SWYM.GenerateNextToken(tokenlist);
				}
				while ( !done && (SWYM.IsLetter() || SWYM.c === '_' || ( SWYM.c === '.' && SWYM.IsLetter( SWYM.PeekNext() ))) );

				tokenlist.push(SWYM.NewToken(")", SWYM.sourcePos));
				
				if ( done )
				{
					SWYM.LogError(interppos, "Unexpected end of file during $foo.bar string interpolation");
					return true;
				}
			}
			else
			{
				// the offending character is not consumed here: the main loop treats it as ordinary text
				SWYM.LogError(interppos, "Invalid string interpolation '$"+SWYM.c+"'. For a literal dollar sign, write '\\$'.");
				tokenlist.push(SWYM.NewToken("literal", SWYM.sourcePos, "", ""));
			}

			// start accumulating the next literal segment
			needsAdd = true;
			firstSegment = false;
			parsedString = "";
			stringStartPos = SWYM.sourcePos;
		}
	}

	// flush the trailing literal segment, if any
	if ( parsedString.length > 0 )
	{
		if ( needsAdd )
			tokenlist.push(SWYM.NewToken("op", stringStartPos, "(str++)"));

		tokenlist.push(SWYM.NewToken("literal", stringStartPos, parsedString, parsedString));
	}

	SWYM.NextChar(); // step over the closing quote
}

// Table of prototype objects that SWYM.NewToken clones tokens from, keyed by token type.
//
// Every named type maps to an object { type, toString }. In addition, each bracket
// character maps to an object derived (via the global object() helper) from the
// "openbracket" or "closebracket" prototype, with its text preset to that character.
// That lets NewToken be called with the bracket character itself as the type.
SWYM.BaseTokens = (function()
{
	var table = {};

	// default rendering, e.g. name(foo)
	function PlainToString(){ return ""+this.type+'('+this.text+')';}

	// bracket rendering, e.g. openbracket"("
	function QuotedToken(){ return ""+this.type+'"'+this.text+'"';}

	// registers `symbol` as a ready-made token derived from an existing prototype
	function DeriveBracket(baseType, symbol)
	{
		var derived = object(table[baseType]);
		derived.text = symbol;
		table[symbol] = derived;
	}

	var typeNames = [
		"name",
		"(fn)",
		"op",
		"(decl)",
		"literal",
		"openbracket",
		"closebracket",
		"lambda{}",
		"merge[]",
		"etc"
	];
	for( var t = 0; t < typeNames.length; t++ )
	{
		table[typeNames[t]] = { type:typeNames[t], toString:PlainToString };
	}

	table.openbracket.toString = QuotedToken;
	table.closebracket.toString = QuotedToken;

	// open and close bracket strings are index-aligned, so register them pairwise
	var bracketCount = SWYM.openBracketString.length;
	for( var b = 0; b < bracketCount; b++ )
	{
		DeriveBracket("openbracket", SWYM.openBracketString[b]);
		DeriveBracket("closebracket", SWYM.closeBracketString[b]);
	}

	return table;
})();

// Creates a token of the given type at source position pos.
//
// The token is derived (via the global object() helper) from SWYM.BaseTokens[type],
// so it inherits that prototype's type, toString and, for brackets, text.
//   text  - stored on the token unless null/undefined
//   value - stored on the token unless null/undefined
// The token's behaviour is looked up in SWYM.operators: by text for "op" tokens,
// by type for everything else. If SWYM.followsWhitespace is currently set, the token
// is flagged followsWhitespace.
//
// An unknown type is reported through SWYM.LogError and yields undefined.
SWYM.NewToken = function(type, pos, text, value)
{
	var prototypeToken = SWYM.BaseTokens[type];
	if ( !prototypeToken )
	{
		SWYM.LogError(pos, "Internal error: Invalid token type "+type);
		return;
	}

	var token = object(prototypeToken);
	token.pos = pos;

	if ( text != null )
		token.text = text;

	var behaviourKey = ( type == "op" ) ? text : type;
	token.behaviour = SWYM.operators[behaviourKey];

	if ( SWYM.followsWhitespace )
		token.followsWhitespace = true;

	if ( value != null )
		token.value = value;

	return token;
}
