//=========================================================================
// Record for one word of the checked text plus the suggestions gathered for it.
//   sWord      - text of the word (stored as given)
//   nPos       - position of the word (stored as given)
//   nLength    - length of the word (stored as given)
//   nLangType  - language type code (stored as given)
//   nWordCount - number of tokens the word spans (stored as given)
// Every suggestion category gets its own, initially empty, list; they are
// filled through WSEnrichmentRecord.prototype.add.
function WSEnrichmentRecord(sWord, nPos, nLength, nLangType, nWordCount)
{
	// Descriptive fields, copied verbatim from the arguments.
	this.sWord      = sWord;
	this.nPos       = nPos;
	this.nLength    = nLength;
	this.nLangType  = nLangType;
	this.nWordCount = nWordCount;

	// One independent list per suggestion category.
	this.grammar    = [];
	this.spelling   = [];
	this.thesaurus  = [];
	this.enrichment = [];
	this.dictionary = [];
}
//-------------------------------------------------------------------------
// Appends one suggestion to the list that matches its enrichment type.
//   nEnrichType - numeric type code: 3 = matching, 4 = thesaurus, 5 = idiom,
//                 6 = spelling, 7 = grammar, 8 = dictionary
//   sWord       - the suggested text, stored unchanged
//   nScore      - accepted for interface compatibility; not used here
// Idiom suggestions and unknown type codes are silently ignored.
WSEnrichmentRecord.prototype.add = function(nEnrichType, sWord, nScore)
{
	var TYPE = {MATCHING: 3, THESAURUS: 4, IDIOM: 5, SPELL: 6, GRAMMAR: 7, DICTIONARY: 8};
	var vTarget = null; // list that will receive the suggestion, if any

	// Loose comparison on purpose: numeric strings are accepted as type codes.
	if (nEnrichType == TYPE.MATCHING)
	{
		vTarget = this.enrichment;
	}
	else if (nEnrichType == TYPE.THESAURUS)
	{
		vTarget = this.thesaurus;
	}
	else if (nEnrichType == TYPE.SPELL)
	{
		vTarget = this.spelling;
	}
	else if (nEnrichType == TYPE.GRAMMAR)
	{
		vTarget = this.grammar;
	}
	else if (nEnrichType == TYPE.DICTIONARY)
	{
		vTarget = this.dictionary;
	}

	if (vTarget !== null)
	{
		vTarget.push(sWord);
	}
};
//=========================================================================
// Builds the per-word enrichment list from a server response and the client
// side token list.
//   sResponseBody - newline separated text. Line 0 holds the number of
//                   enrichment entries; each entry then occupies 9 lines:
//                   0 = word_index, 1 = dic_ref, 2 = word_count, 3 = lang_type,
//                   4 = actionId (enrichment type), 5 = score, 6 = extra_info,
//                   7 = word_id, 8 = enrich word.
//                   null/undefined is treated as an empty response.
//   vTokens       - array of {sWord, nPos, nLength} tokens.
// Returns the result of wsConvertToEnrichmentInfo() over one record per word:
// enriched words carry their suggestions, all other tokens get an empty record.
// Entries that cannot be resolved to existing tokens (truncated response,
// non-numeric fields, index out of range) are skipped instead of throwing.
function wsGetEnrichmentInfo(sResponseBody, vTokens)
{
	var FIELDS_PER_ENTRY = 9;
	var sBody = (sResponseBody == null) ? "" : sResponseBody;
	var vResponse = sBody.split("\n");
	var nWords = parseInt(vResponse[0], 10); //number of enrichment entries

	var vEnrichInfo = [];
	var nTokenOffset = 0; //extra tokens consumed so far by multi-token words
	var i, j, k;

	for (j = 0; j < nWords; ++j)
	{
		i = j * FIELDS_PER_ENTRY + 1;
		if (i >= vResponse.length)
		{
			break; //response announces more entries than it contains
		}
		var nWordIndex  = parseInt(vResponse[i], 10);
		var nWordCount  = parseInt(vResponse[i+2], 10);
		var nLangType   = parseInt(vResponse[i+3], 10);
		var nEnrichType = parseInt(vResponse[i+4], 10);
		var nScore      = parseInt(vResponse[i+5], 10);
		var sEnrichWord = vResponse[i+8];

		if (! vEnrichInfo[nWordIndex])
		{
			//word indexes count a multi-token word once, token indexes do not
			var nTokenIndex = nWordIndex + nTokenOffset;
			var oFirstToken = vTokens[nTokenIndex];
			var oLastToken  = vTokens[nTokenIndex + nWordCount - 1];
			if (! oFirstToken || ! oLastToken)
			{
				continue; //malformed entry: no such token(s)
			}

			nTokenOffset += nWordCount - 1;
			var nPos = parseInt(oFirstToken.nPos, 10);
			var nLength = (parseInt(oLastToken.nPos, 10) - nPos) + parseInt(oLastToken.nLength, 10);
			var sWord = "";
			for (k = 0; k < nWordCount; ++k)
			{
				if (sWord.length > 0)
				{
					sWord += " ";
				}
				sWord += vTokens[nTokenIndex+k].sWord;
			}
			vEnrichInfo[nWordIndex] = new WSEnrichmentRecord(sWord, nPos, nLength, nLangType, nWordCount);
		}
		vEnrichInfo[nWordIndex].add(nEnrichType, sEnrichWord, nScore);
	}

	//give every remaining (non enriched) token an empty record of its own
	nTokenOffset = 0;
	for (i = 0; i < vTokens.length; ++i)
	{
		if (vEnrichInfo[i])
		{
			nTokenOffset += vEnrichInfo[i].nWordCount - 1;
		}
		else
		{
			var nPlainIndex = i + nTokenOffset;
			if (nPlainIndex < vTokens.length)
			{
				var oPlain = vTokens[nPlainIndex];
				vEnrichInfo[i] = new WSEnrichmentRecord(oPlain.sWord, oPlain.nPos, oPlain.nLength, 0, 1);
			}
		}
	}

	return wsConvertToEnrichmentInfo(vEnrichInfo);
}
//-------------------------------------------------------------------------
// Converts the (possibly sparse) list of enrichment records into a dense
// array of plain objects with short keys:
//   sWord, nPos, nLength, nLangType - copied from the record
//   s = spelling, g = grammar, t = thesaurus, e = enrichment, d = dictionary
// Missing or falsy entries of vEnrichInfo are left out of the result.
function wsConvertToEnrichmentInfo(vEnrichInfo)
{
	var vResult = [];
	var nIdx, oRecord;

	for (nIdx = 0; nIdx < vEnrichInfo.length; ++nIdx)
	{
		oRecord = vEnrichInfo[nIdx];
		if (!oRecord)
		{
			continue; // hole in the list
		}
		vResult.push({
			sWord: oRecord.sWord,
			nPos: oRecord.nPos,
			nLength: oRecord.nLength,
			nLangType: oRecord.nLangType,
			s: oRecord.spelling,
			g: oRecord.grammar,
			t: oRecord.thesaurus,
			e: oRecord.enrichment,
			d: oRecord.dictionary
		});
	}
	return vResult;
}
//-------------------------------------------------------------------------
/*
function wsConvertToEnrichmentInfo(vEnrichInfo,vTokens)
{
	var sResult = "[";
	var bFirst = true;

	for (var i=0 ; i<vEnrichInfo.length; ++i)
	{
		var enrichment = vEnrichInfo[i];
		if (enrichment)
		{
			var nPos        = enrichment.nPos;
			var nLength     = enrichment.nLength;
			var nLangType   = enrichment.nLangType;
			var nWordCount  = enrichment.nWordCount;

			var sSpelling   = new String(enrichment.spelling.join(","));
			var sGrammar    = new String(enrichment.grammar.join(","));
			var sThesaurus  = new String(enrichment.thesaurus.join(","));
			var sEnrichment = new String(enrichment.enrichment.join(","));
			var sDictionary = new String(enrichment.dictionary.join(","));

			if (! bFirst)
			{
			  sResult += ",\n";
			}
			sResult += "{nPos: " + nPos + ", nLength: " + nLength + ", nLangType: " + nLangType +
							", s:[" + sSpelling + "]" +
							", g:[" + sGrammar + "]" +
							", t:[" + sThesaurus + "]" +
							", e:[" + sEnrichment + "]" +
							", d:[" + sDictionary + "]}";
			bFirst = false;
		}
	}
	sResult += "]";
	return sResult;
}
*/
//=======================================================================================
// Class WSHTMLParser
//=======================================================================================
// Constructor. It takes no arguments and sets no instance fields; the parser's
// behaviour is provided by the methods assigned to WSHTMLParser.prototype.
function WSHTMLParser()
{
}
//----------------------------------------------------------------------------------
// Returns true when ch is a blank, tab, carriage return or line feed.
WSHTMLParser.prototype.isWhiteSpace = function(ch)
{
	var vBlanks = [' ', '\t', '\r', '\n'];
	for (var k = 0; k < vBlanks.length; ++k)
	{
		if (ch == vBlanks[k])
		{
			return true;
		}
	}
	return false;
};
//----------------------------------------------------------------------------------
/*
WSHTMLParser.prototype.isCtrl = function(ch)
{
	return ((ch >= '\0' && ch <= '\31') || ch == '\127');
}
*/
//----------------------------------------------------------------------------------
// Returns true when ch lies in the range a-z or A-Z (plain string comparison,
// so only unaccented Latin letters qualify).
WSHTMLParser.prototype.isAlpha = function(ch)
{
	if (ch >= 'a' && ch <= 'z')
	{
		return true; // lower case letter
	}
	return (ch >= 'A' && ch <= 'Z');
};
//----------------------------------------------------------------------------------
// Returns true when ch lies in the range 0-9 (plain string comparison).
WSHTMLParser.prototype.isDigit = function(ch)
{
	if (ch >= '0')
	{
		return (ch <= '9');
	}
	return false;
};
//----------------------------------------------------------------------------------
// Returns true when ch is a letter (isAlpha) or a digit (isDigit).
WSHTMLParser.prototype.isAlnum = function(ch)
{
	var bLetter = this.isAlpha(ch);
	if (bLetter)
	{
		return bLetter;
	}
	return this.isDigit(ch);
};
//----------------------------------------------------------------------------------
// Returns true when ch is one of the punctuation marks , . : ; ! ?
WSHTMLParser.prototype.isPunct = function(ch)
{
	var sMarks = ",.:;!?";
	for (var k = 0; k < sMarks.length; ++k)
	{
		if (ch == sMarks.charAt(k))
		{
			return true;
		}
	}
	return false;
};
//----------------------------------------------------------------------------------
// Returns true when the character at nPos has a neighbour on both sides inside
// sText and both neighbours are alphanumeric (isAlnum).
WSHTMLParser.prototype.isMiddleChar = function(sText,nPos)
{
	var nNext = nPos + 1;
	var nPrev = nPos - 1;
	if (nNext < sText.length && nPrev >= 0)
	{
		return (this.isAlnum(sText.charAt(nNext)) && this.isAlnum(sText.charAt(nPrev)));
	}
	return false; // first or last character: no neighbour on one side
};
//----------------------------------------------------------------------------------
// Returns true when the character of sText at nPos may be part of a word:
// an alphanumeric character, an underscore, or an apostrophe enclosed by
// alphanumeric characters (words like: it's, joe's, etc.).
WSHTMLParser.prototype.isLegalChar = function(sText,nPos)
{
	var ch = sText.charAt(nPos);
	if (this.isAlnum(ch) || ch == '_')
	{
		return true;
	}
	if (ch == '\'')
	{
		return this.isMiddleChar(sText,nPos);
	}
	return false;
};
//----------------------------------------------------------------------------------
// Returns true when sWord looks like an internet address:
//   - starts with "www." and has at least one more character,
//   - starts with "http://" and has at least one more character, or
//   - contains '@' (not as first character) followed somewhere by a '.'.
WSHTMLParser.prototype.isInternetAddress = function(sWord)
{
	if (sWord.length == 0)
	{
		return false;
	}
	if (sWord.indexOf("www.") == 0)
	{
		return (sWord.length > 4);
	}
	if (sWord.indexOf("http://") == 0)
	{
		return (sWord.length > 7);
	}

	// e-mail like: something@host.tld
	var nAtPos = sWord.indexOf('@');
	if (nAtPos > 0)
	{
		return (sWord.indexOf('.',nAtPos) >= 0);
	}
	return false;
};
//----------------------------------------------------------------------------------
// Checks whether sWord has the form "a.s.a.p", "p.s", etc.: up to (but not
// including) the last character, letters and dots must alternate, starting
// with a letter. The last character is rejected only when a dot was expected
// at its position and it is a letter; anything else is accepted there.
// Words shorter than two characters are never abbreviations.
WSHTMLParser.prototype.isAbbreviation = function(sWord)
{
	if (sWord.length < 2)
	{
		return false;
	}

	var nLast = sWord.length - 1;
	for (var i = 0; i < nLast; ++i)
	{
		var ch = sWord.charAt(i);
		// even positions hold letters, odd positions hold dots
		var bFits = (i % 2 == 1) ? (ch == '.') : this.isAlpha(ch);
		if (!bFits)
		{
			return false;
		}
	}

	if (nLast % 2 == 0)
	{
		return true; // a letter slot: the last character is not inspected
	}
	return !this.isAlpha(sWord.charAt(nLast));
};
//----------------------------------------------------------------------------------
// Returns true when sWord consists only of characters that may appear in a
// numeric expression, subject to these rules:
//   - digits are always accepted;
//   - the operators - + * / = are accepted anywhere;
//   - '.' is accepted anywhere;
//   - ',' is accepted only if a digit was seen since the last ',' or '.';
//   - 'x'/'X' is accepted as first character or right after a '0' (hex prefix);
//   - 'e'/'E' is accepted as last character, otherwise it must be followed
//     by '+' or '-' (exponent sign).
// An empty string is not a number.
WSHTMLParser.prototype.isNumber = function(sWord)
{
	if (sWord.length == 0)
		return false;

	var state_comma = false; // true while a ',' is allowed at the current position
	for (var i=0 ; i<sWord.length ; ++i)
	{
		var ch = sWord.charAt(i);
		switch (ch)
		{
			case '-': case '+': case '*': case '/': case '=':
				break;
			case '.':
				state_comma = false;
				break;
			case ',':
				if (! state_comma)
					return false;
				state_comma = false;
				break;
			case 'x': case 'X':
				if (i>0 && sWord.charAt(i-1) != '0')
					return false;
				break;
			case 'E': case 'e':
				// compare against the word's own length, not a free global
				if (i <= sWord.length-2)
				{
					var chNext = sWord.charAt(i+1);
					if (chNext != '+' && chNext != '-')
						return false;
				}
				break;
			default:
				if (! this.isDigit(ch))
					return false;
				state_comma = true;
		}
	}
	return true;
};
//----------------------------------------------------------------------------------
// Returns true when sWord is an abbreviation, a number or an internet
// address; the checks run in that order and stop at the first hit.
WSHTMLParser.prototype.isWholeWord = function(sWord)
{
	if (this.isAbbreviation(sWord))
	{
		return true;
	}
	if (this.isNumber(sWord))
	{
		return true;
	}
	return this.isInternetAddress(sWord);
};
//----------------------------------------------------------------------------------
// Appends {sWord, nPos, nLength} for sToken to vTokens unless sToken is empty.
// Returns the position right behind the token (nPos plus the token length).
WSHTMLParser.prototype.addToken = function(vTokens,sToken,nPos)
{
	var nTokenLength = sToken.length;
	if (nTokenLength > 0)
	{
		vTokens.push({sWord: sToken, nPos: nPos, nLength: nTokenLength});
	}
	return parseInt(nPos + nTokenLength);
};
//----------------------------------------------------------------------------------
// Splits one whitespace-free chunk of text into tokens and appends them to
// vTokens (through addToken); nPos is the position of the chunk's first
// character.
//   - Abbreviations, numbers and internet addresses are kept in one piece,
//     except that a trailing punctuation mark becomes a token of its own.
//   - Anything else is cut at the first character that is not legal inside a
//     word: the text before it, the character itself, and (recursively) the
//     remainder are added in that order.
WSHTMLParser.prototype.extractWords = function(vTokens, sToken, nPos)
{
	var bKeepWhole = this.isAbbreviation(sToken) || this.isNumber(sToken) || this.isInternetAddress(sToken);
	if (bKeepWhole)
	{
		var chLast = sToken.charAt(sToken.length - 1);
		if (!this.isPunct(chLast))
		{
			this.addToken(vTokens,sToken,nPos);
			return;
		}
		// detach the trailing punctuation mark
		var nAfterBody = this.addToken(vTokens,sToken.substr(0,sToken.length - 1),nPos);
		this.addToken(vTokens,chLast,nAfterBody);
		return;
	}

	// locate the first character that cannot be part of a word
	var nBreak = 0;
	while (nBreak < sToken.length && this.isLegalChar(sToken,nBreak))
	{
		++nBreak;
	}

	var nNext = this.addToken(vTokens,sToken.substr(0,nBreak),nPos);
	if (nBreak >= sToken.length)
	{
		return; // the whole chunk was a single word
	}

	//',' '.' and all the rest
	nNext = this.addToken(vTokens,sToken.charAt(nBreak),nNext);
	var sRest = sToken.substr(nBreak+1);
	if (sRest.length > 0)
	{
		this.extractWords(vTokens,sRest,nNext);
	}
};
//----------------------------------------------------------------------------------
// Returns the index of the first non-whitespace character of sText at or
// after nPos; when only whitespace follows, the text length is returned.
// A start position outside the text is returned unchanged.
WSHTMLParser.prototype.skipWhitespaces = function(sText,nPos)
{
	for (var nIdx = nPos; nIdx < sText.length; ++nIdx)
	{
		if (!this.isWhiteSpace(sText.charAt(nIdx)))
		{
			return nIdx;
		}
	}
	return nIdx;
};
//----------------------------------------------------------------------------------
// Reads the next whitespace-delimited chunk of sText starting at nPos.
//   - Leading whitespace (other than a line feed) is skipped.
//   - A line feed or a double quote ends the current chunk; when no chunk has
//     been started yet, the character itself becomes the chunk.
// Returns {sWord, nPos}: the chunk (possibly empty) and the position where
// the next read should start, with following whitespace already skipped.
WSHTMLParser.prototype.getToken = function(sText,nPos)
{
	var sChunk = "";
	var nCursor = nPos;
	while (nCursor < sText.length)
	{
		var ch = sText.charAt(nCursor);
		if (ch=='\n' || ch=='\"')
		{
			if (sChunk.length == 0)
			{
				sChunk = ch; // the delimiter is a chunk by itself
			}
			else
			{
				--nCursor;   // leave the delimiter for the next call
			}
			break;
		}
		if (!this.isWhiteSpace(ch))
		{
			sChunk += ch;
		}
		else if (sChunk.length > 0)
		{
			break;       // whitespace closes the chunk
		}
		++nCursor;
	}
	var nNextPos = this.skipWhitespaces(sText,nCursor+1);
	return {sWord: sChunk, nPos: nNextPos};
};
//-----------------------------------------------------------------------
// Tokenizes plain text: reads sText chunk by chunk (getToken) and lets
// extractWords append the resulting tokens to vTokens. Token positions are
// the chunk positions inside sText shifted by nOffset. Stops at the first
// empty chunk.
WSHTMLParser.prototype.tokenizeText = function(vTokens,sText,nOffset)
{
	var nCursor = this.skipWhitespaces(sText,0);
	var oChunk = this.getToken(sText,nCursor);
	while (oChunk.sWord.length > 0)
	{
		this.extractWords(vTokens,oChunk.sWord,nCursor+nOffset);
		nCursor = oChunk.nPos;
		oChunk = this.getToken(sText,nCursor);
	}
};
//-----------------------------------------------------------------------
// Tokenizes plain text and returns the token array (positions are relative
// to the start of sText, i.e. offset 0).
WSHTMLParser.prototype.parseText = function(sText)
{
	var vCollected = [];
	this.tokenizeText(vCollected,sText,0);
	return vCollected;
};
//==============================================================================
//==============================================================================
// Code imported from WZHTMLParse.cpp
//==============================================================================
// Starting at nCurrentPos, skips whitespace and every directly following
// markup element and returns the index where text starts.
//   - "<script" jumps to the matching "</script" before its '>' is skipped;
//   - "<!--" jumps to the closing "-->";
//   - any other '<' is skipped up to and including the next '>'.
// Returns -1 when a closing "</script", "-->" or '>' cannot be found; the
// result may equal the content length when nothing but markup follows.
// The literal comparisons are case sensitive.
WSHTMLParser.prototype.getStartTextPosition = function(strContent, nCurrentPos)
{
	var nIndex = this.skipWhitespaces(strContent,nCurrentPos);
	while (nIndex>=0 && nIndex<strContent.length && strContent.charAt(nIndex)=='<')
	{
		// elements whose content must be skipped as a whole
		if (strContent.substring(nIndex,nIndex+7) == "<script")
		{
			nIndex = strContent.indexOf("</script",nIndex);
		}
		else if (strContent.substring(nIndex,nIndex+4) == "<!--")
		{
			nIndex = strContent.indexOf("-->",nIndex);
		}

		// step behind the '>' that closes the current element
		if (nIndex >= 0)
		{
			var nClose = strContent.indexOf(">",nIndex);
			nIndex = (nClose >= 0) ? nClose + 1 : nClose;
		}
		nIndex = this.skipWhitespaces(strContent,nIndex);
	}
	return nIndex;
};
//-----------------------------------------------------------------------------------
// Returns the index of the next '<' in strContent at or after nCurrentPos,
// or -1 when there is none.
WSHTMLParser.prototype.getStartTagPosition = function(strContent, nCurrentPos)
{
	return strContent.indexOf('<',nCurrentPos);
};
//-----------------------------------------------------------------------------------
// Returns the position where the text of the document body starts: the
// result of getStartTextPosition() applied at the first "<body" (case
// sensitive), or 0 when the content has no "<body".
WSHTMLParser.prototype.getBodyPosition = function(strContent)
{
	var nBodyTag = strContent.indexOf("<body");
	if (nBodyTag < 0)
	{
		return 0; // no body element: start at the very beginning
	}
	return this.getStartTextPosition(strContent,nBodyTag);
};
//-----------------------------------------------------------------------------------
// Returns the index at or after nCurrentPos where the next piece of text
// starts. Blanks and '&' characters are skipped; when the skipped run ended
// with '&' and a ';' follows within the next few characters
// (findEndEscTokenIndex), the whole escape sequence is skipped as well and
// the search continues behind it.
// The result may equal the content length when nothing is left.
WSHTMLParser.prototype.findStartTokenIndex = function(strContent, nCurrentPos)
{
	//int nStart = strContent.find_first_not_of(" &",nCurrentPos);
	var nStart = nCurrentPos;
	while (nStart < strContent.length)
	{
		var ch = strContent.charAt(nStart);
		if (ch != '&' && ch != ' ')
		{
			break;
		}
		++nStart;
	}

	// nothing skipped, or ran off the end
	if (nStart==nCurrentPos || nStart>=strContent.length)
	{
		return nStart;
	}
	// the run ended with a blank: plain text starts here
	if (strContent.charAt(nStart-1) != '&')
	{
		return nStart;
	}

	// possibly inside an escape sequence such as "&nbsp;"
	var nFinish = this.findEndEscTokenIndex(strContent,nStart);
	if (nFinish >= 0 && nFinish < strContent.length)
	{
		return this.findStartTokenIndex(strContent,nFinish+1);
	}
	return nStart;
};
//-----------------------------------------------------------------------------------
// Looks for the ';' that closes an escape sequence, inspecting at most five
// characters starting at nCurrentPos, then checks the character at the
// position where the scan stopped.
// Returns the index of that character when it is ';', -1 when it is some
// other character, and the content length when the scan ran off the end.
WSHTMLParser.prototype.findEndEscTokenIndex = function(strContent, nCurrentPos)
{
	var nLimit = nCurrentPos + 5;
	var nScan = nCurrentPos;
	while (nScan<strContent.length && nScan<nLimit && strContent.charAt(nScan)!=';')
	{
		++nScan;
	}

	var bInsideContent = (nScan>=0 && nScan<strContent.length);
	if (bInsideContent && strContent.charAt(nScan)!=';')
	{
		return -1;
	}
	return nScan;
};
//-----------------------------------------------------------------------------------
// Returns the index of the last character of the text piece that starts at
// nCurrentPos. The piece ends at the first blank, ';' or '&': a ';' still
// belongs to the piece, a blank or '&' does not. When no delimiter follows,
// the content length is returned.
WSHTMLParser.prototype.findEndTokenIndex = function(strContent, nCurrentPos)
{
	var nFinish = nCurrentPos;
	while (nFinish < strContent.length)
	{
		var ch = strContent.charAt(nFinish);
		if (ch == ' ' || ch == ';' || ch == '&')
		{
			break;
		}
		++nFinish;
	}

	// step back so that a blank or '&' is excluded from the piece
	if (nFinish>=0 && nFinish<strContent.length && strContent.charAt(nFinish)!=';')
	{
		--nFinish;
	}
	return nFinish;
};
//-----------------------------------------------------------------------
// Extracts the next non-empty piece of text from strContent, searching from
// nCurrentPos (start via findStartTokenIndex, end via findEndTokenIndex).
// Returns {sWord, nStart, nFinish}: the piece and the indexes of its first
// and last character. When nothing is left sWord is empty; nStart/nFinish
// then hold whatever the last attempt produced and may be undefined.
WSHTMLParser.prototype.getHTMLToken = function(strContent, nCurrentPos)
{
	var nStart, nFinish;
	var sPiece = "";
	var nCursor = nCurrentPos;

	while (nCursor < strContent.length)
	{
		nStart = this.findStartTokenIndex(strContent,nCursor);
		if (nStart<0 || nStart>=strContent.length)
		{
			break; // no further text
		}

		nFinish = this.findEndTokenIndex(strContent,nStart);
		if (nFinish<0 || nFinish>=strContent.length)
		{
			nFinish = strContent.length - 1; // piece runs to the end
		}

		sPiece = strContent.substr(nStart,nFinish-nStart+1);
		if (sPiece.length > 0)
		{
			break;
		}
		nCursor = nFinish + 1; // empty piece: try again behind it
	}
	return {sWord: sPiece, nStart: nStart, nFinish: nFinish};
};
//-----------------------------------------------------------------------------------
// Tokenizes one markup-free stretch of HTML content: takes it apart piece by
// piece (getHTMLToken) and hands every piece to tokenizeText, which appends
// the tokens to vTokens. nOffset is the position of strSentence inside the
// complete document, so token positions refer to the document.
WSHTMLParser.prototype.tokenizeHTML = function(vTokens,strSentence,nOffset)
{
	var nCursor = 0;
	var oPiece = this.getHTMLToken(strSentence,nCursor);
	while (oPiece.sWord.length > 0)
	{
		this.tokenizeText(vTokens,oPiece.sWord,nOffset + oPiece.nStart);
		nCursor = oPiece.nFinish + 1; // continue right behind this piece
		oPiece = this.getHTMLToken(strSentence,nCursor);
	}
};
//-----------------------------------------------------------------------------------
// Tokenizes an HTML document and returns the token array.
// Markup is located on a lower-cased copy of the content, while the text
// handed to tokenizeHTML is cut from the original, so tokens keep their
// case. Each stretch of text between two tags is tokenized with its start
// index as offset.
WSHTMLParser.prototype.parseHTML = function(strOriginalContent)
{
	var sLowered = strOriginalContent.toLowerCase();
	var vTokens = [];
	var nTextStart = this.getBodyPosition(sLowered);

	while (nTextStart>=0 && nTextStart<sLowered.length)
	{
		// skip the markup in front of the next stretch of text
		nTextStart = this.getStartTextPosition(sLowered,nTextStart);
		if (nTextStart < 0)
		{
			break;
		}

		// the stretch ends at the next tag, or at the end of the content
		var nTextEnd = this.getStartTagPosition(sLowered,nTextStart);
		if (nTextEnd < 0 || nTextEnd >= sLowered.length)
		{
			nTextEnd = sLowered.length;
		}

		this.tokenizeHTML(vTokens,strOriginalContent.substr(nTextStart,nTextEnd-nTextStart),nTextStart);
		nTextStart = nTextEnd;
	}
	return vTokens;
};
//==============================================================================

