// Unicode.cpp
// (c) 2003-2004 exeal

#include "StdAfx.h"
#include "Encoder.h"

using namespace Ascension::Encodings;
using namespace std;
using namespace Manah::Text;


// Associates each Unicode code page identifier with the encoder (or detector)
// implemented in this file. The macros are declared outside this file
// (presumably in Encoder.h -- TODO confirm what exactly they expand to).
DEFINE_ENCODER_CLASS(CPEX_UNICODE_UTF5, Unicode_Utf5);
DEFINE_ENCODER_CLASS(CP_UTF7, Unicode_Utf7);
DEFINE_ENCODER_CLASS(CP_UTF8, Unicode_Utf8);
DEFINE_ENCODER_CLASS(CPEX_UNICODE_UTF16LE, Unicode_Utf16LE);
DEFINE_ENCODER_CLASS(CPEX_UNICODE_UTF16BE, Unicode_Utf16BE);
DEFINE_ENCODER_CLASS(CPEX_UNICODE_UTF32LE, Unicode_Utf32LE);
DEFINE_ENCODER_CLASS(CPEX_UNICODE_UTF32BE, Unicode_Utf32BE);
DEFINE_DETECTOR(CPEX_UNICODE_AUTODETECT, Unicode);


namespace {
	///	 UTF-8 INebgƂ݂ȂAꂪoCgԂ
	inline size_t GetByteLengthAsUTF8Char(const uchar* pszSrc, size_t cch) {
		if(cch > 5) {	// 6oCg
			// 1111 110x 10xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 10xx xxxx
			if((pszSrc[0] & 0xFE) == 0xFC
					&& (pszSrc[1] & 0xC0) == 0x80
					&& (pszSrc[5] & 0xC0) == 0x80)
				return 6;
		}
		if(cch > 4) {	// 5oCg
			// 1111 10xx 10xx xxxx xxxx xxxx xxxx xxxx 10xx xxxx
			if((pszSrc[0] & 0xFC) == 0xF8
					&& (pszSrc[1] & 0xC0) == 0x80
					&& (pszSrc[4] & 0xC0) == 0x80)
				return 5;
		}
		if(cch > 3) {	// 4oCg
			// 1111 0xxx 10xx xxxx xxxx xxxx 10xx xxxx
			if((pszSrc[0] & 0xF8) == 0xF0
					&& (pszSrc[1] & 0xC0) == 0x80
					&& (pszSrc[3] & 0xC0) == 0x80)
				return 4;
		}
		if(cch > 2) {	// 3oCg
			// 1110 xxxx 10xx xxxx 10xx xxxx
			if((pszSrc[0] & 0xF0) == 0xE0
					&& (pszSrc[1] & 0xC0) == 0x80
					&& (pszSrc[2] & 0xC0) == 0x80)
				return 3;
		}
		if(cch > 1) {	// 2oCg
			// 110x xxxx 10xx xxxx
			if((pszSrc[0] & 0xE0) == 0xC0
					&& (pszSrc[1] & 0xC0) == 0x80)
				return 2;
		}
		// 1oCgs
		// 0xxx xxxx
		if((pszSrc[0] & 0x80) == 0x00)
			return 1;

		return 0;
	}

	/**
	 *	Tells whether the text starts with a UTF-16 byte order mark.
	 *	@param pszText	the text to inspect
	 *	@param cch		the length of @a pszText in bytes
	 *	@return			1 if the first two bytes equal szBom_Utf16LE (little endian),
	 *					2 if they equal szBom_Utf16BE (big endian), otherwise 0
	 */
	inline size_t HasBOMOfUTF16(const uchar* pszText, size_t cch) {
		if(cch >= 2) {
			if(memcmp(pszText, szBom_Utf16LE, 2) == 0)
				return 1;
			if(memcmp(pszText, szBom_Utf16BE, 2) == 0)
				return 2;
		}
		return 0;
	}

	/**
	 *	Tells whether the text starts with a UTF-32 byte order mark.
	 *	@param pszText	the text to inspect
	 *	@param cch		the length of @a pszText in bytes
	 *	@return			1 if the first four bytes equal szBom_Utf32LE (little endian),
	 *					2 if they equal szBom_Utf32BE (big endian), otherwise 0
	 */
	inline size_t HasBOMOfUTF32(const uchar* pszText, size_t cch) {
		if(cch >= 4) {
			if(memcmp(pszText, szBom_Utf32LE, 4) == 0)
				return 1;
			if(memcmp(pszText, szBom_Utf32BE, 4) == 0)
				return 2;
		}
		return 0;
	}

	/**
	 *	Tells whether the text starts with the UTF-8 byte order mark.
	 *	@param pszText	the text to inspect
	 *	@param cch		the length of @a pszText in bytes
	 *	@return			true if at least three bytes are available and they equal szBom_Utf8
	 */
	inline bool HasBOMOfUTF8(const uchar* pszText, size_t cch) {
		if(cch < 3)
			return false;
		return memcmp(pszText, szBom_Utf8, 3) == 0;
	}

	/**
	 *	Measures how much of the text can be interpreted as UTF-8.
	 *	A leading UTF-8 BOM makes the whole text count as convertible. Otherwise
	 *	the scan stops at the first ESC (0x1B) byte or at the first byte sequence
	 *	that GetByteLengthAsUTF8Char rejects.
	 *	@param pszText	the text to inspect
	 *	@param cch		the length of @a pszText in bytes
	 *	@return			the length, in bytes, of the leading part accepted as UTF-8
	 */
	inline size_t IsUTF8String(const uchar* pszText, size_t cch) {
		if(HasBOMOfUTF8(pszText, cch))
			return cch;

		size_t	cbAccepted = 0;
		while(cbAccepted < cch && pszText[cbAccepted] != 0x1B) {
			const size_t	cbChar = GetByteLengthAsUTF8Char(pszText + cbAccepted, cch - cbAccepted);
			if(cbChar == 0)
				break;
			cbAccepted += cbChar;
		}
		return cbAccepted;
	}

	void DetectCodePage_Unicode(const char* psz, size_t cch, CodePage& cpResult, size_t& cchConvertable) {
		cpResult = 0;
		cchConvertable = cch;
		if(HasBOMOfUTF8(reinterpret_cast<const uchar*>(psz), cch))
			cpResult = CP_UTF8;
		else if(size_t n = HasBOMOfUTF16(reinterpret_cast<const uchar*>(psz), cch))
			cpResult = (n == 1) ? CPEX_UNICODE_UTF16LE : CPEX_UNICODE_UTF16BE;
		else if(size_t n = HasBOMOfUTF32(reinterpret_cast<const uchar*>(psz), cch))
			cpResult = (n == 1) ? CPEX_UNICODE_UTF32LE : CPEX_UNICODE_UTF32BE;
		if(cpResult != 0)
			return;
		cpResult = CP_UTF8;
		cchConvertable = IsUTF8String(reinterpret_cast<const uchar*>(psz), cch);
	}
}


// UTF-16 little endian /////////////////////////////////////////////////////

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf16LE::CEncoder_Unicode_Utf16LE() {
}

/**
 *	Writes UTF-16 code units as little-endian byte pairs.
 *	Conversion stops when the source is exhausted or when fewer than two bytes
 *	remain in the destination buffer.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf16LE::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	cbWritten = 0;
	for(size_t iUnit = 0; iUnit < cchSrc && cbWritten + 1 < cchDest; ++iUnit) {
		pszDest[cbWritten++] = pwszSrc[iUnit] & 0x00FF;			// low byte first
		pszDest[cbWritten++] = (pwszSrc[iUnit] >> 8) & 0x00FF;	// then the high byte
	}
	return cbWritten;
}

/**
 *	Reads little-endian byte pairs and stores them as UTF-16 code units.
 *	Conversion stops when fewer than two source bytes remain or the destination
 *	buffer is full; a trailing odd byte is ignored.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf16LE::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	// Work on unsigned bytes: shifting a negative plain char is undefined
	// before C++20 and would sign-extend where wchar_t is wider than 16 bits.
	const uchar* const	pbSrc = reinterpret_cast<const uchar*>(pszSrc);

	size_t	iSrc = 0, iDest = 0;
	while(iSrc + 1 < cchSrc && iDest < cchDest) {
		pwszDest[iDest] = static_cast<wchar_t>(pbSrc[iSrc] | (pbSrc[iSrc + 1] << 8));
		++iDest;
		iSrc += 2;
	}
	return iDest;
}

/// Returns the constant 2 (presumably the largest number of bytes this encoder emits per UTF-16 code unit -- confirm against the base class contract).
uchar CEncoder_Unicode_Utf16LE::GetMaxCharacterLength() const {
	return 2;
}


// UTF-16 big endian ////////////////////////////////////////////////////////

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf16BE::CEncoder_Unicode_Utf16BE() {
}

/**
 *	Writes UTF-16 code units as big-endian byte pairs.
 *	Conversion stops when the source is exhausted or when fewer than two bytes
 *	remain in the destination buffer.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf16BE::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	cbWritten = 0;
	for(size_t iUnit = 0; iUnit < cchSrc && cbWritten + 1 < cchDest; ++iUnit) {
		pszDest[cbWritten++] = (pwszSrc[iUnit] >> 8) & 0x00FF;	// high byte first
		pszDest[cbWritten++] = pwszSrc[iUnit] & 0x00FF;			// then the low byte
	}
	return cbWritten;
}

/**
 *	Reads big-endian byte pairs and stores them as UTF-16 code units.
 *	Conversion stops when fewer than two source bytes remain or the destination
 *	buffer is full; a trailing odd byte is ignored.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf16BE::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	// Work on unsigned bytes: shifting a negative plain char is undefined
	// before C++20 and would sign-extend where wchar_t is wider than 16 bits.
	const uchar* const	pbSrc = reinterpret_cast<const uchar*>(pszSrc);

	size_t	iSrc = 0, iDest = 0;
	while(iSrc + 1 < cchSrc && iDest < cchDest) {
		pwszDest[iDest] = static_cast<wchar_t>((pbSrc[iSrc] << 8) | pbSrc[iSrc + 1]);
		++iDest;
		iSrc += 2;
	}
	return iDest;
}

/// Returns the constant 2 (presumably the largest number of bytes this encoder emits per UTF-16 code unit -- confirm against the base class contract).
uchar CEncoder_Unicode_Utf16BE::GetMaxCharacterLength() const {
	return 2;
}


// UTF-32 little endian ////////////////////////////////////////////////////////////////////

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf32LE::CEncoder_Unicode_Utf32LE() {
}

/**
 *	Converts UTF-16 text to UTF-32, least significant byte first.
 *	Each code point obtained from DecodeUTF16SurrogatePairToCodePoint is written
 *	as four bytes. Conversion stops when the source is exhausted or fewer than
 *	four bytes remain in the destination buffer.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf32LE::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, cbWritten = 0;
	while(iSrc < cchSrc && cbWritten + 3 < cchDest) {
		const CodePoint	cp = DecodeUTF16SurrogatePairToCodePoint(pwszSrc + iSrc, cchSrc - iSrc);

		// byte 0 carries bits 0-7, byte 3 carries bits 24-31
		for(size_t iByte = 0; iByte < 4; ++iByte)
			pszDest[cbWritten + iByte] = static_cast<char>((cp >> (8 * iByte)) & 0xFF);
		cbWritten += 4;

		// a code point above the BMP came from two source code units
		iSrc += (cp > 0xFFFF) ? 2 : 1;
	}
	return cbWritten;
}

/**
 *	Converts little-endian UTF-32 to UTF-16.
 *	Every group of four source bytes forms one code point, which is stored as
 *	one code unit or as a surrogate pair. Conversion stops when fewer than four
 *	source bytes remain, when the destination is full, or when a code point
 *	above U+FFFF is met with only one free destination slot.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf32LE::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iDest = 0;
	for(size_t iSrc = 0; iSrc + 3 < cchSrc && iDest < cchDest; iSrc += 4) {
		const uchar*	pszSrc_ = reinterpret_cast<const uchar*>(pszSrc) + iSrc;
		// Widen each byte before shifting so that "<< 24" cannot overflow an int.
		const CodePoint	cp = static_cast<CodePoint>(pszSrc_[0])
			| (static_cast<CodePoint>(pszSrc_[1]) << 8)
			| (static_cast<CodePoint>(pszSrc_[2]) << 16)
			| (static_cast<CodePoint>(pszSrc_[3]) << 24);

		// A code point above the BMP may need two code units; never write past the buffer.
		if(cp > 0xFFFF && cchDest - iDest < 2)
			break;

		if(EncodeCodePointToUTF16SurrogatePair(cp, pwszDest + iDest))
			iDest += 2;
		else
			++iDest;
	}
	return iDest;
}

/// Returns the constant 4, matching the four bytes this encoder writes per code point.
uchar CEncoder_Unicode_Utf32LE::GetMaxCharacterLength() const {
	return 4;
}


// UTF-32 big endian ////////////////////////////////////////////////////////////////////

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf32BE::CEncoder_Unicode_Utf32BE() {
}

/**
 *	Converts UTF-16 text to UTF-32, most significant byte first.
 *	Each code point obtained from DecodeUTF16SurrogatePairToCodePoint is written
 *	as four bytes. Conversion stops when the source is exhausted or fewer than
 *	four bytes remain in the destination buffer.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf32BE::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, cbWritten = 0;
	while(iSrc < cchSrc && cbWritten + 3 < cchDest) {
		const CodePoint	cp = DecodeUTF16SurrogatePairToCodePoint(pwszSrc + iSrc, cchSrc - iSrc);

		// byte 0 carries bits 24-31, byte 3 carries bits 0-7
		for(size_t iByte = 0; iByte < 4; ++iByte)
			pszDest[cbWritten + iByte] = static_cast<char>((cp >> (8 * (3 - iByte))) & 0xFF);
		cbWritten += 4;

		// a code point above the BMP came from two source code units
		iSrc += (cp > 0xFFFF) ? 2 : 1;
	}
	return cbWritten;
}

/**
 *	Converts big-endian UTF-32 to UTF-16.
 *	Every group of four source bytes forms one code point, which is stored as
 *	one code unit or as a surrogate pair. Conversion stops when fewer than four
 *	source bytes remain, when the destination is full, or when a code point
 *	above U+FFFF is met with only one free destination slot.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf32BE::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iDest = 0;
	for(size_t iSrc = 0; iSrc + 3 < cchSrc && iDest < cchDest; iSrc += 4) {
		const uchar*	pszSrc_ = reinterpret_cast<const uchar*>(pszSrc) + iSrc;
		// Widen each byte before shifting so that "<< 24" cannot overflow an int.
		const CodePoint	cp = static_cast<CodePoint>(pszSrc_[3])
			| (static_cast<CodePoint>(pszSrc_[2]) << 8)
			| (static_cast<CodePoint>(pszSrc_[1]) << 16)
			| (static_cast<CodePoint>(pszSrc_[0]) << 24);

		// A code point above the BMP may need two code units; never write past the buffer.
		if(cp > 0xFFFF && cchDest - iDest < 2)
			break;

		if(EncodeCodePointToUTF16SurrogatePair(cp, pwszDest + iDest))
			iDest += 2;
		else
			++iDest;
	}
	return iDest;
}

/// Returns the constant 4, matching the four bytes this encoder writes per code point.
uchar CEncoder_Unicode_Utf32BE::GetMaxCharacterLength() const {
	return 4;
}


// UTF-5 ////////////////////////////////////////////////////////////////////

/**
 *	Decodes one UTF-5 character into a code point.
 *	The first byte must be 'G'..'V' (carrying the value 0..15); every following
 *	'0'..'9' / 'A'..'F' byte appends four more bits. Decoding ends at the first
 *	byte that is not such a digit or at the end of the input.
 *	@param pnCodePoint	[out] receives the decoded code point; untouched on failure
 *	@param pszSrc		the bytes to decode
 *	@param cchSrc		the number of bytes available at @a pszSrc
 *	@return				the number of bytes consumed, or 0 if the first byte
 *						is not a valid UTF-5 lead byte
 */
inline size_t DecodeUTF5CharToUnicode(CodePoint* pnCodePoint, const uchar* pszSrc, size_t cchSrc) {
	assert(pnCodePoint != 0 && pszSrc != 0);

	const uchar	chLead = pszSrc[0];
	if(chLead < 'G' || chLead > 'V')
		return 0;

	CodePoint	cpValue = chLead - 'G';
	size_t		cbUsed = 1;
	while(cbUsed < cchSrc) {
		const uchar	ch = pszSrc[cbUsed];
		CodePoint	nDigit;
		if(ch >= '0' && ch <= '9')
			nDigit = ch - '0';
		else if(ch >= 'A' && ch <= 'F')
			nDigit = ch - 'A' + 0x0A;
		else
			break;	// the next character (or foreign data) starts here
		cpValue = (cpValue << 4) | nDigit;
		++cbUsed;
	}
	*pnCodePoint = cpValue;
	return cbUsed;
}

/**
 *	Encodes one character of UTF-16 text as UTF-5.
 *	The code point returned by DecodeUTF16SurrogatePairToCodePoint is written
 *	as its hexadecimal digits without leading zeros: the first digit is mapped
 *	to 'G'..'V', the remaining digits to '0'..'9' / 'A'..'F'.
 *	@param pszDest	receives the encoded bytes (up to 8; the size is not checked here)
 *	@param pwszSrc	the UTF-16 text to encode
 *	@param cch		the number of code units available at @a pwszSrc
 *	@return			the number of bytes written (1 to 8), or 0 if the code point
 *					is 0x80000000 or above
 */
inline size_t EncodeUnicodeCharToUTF5(char* pszDest, const wchar_t* pwszSrc, size_t cch) {
	assert(pszDest != 0 && pwszSrc != 0);

	const CodePoint	cp = DecodeUTF16SurrogatePairToCodePoint(pwszSrc, cch);
	if(!(cp < 0x80000000))
		return 0;

	// count the hexadecimal digits needed (at least one, at most eight)
	size_t	cDigits = 1;
	while(cDigits < 8 && (cp >> (4 * cDigits)) != 0)
		++cDigits;

	// most significant digit: 0..15 -> 'G'..'V'
	size_t	nShift = 4 * (cDigits - 1);
	pszDest[0] = static_cast<char>((cp >> nShift) & 0x0F) + 'G';

	// remaining digits: ordinary upper-case hexadecimal
	for(size_t i = 1; i < cDigits; ++i) {
		nShift -= 4;
		const char	nDigit = static_cast<char>((cp >> nShift) & 0x0F);
		pszDest[i] = (nDigit < 0x0A) ? (nDigit + '0') : (nDigit - 0x0A + 'A');
	}
	return cDigits;
}

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf5::CEncoder_Unicode_Utf5() {
}

/**
 *	Converts UTF-16 text to UTF-5.
 *	Each character is encoded into a scratch buffer first and copied only if it
 *	fits, so the destination buffer is never overrun. Conversion stops when the
 *	source is exhausted or the next character no longer fits.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf5::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iDest = 0;
	for(size_t iSrc = 0; iSrc < cchSrc && iDest < cchDest; ++iSrc) {
		char	szEncoded[8];	// longest sequence EncodeUnicodeCharToUTF5 can emit
		const size_t	cConverted = EncodeUnicodeCharToUTF5(szEncoded, pwszSrc + iSrc, cchSrc - iSrc);

		if(cConverted == 0) {
			// not encodable: pass the code unit through, at the OUTPUT position
			pszDest[iDest] = static_cast<const char>(pwszSrc[iSrc]);
			++iDest;
			continue;
		}
		if(cConverted > cchDest - iDest)
			break;	// the whole character does not fit any more

		for(size_t i = 0; i < cConverted; ++i)
			pszDest[iDest + i] = szEncoded[i];
		iDest += cConverted;

		// Five or more digits mean a code point above U+FFFF, i.e. a surrogate
		// pair was consumed: skip its second half as well.
		if(cConverted >= 5)
			++iSrc;
	}
	return iDest;
}

/**
 *	Converts UTF-5 text to UTF-16.
 *	Bytes that do not start a UTF-5 character are copied through unchanged
 *	(zero-extended). Conversion stops when the source is exhausted, when the
 *	destination is full, or when a code point above U+FFFF is met with only one
 *	free destination slot.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf5::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		CodePoint	cp = 0;
		size_t		cDecoded = DecodeUTF5CharToUnicode(&cp,
			reinterpret_cast<const uchar*>(pszSrc + iSrc), cchSrc - iSrc);
		if(cDecoded == 0) {
			// Go through uchar so that bytes >= 0x80 are not sign-extended.
			pwszDest[iDest] = static_cast<uchar>(pszSrc[iSrc]);
			++iDest;
			cDecoded = 1;
		} else {
			// A code point above the BMP may need two code units; never write past the buffer.
			if(cp > 0xFFFF && cchDest - iDest < 2)
				break;
			// Advance by what was actually written rather than guessing from
			// the input length (leading zero digits would fool that guess).
			if(EncodeCodePointToUTF16SurrogatePair(cp, pwszDest + iDest))
				iDest += 2;
			else
				++iDest;
		}
		iSrc += cDecoded;
	}
	return iDest;
}

/// Returns the constant 6: EncodeUnicodeCharToUTF5 emits at most six bytes for code points below 0x01000000, which covers the UTF-16 range.
uchar CEncoder_Unicode_Utf5::GetMaxCharacterLength() const {
	return 6; // as UTF-16 range
}


// UTF-7 ////////////////////////////////////////////////////////////////////

/**
 *	Tells whether a byte belongs to UTF-7 "Set B", the alphabet of the modified
 *	BASE64 encoding: A-Z, a-z, 0-9, '+' and '/'.
 *	The ranges are tested explicitly instead of calling isalnum(), whose result
 *	depends on the current locale and may be true for bytes >= 0x80.
 */
inline bool IsUtf7SetB(uchar ch) {
	return (ch >= 'A' && ch <= 'Z')
			|| (ch >= 'a' && ch <= 'z')
			|| (ch >= '0' && ch <= '9')
			|| ch == '+' || ch == '/';
}

/**
 *	Tells whether a character can be copied to UTF-7 output as it is.
 *	Accepted are letters, the ranges ','..':' and '\''..')', the question mark,
 *	tab, space, carriage return and line feed. Anything above 'z' is rejected.
 */
inline bool IsUtf7SetD(wchar_t ch) {
	if(ch > L'z')
		return false;
	if(toBoolean(isalpha(static_cast<uchar>(ch))))
		return true;
	if(ch >= L',' && ch <= L':')	// , - . / 0-9 :
		return true;
	if(ch >= L'\'' && ch <= L')')	// ' ( )
		return true;
	switch(ch) {
	case L'\?':
	case L'\t':
	case L' ':
	case L'\r':
	case L'\n':
		return true;
	}
	return false;
}

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf7::CEncoder_Unicode_Utf7() {
}

/**
 *	Converts UTF-16 text to UTF-7.
 *	Characters accepted by IsUtf7SetD are copied directly, '+' becomes "+-",
 *	and every run of other characters is written as '+', the modified BASE64
 *	form of the big-endian UTF-16 code units, and a closing '-'.
 *	Every write is bounds-checked; when the destination buffer is full the
 *	conversion stops and the output produced so far is returned (it may then
 *	end inside a BASE64 run).
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf7::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	static const uchar	szBase64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	size_t				iSrc = 0, iDest = 0;

	while(iSrc < cchSrc) {
		// find the end of the run that needs modified BASE64 encoding
		size_t	iBase64End = iSrc;
		while(iBase64End < cchSrc
				&& !IsUtf7SetD(pwszSrc[iBase64End]) && pwszSrc[iBase64End] != L'+')
			++iBase64End;

		// encode the run, feeding 16 bits per code unit and draining 6 bits per output byte
		if(iBase64End != iSrc) {
			if(iDest >= cchDest)
				return iDest;
			pszDest[iDest++] = '+';

			unsigned long	nBits = 0;	// bit accumulator; only the low cBits bits are pending
			unsigned int	cBits = 0;	// never exceeds 5 + 16 = 21
			for(; iSrc < iBase64End; ++iSrc) {
				nBits = (nBits << 16) | (static_cast<unsigned long>(pwszSrc[iSrc]) & 0xFFFF);
				cBits += 16;
				while(cBits >= 6) {
					if(iDest >= cchDest)
						return iDest;
					cBits -= 6;
					pszDest[iDest++] = szBase64[(nBits >> cBits) & 0x3F];
				}
			}
			if(cBits > 0) {	// flush the remaining bits, padded with zeros on the right
				if(iDest >= cchDest)
					return iDest;
				pszDest[iDest++] = szBase64[(nBits << (6 - cBits)) & 0x3F];
			}

			if(iDest >= cchDest)
				return iDest;
			pszDest[iDest++] = '-';
		}

		if(iSrc >= cchSrc)
			break;
		if(pwszSrc[iSrc] == L'+') {	// '+' -> "+-"
			if(cchDest - iDest < 2)
				break;
			pszDest[iDest++] = '+';
			pszDest[iDest++] = '-';
			++iSrc;
		} else {	// copy as it is
			if(iDest >= cchDest)
				break;
			pszDest[iDest++] = static_cast<uchar>(pwszSrc[iSrc++]);
		}
	}
	return iDest;
}

/**
 *	Converts UTF-7 text to UTF-16.
 *	"+-" yields '+'. Any other '+' opens a modified BASE64 run that lasts until
 *	the first byte outside the BASE64 alphabet; a '-' directly after the run is
 *	swallowed. Inside a run every complete group of 16 bits becomes one code
 *	unit and left-over bits are discarded. All other bytes are copied through
 *	zero-extended.
 *	Every write is bounds-checked; conversion stops when the destination buffer
 *	is full.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf7::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	// BASE64 digit values indexed by ASCII code; 0xFF marks bytes outside the alphabet
	static const uchar	arrBase64[] = {
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	//
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	//
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	//
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	//
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	//  !"#$%&'
		0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xFF, 0xFF, 0x3F,	// ()*+,-./
		0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,	// 01234567
		0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	// 89:;<=>?
		0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,	// @ABCDEFG
		0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,	// HIJKLMNO
		0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,	// PQRSTUVW
		0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	// XYZ[\]^_
		0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,	// `abcdefg
		0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,	// hijklmno
		0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,	// pqrstuvw
		0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	// xyz{|}~
	};

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar	ch = static_cast<uchar>(pszSrc[iSrc]);
		if(ch != '+') {	// directly encoded character
			pwszDest[iDest++] = ch;
			++iSrc;
			continue;
		}

		++iSrc;	// skip the '+'
		if(iSrc < cchSrc && pszSrc[iSrc] == '-') {	// "+-" -> "+"
			pwszDest[iDest++] = L'+';
			++iSrc;
			continue;
		}

		// decode the modified BASE64 run: feed 6 bits per byte, drain 16 bits per code unit
		unsigned long	nBits = 0;	// bit accumulator; only the low cBits bits are pending
		unsigned int	cBits = 0;	// never exceeds 15 + 6 = 21
		for(; iSrc < cchSrc; ++iSrc) {
			const uchar	chDigit = static_cast<uchar>(pszSrc[iSrc]);
			// bytes >= 0x80 are outside the table and therefore end the run
			if(chDigit >= sizeof(arrBase64) || arrBase64[chDigit] == 0xFF)
				break;
			nBits = (nBits << 6) | arrBase64[chDigit];
			cBits += 6;
			if(cBits >= 16) {
				if(iDest >= cchDest)
					return iDest;
				cBits -= 16;
				pwszDest[iDest++] = static_cast<wchar_t>((nBits >> cBits) & 0xFFFF);
			}
		}

		// an explicit run terminator is absorbed
		if(iSrc < cchSrc && pszSrc[iSrc] == '-')
			++iSrc;
	}
	return iDest;
}

/// Returns the constant 8 (NOTE(review): how this bound relates to the '+' ... '-' framing of a BASE64 run is not evident from this file -- confirm against callers).
uchar CEncoder_Unicode_Utf7::GetMaxCharacterLength() const {
	return 8;
}


// UTF-8 ////////////////////////////////////////////////////////////////////

/**
 *	Decodes one UTF-8 character into a code point.
 *	Only sequences of one to four bytes are decoded. The obsolete five- and
 *	six-byte forms, which GetByteLengthAsUTF8Char can still report, are treated
 *	as failures so that @a cp is never left unset on success.
 *	@param cp		[out] receives the decoded code point; untouched on failure
 *	@param pszSrc	the bytes to decode
 *	@param cchSrc	the number of bytes available at @a pszSrc
 *	@return			the number of bytes consumed (1 to 4), or 0 on failure
 */
inline size_t DecodeUTF8CharToUnicode(CodePoint& cp, const uchar* pszSrc, size_t cchSrc) {
	assert(pszSrc != 0);

	const size_t	len = GetByteLengthAsUTF8Char(pszSrc, cchSrc);

	switch(len) {
	case 4:	// four bytes
		// 1111 0www  10xx xxxx  10yy yyyy  10zz zzzz -> 0000 0000 000w wwxx xxxx yyyy yyzz zzzz
		cp = ((pszSrc[0] & 0x07) << 18)
				| ((pszSrc[1] & 0x3F) << 12)
				| ((pszSrc[2] & 0x3F) << 6)
				| ((pszSrc[3] & 0x3F) << 0);
		break;
	case 3:	// three bytes
		// 1110 xxxx  10yy yyyy  10zz zzzz -> xxxx yyyy yyzz zzzz
		cp = ((pszSrc[0] & 0x0F) << 12)
				| ((pszSrc[1] & 0x3F) << 6)
				| ((pszSrc[2] & 0x3F) << 0);
		break;
	case 2:	// two bytes
		// 110y yyyy  10zz zzzz -> 0000 0yyy yyzz zzzz
		cp = ((pszSrc[0] & 0x1F) << 6)
				| ((pszSrc[1] & 0x3F) << 0);
		break;
	case 1:	// single byte
		// 0zzz zzzz -> 0000 0000 0zzz zzzz
		cp = pszSrc[0];
		break;
	default:
		// 0 (malformed) or 5/6 (no decoder, not representable in UTF-16)
		return 0;
	}

	return len;
}

/**
 *	Encodes one character of UTF-16 text as UTF-8.
 *	The code point is obtained from DecodeUTF16SurrogatePairToCodePoint and
 *	written as one to four bytes; five- and six-byte forms are not produced.
 *	@param pszDest	receives the encoded bytes (up to 4; the size is not checked here)
 *	@param pwszSrc	the UTF-16 text to encode
 *	@param cch		the number of code units available at @a pwszSrc
 *	@return			the number of bytes written (1 to 4), or 0 if the code point
 *					is above U+10FFFF
 */
inline size_t EncodeUnicodeCharToUTF8(char* pszDest, const wchar_t* pwszSrc, size_t cch) {
	assert(pszDest != 0 && cch != 0);

	const CodePoint	cp = DecodeUTF16SurrogatePairToCodePoint(pwszSrc, cch);

	// one byte:  0zzz zzzz
	if(cp <= 0x0000007F) {
		pszDest[0] = static_cast<const char>(cp);
		return 1;
	}
	// two bytes:  110y yyyy  10zz zzzz
	if(cp <= 0x000007FF) {
		pszDest[0] = static_cast<char>(0xC0 | (cp >> 6));
		pszDest[1] = static_cast<char>(0x80 | (cp & 0x003F));
		return 2;
	}
	// three bytes:  1110 xxxx  10yy yyyy  10zz zzzz
	if(cp <= 0x0000FFFF) {
		pszDest[0] = static_cast<char>(0xE0 | ((cp & 0xF000) >> 12));
		pszDest[1] = static_cast<char>(0x80 | ((cp & 0x0FC0) >> 6));
		pszDest[2] = static_cast<char>(0x80 | (cp & 0x003F));
		return 3;
	}
	// four bytes:  1111 0www  10xx xxxx  10yy yyyy  10zz zzzz
	if(cp <= 0x0010FFFF) {
		pszDest[0] = static_cast<char>(0xF0 | ((cp & 0x001C0000) >> 18));
		pszDest[1] = static_cast<char>(0x80 | ((cp & 0x0003F000) >> 12));
		pszDest[2] = static_cast<char>(0x80 | ((cp & 0x00000FC0) >> 6));
		pszDest[3] = static_cast<char>(0x80 | (cp & 0x0000003F));
		return 4;
	}
	// outside the Unicode range: failure
	return 0;
}

/// Constructor. The body is empty; nothing is initialized here.
CEncoder_Unicode_Utf8::CEncoder_Unicode_Utf8() {
}

/**
 *	Converts UTF-16 text to UTF-8.
 *	Each character is encoded into a scratch buffer first and copied only if it
 *	fits, so the destination buffer is never overrun. Conversion stops when the
 *	source is exhausted or the next character no longer fits.
 *	@return	the number of bytes written to the destination buffer
 */
size_t CEncoder_Unicode_Utf8::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iDest = 0;

	for(size_t iSrc = 0; iSrc < cchSrc && iDest < cchDest; ++iSrc) {
		char	szEncoded[4];	// longest sequence EncodeUnicodeCharToUTF8 can emit
		const size_t	nConverted = EncodeUnicodeCharToUTF8(szEncoded, pwszSrc + iSrc, cchSrc - iSrc);

		if(nConverted == 0) {
			// not encodable: pass the code unit through
			pszDest[iDest] = static_cast<const char>(pwszSrc[iSrc]);
			++iDest;
			continue;
		}
		if(nConverted > cchDest - iDest)
			break;	// the whole character does not fit any more

		for(size_t i = 0; i < nConverted; ++i)
			pszDest[iDest + i] = szEncoded[i];
		iDest += nConverted;

		// A four-byte sequence came from a surrogate pair: skip its second half.
		if(nConverted >= 4)
			++iSrc;
	}
	return iDest;
}

/**
 *	Converts UTF-8 text to UTF-16.
 *	A leading byte order mark is skipped. Bytes that do not form a decodable
 *	sequence (including the obsolete five- and six-byte forms) are copied
 *	through one by one, zero-extended. Conversion stops when the source is
 *	exhausted, when the destination is full, or when a code point above U+FFFF
 *	is met with only one free destination slot.
 *	@return	the number of code units written to the destination buffer
 */
size_t CEncoder_Unicode_Utf8::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	// Compare and copy as unsigned bytes: with a signed plain char, 0xEF / 0xBB /
	// 0xBF never compare equal and raw bytes >= 0x80 would be sign-extended.
	const uchar* const	pbSrc = reinterpret_cast<const uchar*>(pszSrc);

	size_t	iSrc = 0, iDest = 0;

	if(HasBOMOfUTF8(pbSrc, cchSrc))
		iSrc = 3;

	while(iSrc < cchSrc && iDest < cchDest) {
		CodePoint	cp = 0;
		size_t		cDecoded = DecodeUTF8CharToUnicode(cp, pbSrc + iSrc, cchSrc - iSrc);
		if(cDecoded == 0 || cDecoded > 4) {
			// undecodable here: pass the byte through and resynchronize on the next one
			pwszDest[iDest] = pbSrc[iSrc];
			++iDest;
			cDecoded = 1;
		} else {
			// A code point above the BMP may need two code units; never write past the buffer.
			if(cp > 0xFFFF && cchDest - iDest < 2)
				break;
			// Advance by what was actually written: an overlong four-byte
			// sequence can still decode to a BMP code point.
			if(EncodeCodePointToUTF16SurrogatePair(cp, pwszDest + iDest))
				iDest += 2;
			else
				++iDest;
		}
		iSrc += cDecoded;
	}
	return iDest;
}

/// Returns the constant 4: EncodeUnicodeCharToUTF8 emits at most four bytes, which covers everything reachable from UTF-16.
uchar CEncoder_Unicode_Utf8::GetMaxCharacterLength() const {
	return 4; // as UTF-16 range
}

/* [EOF] */