// Japanese.cpp
// (c) 2004 exeal

#include "StdAfx.h"
#include "Encoder.h"
#include <memory>	// std::auto_ptr

using namespace Ascension::Encodings;
using namespace std;


// Encoder / detector declarations for this file: each line pairs a code page
// identifier with the suffix of the CEncoder_* class implemented below.
// (The macros are defined outside this file; presumably they register the class
// with the encoder factory -- confirm in Encoder.h.)
// The commented-out entries are the JIS X 0213 based encoders and code page 50221.
DEFINE_ENCODER_CLASS(CPEX_JAPANESE_SHIFTJIS, Japanese_ShiftJis);
//DEFINE_ENCODER_CLASS(CPEX_JAPANESE_SHIFTJIS2004, Japanese_ShiftJis2004);
DEFINE_ENCODER_CLASS(CPEX_JAPANESE_EUC, Japanese_EucJp);
//DEFINE_ENCODER_CLASS(CPEX_JAPANESE_EUC2004, Japanese_EucJis2004);
DEFINE_ENCODER_CLASS(51932, Japanese_EucJpWindows);
DEFINE_ENCODER_CLASS(CPEX_JAPANESE_ISO2022JP, Japanese_Iso2022Jp);
DEFINE_ENCODER_CLASS(CPEX_JAPANESE_ISO2022JP1, Japanese_Iso2022Jp1);
DEFINE_ENCODER_CLASS(CPEX_JAPANESE_ISO2022JP2, Japanese_Iso2022Jp2);
//DEFINE_ENCODER_CLASS(CPEX_JAPANESE_ISO2022JP2004, Japanese_Iso2022Jp2004);
//DEFINE_ENCODER_CLASS(50221, Japanese_Iso2022JpWindows);
DEFINE_DETECTOR(CPEX_JAPANESE_AUTODETECT, Japanese);

// Enables the JIS X 0213 based encoders (Shift_JIS-2004, EUC-JIS-2004, ISO-2022-JP-2004).
// NOTE(review): the original comment was mojibake, presumably "not implemented yet";
// several of those encoders below are stubs that return 0.
//#define IMPLEMENT_JISX0213_BASE_ENCODERS


// Handles a character that cannot be converted.
// If no callback is installed, or the callback returns true, `lhs` receives
// __DEFAULT_CHAR and pCallBack is cleared so the callback is not invoked again;
// otherwise the ENCLOSING FUNCTION returns 0.
// Requires a variable named `pCallBack` in the scope where the macro is expanded.
// The expansion is a bare brace block, so do not follow `if(x) CONFIRM_ILLEGAL_CHAR(y);`
// directly with an `else`.
#define CONFIRM_ILLEGAL_CHAR(lhs)				\
	{											\
		if(pCallBack == 0 || (*pCallBack)()) {	\
			lhs = __DEFAULT_CHAR;				\
			pCallBack = 0;						\
		} else									\
			return 0;							\
	}

namespace {
	// JIS <-> UCS conversion tables (generated by make_table.pl).
	// The *ToUCS tables are indexed in this file by (JIS code - 0x2100) for codes in
	// [0x2100, 0x7F00); the UCSTo* tables are indexed directly by a UTF-16 code unit.
	// An entry of 0 is treated by the callers as "no mapping".
	const ushort	arrJISX0208ToUCS[] = {
		#include "Japanese_utils\JISX0208_A2U"
	};
	const ushort	arrUCSToJISX0208[] = {
		#include "Japanese_utils\JISX0208_U2A"
	};
	const ushort	arrJISX0212ToUCS[] = {
		#include "Japanese_utils\JISX0212_A2U"
	};
	const ushort	arrUCSToJISX0212[] = {
		#include "Japanese_utils\JISX0212_U2A"
	};
#ifdef IMPLEMENT_JISX0213_BASE_ENCODERS
	// JIS X 0213 plane 1 / plane 2 tables. Entries are 32-bit: the decoders below treat
	// values above 0x10FFFF as two packed UTF-16 code units.
	const ulong arrJISX0213P1ToUCS[] = {
		#include "Japanese_utils\JISX0213_plane1_A2U"
	};
	const ulong arrJISX0213P2ToUCS[] = {
		#include "Japanese_utils\JISX0213_plane2_A2U"
	};
#endif /* IMPLEMENT_JISX0213_BASE_ENCODERS */

	// Character sets that the ISO-2022-JP family converters can designate to G0.
	enum Iso2022Jp2Charset_G0 {ascii, jisx0201_roman, /*jisx0201_kana,*/ jisx0208, jisx0212, gb2312, ksc5601};
	// Character sets that can be designated to G2. Numbering continues after the last G0
	// value so that a single int (see `charset` in _ConvertUtf16ToIso2022JpX) can hold
	// a value of either enumeration without ambiguity.
	enum Iso2022Jp2Charset_G2 {undesignated = ksc5601 + 1 , iso8859_1, iso8859_7};

	// JIS X0201 -> UCS conversion.
	// Maps one JIS X 0201 byte to a UCS code unit; yields 0 when the byte has no mapping.
	inline wchar_t _JISX0201ToUCS(uchar ch) {
		switch(ch) {
		case 0x5C:	return 0x00A5;	// Yen Sign
		case 0x7E:	return 0x203E;	// Overline
		}
		if(ch >= 0x20 && ch <= 0x7D)	// remaining 7-bit graphic characters map to themselves
			return ch;
		if(ch >= 0xA1 && ch <= 0xDF)	// kana -> U+FF61..U+FF9F
			return ch + 0xFEC0;
		return 0x0000;					// invalid
	}

	// UCS -> JIS X0201 conversion.
	// Maps one UCS code unit to a JIS X 0201 byte; yields 0 when there is no mapping.
	inline uchar _UCSToJISX0201(wchar_t ch) {
		if(ch == 0x00A5)	return 0x5C;	// Yen Sign
		if(ch == 0x203E)	return 0x7E;	// Overline

		// 7-bit graphic characters except 0x5C, which is occupied by the yen sign
		const bool	bSevenBit = ch >= 0x0020 && ch <= 0x007D && ch != 0x005C;
		if(bSevenBit)
			return static_cast<uchar>(ch);

		const bool	bKana = ch >= 0xFF61 && ch <= 0xFF9F;
		if(bKana)
			return static_cast<uchar>(ch - 0xFEC0);
		return 0x00;	// invalid
	}

	// ISO-2022-JP-X -> UTF-16 conversion helper shared by the ISO-2022-JP encoders.
	//
	// cp          which variant to decode (asserted to be one of the ISO-2022-JP family);
	//             it decides which escape sequences are recognised
	// pwszDest    destination buffer, cchDest UTF-16 code units
	// pszSrc      source bytes, cchSrc bytes
	// pCallBack   consulted through CONFIRM_ILLEGAL_CHAR on the first unconvertible character
	// Returns the number of UTF-16 code units written, or 0 when the callback (or one of the
	// ISO-8859 sub-encoders) aborts the conversion.
	//
	// G0 starts as ASCII and G2 starts undesignated; both are reset at every CR or LF.
	size_t _ConvertIso2022JpXToUtf16(CodePage cp,
			wchar_t* pwszDest, size_t cchDest, const uchar* pszSrc, size_t cchSrc, bool(*pCallBack)(void)) {
		assert(cp == CPEX_JAPANESE_ISO2022JP || cp == CPEX_JAPANESE_ISO2022JP1
			|| cp == CPEX_JAPANESE_ISO2022JP2 || cp == CPEX_JAPANESE_ISO2022JP2004);

		size_t					iSrc = 0, iDest = 0;
		Iso2022Jp2Charset_G0	g0 = ascii;
		Iso2022Jp2Charset_G2	g2 = undesignated;

		CEncoderFactory&	encoderFactory = CEncoderFactory::GetInstance();
		// created lazily, only when an SS2 character of the respective set is met
		auto_ptr<CEncoder>	pIso88591Encoder, pIso88597Encoder;

		while(iSrc < cchSrc && iDest < cchDest) {
			// 1) designation escape sequences: update g0/g2 and produce no output
			if(pszSrc[iSrc] == 0x1B && cchSrc - iSrc >= 3) {	// expect esc. seq.
				if(memcmp(pszSrc + iSrc + 1, "(B", 2) == 0) {
					g0 = ascii; iSrc += 3; continue;
				} else if(memcmp(pszSrc + iSrc + 1, "(J", 2) == 0) {
					g0 = jisx0201_roman; iSrc += 3; continue;
//				} else if(memcmp(pszSrc + iSrc + 1, "(I", 2) == 0) {
//					g0 = jisx0201_kana; iSrc += 3; continue;
				} else if(memcmp(pszSrc + iSrc + 1, "$@", 2) == 0
						|| memcmp(pszSrc + iSrc + 1, "$B", 2) == 0) {
					g0 = jisx0208; iSrc += 3; continue;
				} else if((cp == CPEX_JAPANESE_ISO2022JP1 || cp == CPEX_JAPANESE_ISO2022JP2)
						&& cchSrc - iSrc >= 4 && memcmp(pszSrc + iSrc + 1, "$(D", 3) == 0) {
					g0 = jisx0212; iSrc += 4; continue;
				} else if(cp == CPEX_JAPANESE_ISO2022JP2) {
					// GB2312 / KS C 5601 are accepted only when the Windows code page
					// used to decode them (936 / 949) is reported as valid
					if(memcmp(pszSrc + iSrc + 1, "$A", 2) == 0 && encoderFactory.IsValidCodePage(936)) {
						g0 = gb2312; iSrc += 3; continue;
					} else if(cchSrc - iSrc >= 4
							&& memcmp(pszSrc + iSrc + 1, "$(C", 3) == 0 && encoderFactory.IsValidCodePage(949)) {
						g0 = ksc5601; iSrc += 4; continue;
					} else if(memcmp(pszSrc + iSrc + 1, ".A", 2) == 0) {
						g2 = iso8859_1; iSrc += 3; continue;
					} else if(memcmp(pszSrc + iSrc + 1, ".F", 2) == 0) {
						g2 = iso8859_7; iSrc += 3; continue;
					}
				}
			}
			// An ESC that matched none of the sequences above falls through and is handled
			// by the branches below (SS2, or as an ordinary byte of the current G0 set).

			// 2) control characters (and space) are copied through unchanged
			if((pszSrc[iSrc] <= 0x20 && pszSrc[iSrc] != 0x1B) || (pszSrc[iSrc] >= 0x80 && pszSrc[iSrc] < 0xA0)) {	// C0, C1
				if(pszSrc[iSrc] == 0x0A || pszSrc[iSrc] == 0x0D) {
					// a line break resets the designations
					g0 = ascii;
					g2 = undesignated;
				}
				pwszDest[iDest++] = pszSrc[iSrc++];	// SI, SO and the single-byte forms of SS2/SS3 get no special treatment
			} else if(cchSrc - iSrc > 1 && cchDest - iDest > 1
					&& memcmp(pszSrc + iSrc, "\x1BN", 2) == 0) {	// SS2
				// 3) ESC N: the next byte is one character of the set designated to G2
				iSrc += 2;
				if(cchSrc > iSrc) {
					// the ISO-8859 sub-encoders expect the 8-bit form of the byte
					const uchar	chAnsi = pszSrc[iSrc] | 0x80;
					if(g2 == iso8859_1) {	// ISO-8859-1
						if(pIso88591Encoder.get() == 0)
							pIso88591Encoder.reset(encoderFactory.CreateEncoder(CPEX_ISO8859_1));
						const size_t	cchConverted = pIso88591Encoder->ConvertToUnicode(
											pwszDest + iDest, cchDest - iDest,
											reinterpret_cast<const char*>(&chAnsi), 1, pCallBack);
						if(cchConverted == 0)
							return 0;
						++iSrc;
						iDest += cchConverted;
					} else if(g2 == iso8859_7) {	// ISO-8859-7
						if(pIso88597Encoder.get() == 0)
							pIso88597Encoder.reset(encoderFactory.CreateEncoder(CPEX_ISO8859_7));
						const size_t	cchConverted = pIso88597Encoder->ConvertToUnicode(
											pwszDest + iDest, cchDest - iDest,
											reinterpret_cast<const char*>(&chAnsi), 1, pCallBack);
						if(cchConverted == 0)
							return 0;
						++iSrc;
						iDest += cchConverted;
					} else {
						// SS2 while nothing is designated to G2: illegal character
						wchar_t	ucs;
						CONFIRM_ILLEGAL_CHAR(ucs);
						pwszDest[iDest++] = ucs;
						++iSrc;
					}
				}
			} else if(g0 == jisx0201_roman) {	// JIS X0201-Roman
				wchar_t	ucs = _JISX0201ToUCS(pszSrc[iSrc]);
				if(ucs == 0)
					CONFIRM_ILLEGAL_CHAR(ucs);
				pwszDest[iDest++] = ucs;
				++iSrc;
/*			} else if(g0 == jisx0201_kana) {	// JIS X0201-Kana
				wchar_t	ucs;
				if(pszSrc[iSrc] >= 0x80) {
					CONFIRM_ILLEGAL_CHAR(ucs);
				} else {
					ucs = _JISX0201ToUCS(pszSrc[iSrc] + 0x80);
					if(ucs == 0)
						CONFIRM_ILLEGAL_CHAR(ucs);
				}
				pwszDest[iDest++] = ucs;
				++iSrc;
*/			} else if(g0 == ascii || cchSrc - iSrc == 1 || cchDest - iDest == 1) {	// ASCII or illegal char
				// Also taken when a double-byte set is active but only one source byte or
				// one destination unit is left, so the branches below may rely on having
				// two source bytes and two destination units available.
				uchar	jis = pszSrc[iSrc];
				if(jis >= 0x80)
					CONFIRM_ILLEGAL_CHAR(jis);
				pwszDest[iDest++] = jis;
				++iSrc;
			} else if(g0 == jisx0208) {	// JIS X0208-1978 or -1983
				const ushort	jis = (pszSrc[iSrc] << 8) | pszSrc[iSrc + 1];
				wchar_t			ucs = (jis >= 0x2100 && jis < 0x7F00) ? arrJISX0208ToUCS[jis - 0x2100] : 0;

				if(ucs == 0)
					CONFIRM_ILLEGAL_CHAR(ucs);
				pwszDest[iDest++] = ucs;
				iSrc += 2;
			} else if(g0 == jisx0212) {	// JIS X0212-1990
				const ushort	jis = (pszSrc[iSrc] << 8) | pszSrc[iSrc + 1];
				wchar_t			ucs = (jis >= 0x2100 && jis < 0x7F00) ? arrJISX0212ToUCS[jis - 0x2100] : 0;

				if(ucs == 0)
					CONFIRM_ILLEGAL_CHAR(ucs);
				pwszDest[iDest++] = ucs;
				iSrc += 2;
			} else if(g0 == gb2312 || g0 == ksc5601) {	// GB2312-1980 or KSC5601-1987
				// decoded by Windows: set the high bits to obtain the 8-bit form that
				// code pages 936 / 949 expect
				wchar_t		ucs;	// for error
				char		sz[2] = {pszSrc[iSrc] | 0x80, pszSrc[iSrc + 1] | 0x80};
				const int	cch = ::MultiByteToWideChar(
									(g0 == gb2312) ? 936 : 949, MB_PRECOMPOSED, sz, 2, pwszDest + iDest, 2);
				if(cch == 0) {
					CONFIRM_ILLEGAL_CHAR(ucs);
					pwszDest[iDest++] = ucs;
				} else
					iDest += cch;
				iSrc += 2;
			}
		}
		return iDest;
	}

	// UTF-16 -> ISO-2022-JP-X ϊwp
	size_t _ConvertUtf16ToIso2022JpX(CodePage cp,
			char* _pszDest, size_t cchDest, const wchar_t* pwszSrc, size_t cchSrc, bool(*pCallBack)(void)) {
		assert(cp == CPEX_JAPANESE_ISO2022JP || cp == CPEX_JAPANESE_ISO2022JP1
			|| cp == CPEX_JAPANESE_ISO2022JP2 || cp == CPEX_JAPANESE_ISO2022JP2004);

		uchar*					pszDest = reinterpret_cast<uchar*>(_pszDest);
		size_t					iSrc = 0, iDest = 0;
		int						charset = ascii;
		Iso2022Jp2Charset_G0	g0 = ascii;
		Iso2022Jp2Charset_G2	g2 = undesignated;

		CEncoderFactory&	encoderFactory = CEncoderFactory::GetInstance();
		auto_ptr<CEncoder>	pIso88591Encoder((cp == CPEX_JAPANESE_ISO2022JP2) ? encoderFactory.CreateEncoder(CPEX_ISO8859_1) : 0);
		auto_ptr<CEncoder>	pIso88597Encoder((cp == CPEX_JAPANESE_ISO2022JP2) ? encoderFactory.CreateEncoder(CPEX_ISO8859_7) : 0);

		while(iSrc < cchSrc && iDest < cchDest) {
			const wchar_t	ucs = pwszSrc[iSrc];
			ushort			jis;
			char			szMultiBytes[2];

			if(ucs < 0x80) {
				jis = ucs;
				szMultiBytes[0] = static_cast<char>(ucs);
				szMultiBytes[1] = 0;
				charset = ascii;
			} else if(jis = _UCSToJISX0201(ucs) && jis < 0x80)
				charset = /*(jis < 0x80) ?*/ jisx0201_roman /*: jisx0201_kana*/;
			else if(jis = arrUCSToJISX0208[ucs])
				charset = jisx0208;
			else if((cp == CPEX_JAPANESE_ISO2022JP1 || cp == CPEX_JAPANESE_ISO2022JP2)
					&& toBoolean(jis = arrUCSToJISX0212[ucs]))
				charset = jisx0212;
			else if(cp == CPEX_JAPANESE_ISO2022JP2
					&& encoderFactory.IsValidCodePage(936)
					&& ::WideCharToMultiByte(936, 0, pwszSrc + iSrc, 1, szMultiBytes, 2, 0, 0) != 0)
				charset = gb2312;
			else if(cp == CPEX_JAPANESE_ISO2022JP2
					&& encoderFactory.IsValidCodePage(949)
					&& ::WideCharToMultiByte(949, 0, pwszSrc + iSrc, 1, szMultiBytes, 2, 0, 0) != 0)
				charset = ksc5601;
			else if(cp == CPEX_JAPANESE_ISO2022JP2
					&& pIso88591Encoder->ConvertFromUnicode(szMultiBytes, 2, pwszSrc + iSrc, 1, 0) != 0)
				charset = iso8859_1;
			else if(cp == CPEX_JAPANESE_ISO2022JP2
					&& pIso88597Encoder->ConvertFromUnicode(szMultiBytes, 2, pwszSrc + iSrc, 1, 0) != 0)
				charset = iso8859_7;
			else if(ucs == 0)	//  NUL
				charset = ascii;
			else {
				CONFIRM_ILLEGAL_CHAR(jis);
				charset = ascii;
			}

#define DESIGNATE_TO_G0(esc_sequence, cch_esc)						\
	if(g0 != charset) {												\
		if(cchDest < cch_esc + 1 || iDest > cchDest - cch_esc - 1)	\
			break;													\
		memcpy(pszDest + iDest, esc_sequence, cch_esc);				\
		iDest += cch_esc;											\
		g0 = static_cast<Iso2022Jp2Charset_G0>(charset);			\
	}
#define DESIGNATE_TO_G2(esc_sequence, cch_esc)						\
	if(g2 != charset) {												\
		if(cchDest < cch_esc + 3 || iDest > cchDest - cch_esc - 3)	\
			break;													\
		memcpy(pszDest + iDest, esc_sequence, cch_esc);				\
		iDest += cch_esc;											\
		g2 = static_cast<Iso2022Jp2Charset_G2>(charset);			\
	}

			if(charset == ascii) {	// ASCII
				DESIGNATE_TO_G0("\x1B(B", 3);
				pszDest[iDest++] = static_cast<uchar>(ucs);
			} else if(charset == jisx0201_roman) {	// JIS X0201-Roman
				DESIGNATE_TO_G0("\x1B(J", 3);
				pszDest[iDest++] = static_cast<uchar>(jis);
//			} else if(charset == jisx0201_kana) {	// JIS X0201-Kana
//				DESIGNATE_TO_G0("\x1B(I", 3);
//				pszDest[iDest++] = static_cast<uchar>(jis & 0x7F);
			} else if(charset == jisx0208) {	// JIS X0208:1997 (1990)
				DESIGNATE_TO_G0("\x1B$B", 3);
				pszDest[iDest++] = static_cast<uchar>(jis >> 8);
				pszDest[iDest++] = static_cast<uchar>(jis);
			} else if(charset == jisx0212) {	// JIS X0212:1990
				DESIGNATE_TO_G0("\x1B$(D", 4);
				pszDest[iDest++] = static_cast<uchar>(jis >> 8);
				pszDest[iDest++] = static_cast<uchar>(jis);
			} else if(charset == gb2312) {	// GB2312:1980
				DESIGNATE_TO_G0("\x1B$A", 3);
				pszDest[iDest++] = static_cast<uchar>(szMultiBytes[0] & 0x7F);
				if(szMultiBytes[1] != 0)
					pszDest[iDest++] = static_cast<uchar>(szMultiBytes[1] & 0x7F);
			} else if(charset == ksc5601) {	// KSC5601:1987
				DESIGNATE_TO_G0("\x1B$(C", 4);
				pszDest[iDest++] = static_cast<uchar>(szMultiBytes[0] & 0x7F);
				if(szMultiBytes[1] != 0)
					pszDest[iDest++] = static_cast<uchar>(szMultiBytes[1] & 0x7F);
			} else if(charset == iso8859_1) {	// ISO-8859-1
				DESIGNATE_TO_G2("\x1B.A", 3);
				if(iDest + 3 >= cchDest)
					break;
				pszDest[iDest++] = 0x1B;	// SS2
				pszDest[iDest++] = 'N';
				pszDest[iDest++] = static_cast<uchar>(szMultiBytes[0]);
			} else if(charset == iso8859_7) {	// ISO-8859-7
				DESIGNATE_TO_G2("\x1B.F", 3);
				if(iDest + 3 >= cchDest)
					break;
				pszDest[iDest++] = 0x1B;	// SS2
				pszDest[iDest++] = 'N';
				pszDest[iDest++] = static_cast<uchar>(szMultiBytes[0]);
			}
			++iSrc;
		}

		// G0  ASCII ɖ߂ďI
		if(g0 != ascii && cchDest > 3 && iDest <= cchDest - 3) {
			memcpy(pszDest + iDest, "\x1B(B", 3);
			iDest += 3;
		}
		return iDest;

#undef DESIGNATE_TO_G0
#undef DESIGNATE_TO_G2
	}

	// ʃwp
	inline size_t IsShiftJisWindows932(const char* psz, size_t cch) {
		for(size_t i = 0; i < cch; ++i) {
			const uchar	ch = static_cast<uchar>(psz[i]);

			if(ch == 0x1B)
				return i;
			else if((ch < 0x80)						// At@xbgApLA󔒁A䕶
					|| (ch >= 0xA1 && ch <= 0xDF))	// JIS X0201 
				continue;
			else if(i < cch - 1) {	// 2oCg?
				const uchar	chTrail = static_cast<uchar>(psz[i + 1]);
				if(((ch >= 0x81 && ch <= 0x9F) || (ch >= 0xE0 && ch <= 0xFC))
						&& (chTrail >= 0x40 && chTrail <= 0xFC) && chTrail != 0x7F) {
					++i;
					continue;
				}
			}
			return i;
		}
		return cch;
	}
	// Scans psz as EUC-JP in the form handled by code page 51932 and returns the length
	// of the leading part that is well formed (cch when the whole buffer is acceptable).
	//
	// Accepted forms:
	//   0x00..0x7F                      single byte (scan stops at ESC, 0x1B)
	//   0x8E, 0xA1..0xDF                SS2 + JIS X0201 kana
	//   0xA1..0xFE, 0xA1..0xFE          JIS X0208 double-byte character
	// The three-byte 0x8F (JIS X0212) form is not accepted, matching the 51932 decoder
	// in this file, which does not handle it either.
	//
	// The previous version reused the Shift_JIS lead/trail ranges and treated
	// 0xA1..0xDF as single bytes, so ordinary EUC-JP text was rejected.
	inline size_t IsEucJpWindows51932(const char* psz, size_t cch) {
		size_t	i = 0;
		while(i < cch) {
			const unsigned char	ch = static_cast<unsigned char>(psz[i]);

			if(ch == 0x1B)
				break;
			if(ch < 0x80) {	// letters, digits, space, control characters
				++i;
				continue;
			}
			if(i + 1 >= cch)	// every 8-bit byte needs a second byte
				break;

			const unsigned char	chTrail = static_cast<unsigned char>(psz[i + 1]);
			if(ch == 0x8E) {	// half-width kana
				if(chTrail < 0xA1 || chTrail > 0xDF)
					break;
			} else if(ch >= 0xA1 && ch <= 0xFE) {	// double-byte
				if(chTrail < 0xA1 || chTrail > 0xFE)
					break;
			} else
				break;
			i += 2;
		}
		return i;
	}
	// Scans psz as 7-bit ISO-2022-JP text and reports which variant it needs.
	//
	// Returns the number of leading bytes that are acceptable (the scan ends at the
	// first byte >= 0x80), or 0 when no recognised designation escape sequence was seen
	// in that part. Whenever the result is non-zero, `cp` has been set:
	//   CPEX_JAPANESE_ISO2022JP2  if an ISO-2022-JP-2 designation was seen
	//                             (GB2312, KS C 5601, ISO-8859-1 or ISO-8859-7),
	//   CPEX_JAPANESE_ISO2022JP1  else if the JIS X0212 designation was seen,
	//   50221                     else (only JIS X0201-Roman / JIS X0208 designations).
	// `cp` is left untouched when 0 is returned.
	//
	// Fixes over the previous version: the JIS X0212 sequence is compared at the
	// current position (it used to be compared near the start of the buffer), and the
	// early exit on an 8-bit byte no longer returns a non-zero length with `cp` unset.
	inline size_t IsIso2022Jp(const char* psz, size_t cch, CodePage& cp) {
		bool	bFoundEsc[3] = {false, false, false};	// ISO-2022-JP, -1, -2
		size_t	i = 0;
		for(; i < cch; ++i) {
			const uchar	ch = static_cast<uchar>(psz[i]);

			if(ch >= 0x80)
				break;	// the 7-bit part ends here
			else if(ch == 0x1B && i + 2 < cch) {
				// on a match, skip the rest of the sequence (the loop adds the last step)
				if(memcmp(psz + i + 1, "(J", 2) == 0
						|| memcmp(psz + i + 1, "$@", 2) == 0 || memcmp(psz + i + 1, "$B", 2) == 0) {
					bFoundEsc[0] = true; i += 2;
				} else if(i + 3 < cch && memcmp(psz + i + 1, "$(D", 3) == 0) {
					bFoundEsc[1] = true; i += 3;
				} else if(memcmp(psz + i + 1, "$A", 2) == 0
						|| memcmp(psz + i + 1, ".A", 2) == 0 || memcmp(psz + i + 1, ".F", 2) == 0) {
					bFoundEsc[2] = true; i += 2;
				} else if(i + 3 < cch && memcmp(psz + i + 1, "$(C", 3) == 0) {
					bFoundEsc[2] = true; i += 3;
				}
			}
		}

		if(bFoundEsc[2])		cp = CPEX_JAPANESE_ISO2022JP2;
		else if(bFoundEsc[1])	cp = CPEX_JAPANESE_ISO2022JP1;
		else if(bFoundEsc[0])	cp = 50221;
		else					return 0;
		return i;
	}

	void DetectCodePage_Japanese(const char* psz, size_t cch, CodePage& cpResult, size_t& cchConvertable) {
		// ܂ Unicode 𒲂ׂ
		if(CEncoderFactory::CodePageDetector unicodeDetector = CEncoderFactory::GetInstance().GetUnicodeDetector()) {
			unicodeDetector(psz, cch, cpResult, cchConvertable);
			if(cch == cchConvertable)
				return;
		} else
			cchConvertable = 0;

		size_t	cchConverted;

		cchConverted = IsShiftJisWindows932(psz, cch);
		if(cchConverted > cchConvertable) {
			cpResult = 932;
			cchConvertable = cchConverted;
		}
		if(cchConverted == cch) {
			cchConvertable = cch;
			return;
		}

		cchConverted = IsEucJpWindows51932(psz, cch);
		if(cchConverted > cchConvertable) {
			cpResult = 51932;
			cchConvertable = cchConverted;
		}
		if(cchConverted == cch)
			return;

		CodePage	cpIso2022Jp;
		cchConverted = IsIso2022Jp(psz, cch, cpIso2022Jp);
		if(cchConverted > cchConvertable) {
			cpResult = cpIso2022Jp;
			cchConvertable = cchConverted;
		}
	}
} // namespace `anonymous'


// Japanese (Shift JIS) ////////////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_ShiftJis::CEncoder_Japanese_ShiftJis() {
}

// Converts UTF-16 to Shift_JIS.
// Returns the number of bytes written to pszDest, or 0 when the callback aborts the
// conversion on an unconvertible character. Conversion stops early when the next
// character no longer fits into the destination.
//
// Changes over the previous version:
//  - the default character written for an unconvertible character is no longer
//    overwritten by a NUL byte, and U+0000 itself is no longer treated as illegal;
//  - a double-byte character that does not fit ends the conversion instead of being
//    silently skipped;
//  - half-width katakana (U+FF61..U+FF9F) are written as the single bytes 0xA1..0xDF.
size_t CEncoder_Japanese_ShiftJis::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;
	while(iSrc < cchSrc && iDest < cchDest) {
		const wchar_t	ucs = pwszSrc[iSrc];
		ushort			jis;

		if(ucs <= 0x007F)
			jis = ucs;	// 7-bit characters, including NUL, pass through unchanged
		else if(ucs >= 0xFF61 && ucs <= 0xFF9F)
			jis = _UCSToJISX0201(ucs);	// JIS X0201 kana: one byte, 0xA1..0xDF
		else {
			jis = arrUCSToJISX0208[ucs];
			if(jis == 0) {	// no mapping: default character or abort
				CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
				++iDest;
				++iSrc;
				continue;
			}
		}

		if(jis < 0x0100)
			pszDest[iDest++] = static_cast<char>(jis);
		else {
			if(cchDest - iDest < 2)
				break;	// no room for both bytes; leave the character unconverted

			const uchar	jk = (jis - 0x2020) >> 8;		// row (ku), 1..94
			const uchar	jt = (jis - 0x2020) & 0x00FF;	// cell (ten), 1..94

			assert(jk >= 1 && jk <= 94 && jt >= 1 && jt <= 94);
			// two rows share one lead byte; rows above 62 continue at 0xE0
			pszDest[iDest++] = (jk - 1) / 2 + ((jk <= 62) ? 0x81 : 0xC1);
			if(jk % 2 == 0)	pszDest[iDest++] = jt + 0x9E;
			else			pszDest[iDest++] = jt + ((jt <= 63) ? 0x3F : 0x40);	// skips 0x7F
		}
		++iSrc;
	}
	return iDest;
}

// Converts Shift_JIS to UTF-16.
// Returns the number of UTF-16 code units written to pwszDest, or 0 when the callback
// aborts the conversion on an illegal byte sequence.
//
// Changes over the previous version:
//  - the trail byte is read only when it exists (the last byte of the buffer used to
//    cause a read one past the end of the source);
//  - the single bytes 0xA1..0xDF are decoded as half-width katakana;
//  - an illegal byte now yields the default character, as the original comment on
//    that branch stated, instead of the raw byte value.
size_t CEncoder_Japanese_ShiftJis::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar	nLeadByte = pszSrc[iSrc];
		// 0 stands for "no trail byte"; it is outside the valid trail range
		const uchar	nTrailByte = (iSrc + 1 < cchSrc) ? static_cast<uchar>(pszSrc[iSrc + 1]) : 0;
		if(((nLeadByte >= 0x81 && nLeadByte <= 0x9F) || (nLeadByte >= 0xE0 && nLeadByte <= 0xEF))
				&& (nTrailByte >= 0x40 && nTrailByte <= 0xFC && nTrailByte != 0x7F)) {	// double-byte
			// sjis -> jis
			uchar	jk, jt;
			if(nLeadByte >= 0x81 && nLeadByte <= 0x9F)	// rows 01-62
				jk = (nLeadByte - 0x81) * 2 + ((nTrailByte > 0x9E) ? 2 : 1);	// < leadbyte = (jk - 1) / 2 + 0x81
			else	// rows 63-94
				jk = (nLeadByte - 0xC1) * 2 + ((nTrailByte > 0x9E) ? 2 : 1);	// < leadbyte = (jk - 1) / 2 + 0xC1
			if(jk % 2 == 0)
				jt = nTrailByte - 0x9E;	// < trailbyte = jt + 0x9E
			else if(nTrailByte <= 0x3F + 63)	// cells 01-63
				jt = nTrailByte - 0x3F;	// < trailbyte = jt + 0x3F
			else	// cells 64-94
				jt = nTrailByte - 0x40;	// < trailbyte = jt + 0x40

			const ushort	jis = ((jk << 8) | jt) + 0x2020;
			ushort			ucs = (jis >= 0x2100 && jis < 0x7F00) ? arrJISX0208ToUCS[jis - 0x2100] : 0;
			if(ucs == 0)
				CONFIRM_ILLEGAL_CHAR(ucs);
			pwszDest[iDest] = ucs;
			iSrc += 2;
		} else if(nLeadByte < 0x80) {	// single-byte
			pwszDest[iDest] = nLeadByte;
			++iSrc;
		} else if(nLeadByte >= 0xA1 && nLeadByte <= 0xDF) {	// JIS X0201 kana
			pwszDest[iDest] = _JISX0201ToUCS(nLeadByte);
			++iSrc;
		} else {	// illegal -> default character, or abort
			wchar_t	ucs;
			CONFIRM_ILLEGAL_CHAR(ucs);
			pwszDest[iDest] = ucs;
			++iSrc;
		}
		++iDest;
	}
	return iDest;
}

// Reports 2 as the maximum length of one encoded character
// (presumably in bytes: a Shift_JIS character is written above as one or two bytes).
uchar CEncoder_Japanese_ShiftJis::GetMaxCharacterLength() const {
	return 2;
}


#ifdef IMPLEMENT_JISX0213_BASE_ENCODERS
// Japanese (Shift_JIS-2004) /////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_ShiftJis2004::CEncoder_Japanese_ShiftJis2004() {
}

// Not implemented: converts nothing and always returns 0.
size_t CEncoder_Japanese_ShiftJis2004::ConvertFromUnicode(CFU_ARGLIST) {
	return 0;
}

// Converts Shift_JIS-2004 to UTF-16 using the JIS X 0213 plane 1 / plane 2 tables.
// Returns the number of UTF-16 code units written, or 0 when the callback aborts the
// conversion. A table entry above 0x10FFFF holds two packed UTF-16 code units (high
// half first); an entry at or above 0x10000 is written as a surrogate pair. The
// function returns early when such a two-unit result does not fit any more.
//
// Changes over the previous version: the trail byte is read only when it exists
// (avoids reading one past the end of the source), `jk` always has a defined value,
// and an illegal byte yields the default character instead of the raw byte value.
size_t CEncoder_Japanese_ShiftJis2004::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar	nLeadByte = pszSrc[iSrc];
		// 0 stands for "no trail byte"; it is outside the valid trail range
		const uchar	nTrailByte = (iSrc + 1 < cchSrc) ? static_cast<uchar>(pszSrc[iSrc + 1]) : 0;
		if(((nLeadByte >= 0x81 && nLeadByte <= 0x9F) || (nLeadByte >= 0xE0 && nLeadByte <= 0xFC))
				&& (nTrailByte >= 0x40 && nTrailByte <= 0xFC && nTrailByte != 0x7F)) {	// double-byte
			// sjis -> jis
			uchar		jk = 0, jt;
			const uchar	plane = (nLeadByte < 0xF0) ? 1 : 2;
			const bool	bKuIsEven = nTrailByte > 0x9E;
			if(nLeadByte >= 0x81 && nLeadByte <= 0x9F)
				jk = nLeadByte * 2 - 0x101 + (bKuIsEven ? 1 : 0);
			else if(nLeadByte >= 0xE0 && nLeadByte <= 0xEF)
				jk = nLeadByte * 2 - 0x181 + (bKuIsEven ? 1 : 0);
			else if((nLeadByte == 0xF4 && bKuIsEven) || (nLeadByte >= 0xF5 && nLeadByte <= 0xFC))
				jk = nLeadByte * 2 - 0x19B + (bKuIsEven ? 1 : 0);
			else if((nLeadByte >= 0xF0 && nLeadByte <= 0xF3) || (nLeadByte == 0xF4 && !bKuIsEven)) {
				// the low rows of plane 2 are not laid out linearly
				switch(nLeadByte) {
				case 0xF0:	jk = bKuIsEven ? 8 : 1; break;
				case 0xF1:	jk = bKuIsEven ? 4 : 3; break;
				case 0xF2:	jk = bKuIsEven ? 12 : 5; break;
				case 0xF3:	jk = bKuIsEven ? 14 : 13; break;
				case 0xF4:	jk = 15; break;
				}
			}
			if(jk % 2 == 0)
				jt = nTrailByte - 0x9E;	// < trailbyte = jt + 0x9E
			else if(nTrailByte <= 0x3F + 63)	// cells 01-63
				jt = nTrailByte - 0x3F;	// < trailbyte = jt + 0x3F
			else	// cells 64-94
				jt = nTrailByte - 0x40;	// < trailbyte = jt + 0x40

			const ushort	jis = ((jk << 8) | jt) + 0x2020;
			ulong			ucs;

			if(jis >= 0x2100 && jis < 0x7F00)
				ucs = (plane == 1) ? arrJISX0213P1ToUCS[jis - 0x2100] : arrJISX0213P2ToUCS[jis - 0x2100];
			else
				ucs = 0;
			if(ucs == 0)
				CONFIRM_ILLEGAL_CHAR(ucs);
			if(ucs > 0x0010FFFF) {	// two packed code points
				if(iDest + 1 >= cchDest)
					return iDest;
				pwszDest[iDest] = static_cast<ushort>(ucs >> 16);
				pwszDest[++iDest] = static_cast<ushort>(ucs);
			} else if(ucs >= 0x00010000) {	// outside the BMP
				if(iDest + 1 >= cchDest)
					return iDest;
				Manah::Text::EncodeCodePointToUTF16SurrogatePair(ucs, pwszDest + iDest);
				++iDest;
			} else
				pwszDest[iDest] = static_cast<wchar_t>(ucs);
			iSrc += 2;
		} else if(nLeadByte < 0x80) {	// single-byte
			pwszDest[iDest] = static_cast<wchar_t>(nLeadByte);
			++iSrc;
		} else {	// illegal -> default character, or abort
			wchar_t	wch;
			CONFIRM_ILLEGAL_CHAR(wch);
			pwszDest[iDest] = wch;
			++iSrc;
		}
		++iDest;
	}
	return iDest;
}

// Reports 2 as the maximum length of one encoded character (presumably in bytes).
uchar CEncoder_Japanese_ShiftJis2004::GetMaxCharacterLength() const {
	return 2;
}
#endif /* IMPLEMENT_JISX0213_BASE_ENCODERS */


// Japanese (EUC) ////////////////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_EucJp::CEncoder_Japanese_EucJp() {
}

// Converts UTF-16 to EUC-JP.
//   7-bit characters          -> one byte
//   half-width katakana       -> 0x8E (SS2) + JIS X0201 byte
//   JIS X0208 characters      -> two bytes (JIS code + 0x8080)
//   JIS X0212 characters      -> 0x8F (SS3) + two bytes
// Returns the number of bytes written, or 0 when the callback aborts the conversion.
// Conversion stops early when the next character no longer fits into the destination.
//
// Changes over the previous version: multi-byte characters are written only when all
// of their bytes fit (they used to be written past the end of the buffer), the default
// character for an unconvertible character is no longer overwritten by a NUL byte, and
// half-width katakana are supported.
size_t CEncoder_Japanese_EucJp::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const wchar_t	ucs = pwszSrc[iSrc];
		ushort			jis = ucs;
		bool			bX0212 = false;

		if(ucs >= 0xFF61 && ucs <= 0xFF9F) {	// JIS X0201 kana (code set 2)
			if(cchDest - iDest < 2)
				break;
			pszDest[iDest++] = '\x8E';	// SS2
			pszDest[iDest++] = static_cast<char>(_UCSToJISX0201(ucs));
			++iSrc;
			continue;
		}

		if(ucs >= 0x80) {
			jis = arrUCSToJISX0208[ucs];
			if(jis == 0) {
				jis = arrUCSToJISX0212[ucs];
				bX0212 = (jis != 0);
			}
			if(jis == 0) {	// in neither table: default character or abort
				CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
				++iDest;
				++iSrc;
				continue;
			}
		}

		if(jis < 0x100)
			pszDest[iDest++] = static_cast<char>(jis);
		else {
			const size_t	cchNeeded = bX0212 ? 3 : 2;
			if(cchDest - iDest < cchNeeded)
				break;	// leave the character unconverted rather than overrun the buffer

			jis += 0x8080;	// jis -> euc-jp
			if(bX0212)
				pszDest[iDest++] = '\x8F';	// SS3
			pszDest[iDest++] = static_cast<char>(jis >> 8);
			pszDest[iDest++] = static_cast<char>(jis);
		}
		++iSrc;
	}
	return iDest;
}

// Converts EUC-JP to UTF-16.
//   0x00..0x7F                -> copied as is
//   0x8E + byte               -> JIS X0201 kana (SS2)
//   0x8F + two bytes          -> JIS X0212 (SS3)
//   any other 8-bit lead byte -> JIS X0208 double-byte character
// Returns the number of UTF-16 code units written, or 0 when the callback aborts the
// conversion. A multi-byte character that is cut off by the end of the source ends
// the conversion (the units converted so far are returned).
//
// Changes over the previous version: the JIS code is range-checked before it is used
// as a table index (bytes such as 0x80 or 0x8E used to produce an index outside the
// table), and SS2 kana are decoded.
size_t CEncoder_Japanese_EucJp::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar	nFirstByte = pszSrc[iSrc];

		if(nFirstByte < 0x80) {	// 7-bit
			pwszDest[iDest++] = nFirstByte;
			++iSrc;
			continue;
		}

		const size_t	cchChar = (nFirstByte == 0x8F) ? 3 : 2;
		if(iSrc + cchChar > cchSrc)
			return iDest;	// truncated character

		// the last two bytes of the sequence carry the character code
		const uchar	nHigh = static_cast<uchar>(pszSrc[iSrc + cchChar - 2]);
		const uchar	nLow = static_cast<uchar>(pszSrc[iSrc + cchChar - 1]);
		ushort		ucs = 0;

		if(nFirstByte == 0x8E) {	// SS2 -> JIS X0201 kana
			if(nLow >= 0xA1 && nLow <= 0xDF)
				ucs = _JISX0201ToUCS(nLow);
		} else {
			const ushort	jis = ((nHigh << 8) | nLow) - 0x8080;
			if(jis >= 0x2100 && jis < 0x7F00)
				ucs = (nFirstByte == 0x8F)
					? arrJISX0212ToUCS[jis - 0x2100]	// SS3 -> JIS X0212
					: arrJISX0208ToUCS[jis - 0x2100];	// JIS X0208
		}

		if(ucs == 0)
			CONFIRM_ILLEGAL_CHAR(ucs);
		pwszDest[iDest++] = ucs;
		iSrc += cchChar;
	}
	return iDest;
}

// Reports 3 as the maximum length of one encoded character
// (presumably in bytes: SS3 plus two bytes for a JIS X0212 character).
uchar CEncoder_Japanese_EucJp::GetMaxCharacterLength() const {
	return 3;
}


#ifdef IMPLEMENT_JISX0213_BASE_ENCODERS
// Japanese (EUC-JIS-2004) /////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_EucJis2004::CEncoder_Japanese_EucJis2004() {
}

// Not implemented: converts nothing and always returns 0.
size_t CEncoder_Japanese_EucJis2004::ConvertFromUnicode(CFU_ARGLIST) {
	return 0;
}

// Converts EUC-JIS-2004 to UTF-16 using the JIS X 0213 tables.
//   0x00..0x7F                -> copied as is
//   0x8F + two bytes          -> plane 2 (SS3)
//   any other 8-bit lead byte -> plane 1 double-byte character
// Returns the number of UTF-16 code units written, or 0 when the callback aborts the
// conversion. The function returns early when a character is cut off by the end of
// the source or when a two-unit result does not fit into the destination.
// SS2 (0x8E, half-width kana) gets no special handling here.
//
// Changes over the previous version:
//  - packed two-unit table entries are split with `>> 16`, as in the Shift_JIS-2004
//    decoder of this file (this function used `>> 8`);
//  - two-unit results are written only when two destination units are available;
//  - the JIS code is range-checked before it is used as a table index.
size_t CEncoder_Japanese_EucJis2004::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar	nFirstByte = pszSrc[iSrc];

		if(nFirstByte < 0x80) {	// 7-bit
			pwszDest[iDest++] = nFirstByte;
			++iSrc;
			continue;
		}

		const bool		bPlane2 = (nFirstByte == 0x8F);	// SS3 -> plane-2
		const size_t	cchChar = bPlane2 ? 3 : 2;
		if(iSrc + cchChar > cchSrc)
			return iDest;	// truncated character

		// the last two bytes of the sequence carry the character code
		const uchar		nHigh = static_cast<uchar>(pszSrc[iSrc + cchChar - 2]);
		const uchar		nLow = static_cast<uchar>(pszSrc[iSrc + cchChar - 1]);
		const ushort	jis = ((nHigh << 8) | nLow) - 0x8080;
		ulong			ucs = 0;

		if(jis >= 0x2100 && jis < 0x7F00)
			ucs = bPlane2 ? arrJISX0213P2ToUCS[jis - 0x2100] : arrJISX0213P1ToUCS[jis - 0x2100];
		if(ucs == 0)
			CONFIRM_ILLEGAL_CHAR(ucs);

		if(ucs >= 0x00010000) {	// needs two UTF-16 code units
			if(iDest + 1 >= cchDest)
				return iDest;
			if(ucs > 0x0010FFFF) {	// two packed code units
				pwszDest[iDest] = static_cast<wchar_t>(ucs >> 16);
				pwszDest[iDest + 1] = static_cast<wchar_t>(ucs);
			} else	// outside the BMP
				Manah::Text::EncodeCodePointToUTF16SurrogatePair(ucs, pwszDest + iDest);
			iDest += 2;
		} else
			pwszDest[iDest++] = static_cast<wchar_t>(ucs);
		iSrc += cchChar;
	}
	return iDest;
}

// Reports 3 as the maximum length of one encoded character
// (presumably in bytes: SS3 plus two bytes for a plane 2 character).
uchar CEncoder_Japanese_EucJis2004::GetMaxCharacterLength() const {
	return 3;
}
#endif /* IMPLEMENT_JISX0213_BASE_ENCODERS */

// Japanese (ISO-2022-JP) //////////////////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_Iso2022Jp::CEncoder_Japanese_Iso2022Jp() {
}

// cchDest ȂƃGXP[vV[PX܂Ȃ\
size_t CEncoder_Japanese_Iso2022Jp::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();
	return _ConvertUtf16ToIso2022JpX(CPEX_JAPANESE_ISO2022JP, pszDest, cchDest, pwszSrc, cchSrc, pCallBack);
}

// Converts ISO-2022-JP to UTF-16 by delegating to the shared helper.
size_t CEncoder_Japanese_Iso2022Jp::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();
	const CodePage	cpVariant = CPEX_JAPANESE_ISO2022JP;
	return _ConvertIso2022JpXToUtf16(cpVariant, pwszDest, cchDest, pszSrc, cchSrc, pCallBack);
}

// Reports 8 as the maximum length of one encoded character.
// NOTE(review): presumably a 3-byte designation + 2-byte character + the 3-byte
// return to ASCII -- confirm against the callers' buffer sizing.
uchar CEncoder_Japanese_Iso2022Jp::GetMaxCharacterLength() const {
	return 8;
}


// Japanese (ISO-2022-JP-1) ///////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_Iso2022Jp1::CEncoder_Japanese_Iso2022Jp1() {
}

// Converts UTF-16 to ISO-2022-JP-1 by delegating to the shared helper.
size_t CEncoder_Japanese_Iso2022Jp1::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();
	const CodePage	cpVariant = CPEX_JAPANESE_ISO2022JP1;
	return _ConvertUtf16ToIso2022JpX(cpVariant, pszDest, cchDest, pwszSrc, cchSrc, pCallBack);
}

// Converts ISO-2022-JP-1 to UTF-16 by delegating to the shared helper.
size_t CEncoder_Japanese_Iso2022Jp1::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();
	const CodePage	cpVariant = CPEX_JAPANESE_ISO2022JP1;
	return _ConvertIso2022JpXToUtf16(cpVariant, pwszDest, cchDest, pszSrc, cchSrc, pCallBack);
}

// Reports 9 as the maximum length of one encoded character.
// NOTE(review): presumably the 4-byte JIS X0212 designation + 2-byte character +
// the 3-byte return to ASCII -- confirm.
uchar CEncoder_Japanese_Iso2022Jp1::GetMaxCharacterLength() const {
	return 9;
}


// Japanese (ISO-2022-JP-2) ///////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_Iso2022Jp2::CEncoder_Japanese_Iso2022Jp2() {
}

// Converts UTF-16 to ISO-2022-JP-2 by delegating to the shared helper.
size_t CEncoder_Japanese_Iso2022Jp2::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();
	const CodePage	cpVariant = CPEX_JAPANESE_ISO2022JP2;
	return _ConvertUtf16ToIso2022JpX(cpVariant, pszDest, cchDest, pwszSrc, cchSrc, pCallBack);
}

// Converts ISO-2022-JP-2 to UTF-16 by delegating to the shared helper.
size_t CEncoder_Japanese_Iso2022Jp2::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();
	const CodePage	cpVariant = CPEX_JAPANESE_ISO2022JP2;
	return _ConvertIso2022JpXToUtf16(cpVariant, pwszDest, cchDest, pszSrc, cchSrc, pCallBack);
}

// Reports 9 as the maximum length of one encoded character.
// NOTE(review): presumably a 4-byte designation + 2-byte character + the 3-byte
// return to ASCII -- confirm.
uchar CEncoder_Japanese_Iso2022Jp2::GetMaxCharacterLength() const {
	return 9;
}


#ifdef IMPLEMENT_JISX0213_BASE_ENCODERS
// Japanese (ISO-2022-JP-2004) ////////////////////////////////////////////////

// Constructor; this encoder needs no set-up of its own.
CEncoder_Japanese_Iso2022Jp2004::CEncoder_Japanese_Iso2022Jp2004() {
}

// Not implemented: converts nothing and always returns 0.
size_t CEncoder_Japanese_Iso2022Jp2004::ConvertFromUnicode(CFU_ARGLIST) {
	return 0;
}

// Not implemented: converts nothing and always returns 0.
size_t CEncoder_Japanese_Iso2022Jp2004::ConvertToUnicode(CTU_ARGLIST) {
	return 0;
}

// Reports 3 as the maximum length of one encoded character.
// NOTE(review): the other ISO-2022-JP encoders in this file report 8 or 9; this
// value looks like a placeholder for the unimplemented encoder -- confirm.
uchar CEncoder_Japanese_Iso2022Jp2004::GetMaxCharacterLength() const {
	return 3;
}
#endif /* IMPLEMENT_JISX0213_BASE_ENCODERS */

// Japanese (EUC, windows-51932) //////////////////////////////////////////////

// This encoder converts through code page 932, so construction fails with an
// exception when the system reports that code page as not valid.
// (The message used to read "is not unsupported", a double negative.)
CEncoder_Japanese_EucJpWindows::CEncoder_Japanese_EucJpWindows() {
	if(!::IsValidCodePage(932))
		throw exception("This code page is not supported.");
}

// Converts UTF-16 to windows-51932 in two steps: UTF-16 -> code page 932 through
// WideCharToMultiByte, then code page 932 -> EUC form.
// Returns the number of bytes written, or 0 when the callback aborts the conversion.
// Conversion stops early when a two-byte result no longer fits into the destination.
//
// Changes over the previous version:
//  - the callback result is interpreted as everywhere else in this file (true =
//    continue with a substitute, false = abort); the test used to be inverted;
//  - two-byte results are written only when two bytes are available;
//  - a failed API call is handled as an unconvertible character instead of using
//    uninitialised data.
size_t CEncoder_Japanese_EucJpWindows::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;
	while(iSrc < cchSrc && iDest < cchDest) {
		// UTF-16 -> cp932
		uchar		szSJis[2] = {0, 0};
		BOOL		bUsedDefaultChar = FALSE;
		const int	cConvertedBytes = ::WideCharToMultiByte(932, 0,
			pwszSrc + iSrc, 1, reinterpret_cast<char*>(szSJis), 2, 0, &bUsedDefaultChar);
		if(cConvertedBytes == 0) {	// conversion failed outright
			CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
			++iDest;
			++iSrc;
			continue;
		}
		if(bUsedDefaultChar) {
			// Windows already substituted its default character; ask once whether
			// to go on
			if(pCallBack != 0 && !(*pCallBack)())
				return 0;
			pCallBack = 0;
		}

		// cp932 -> cp51932
		if(cConvertedBytes == 1 && szSJis[0] >= 0xA1 && szSJis[0] <= 0xDF) {	// half-width kana
			if(cchDest - iDest < 2)
				break;
			pszDest[iDest + 0] = static_cast<uchar>(0x8E);
			pszDest[iDest + 1] = szSJis[0];
			iDest += 2;
		} else if(cConvertedBytes == 2
				&& ((szSJis[0] >= 0x81 && szSJis[0] <= 0x9F) || (szSJis[0] >= 0xE0 && szSJis[0] <= 0xFC))
				&& (szSJis[1] >= 0x40 && szSJis[1] <= 0xFC) && szSJis[1] != 0x7F) {	// double-byte
			if(cchDest - iDest < 2)
				break;
			if(const uint n = _mbcjmstojis(szSJis[0] << 8 | szSJis[1])) {
				// JIS code -> EUC: set the high bit of both bytes
				pszDest[iDest + 0] = (n | 0x8000) >> 8;
				pszDest[iDest + 1] = (n | 0x0080) >> 0;
			} else {
				// no JIS equivalent: the Shift_JIS bytes are kept as they are
				pszDest[iDest + 0] = szSJis[0];
				pszDest[iDest + 1] = szSJis[1];
			}
			iDest += 2;
		} else	// everything else: first byte only
			pszDest[iDest++] = szSJis[0];
		++iSrc;
	}
	return iDest;
}

// Converts windows-51932 to UTF-16 in two steps: EUC form -> code page 932, then
// code page 932 -> UTF-16 through MultiByteToWideChar.
// Returns the number of UTF-16 code units written, or 0 when the callback aborts the
// conversion. An SS2 byte (0x8E) at the very end of the source ends the conversion.
//
// Changes over the previous version: the second byte is examined only when it exists
// (the last byte of the buffer used to cause a read one past the end of the source),
// and a failed MultiByteToWideChar call is handled as an illegal character instead of
// leaving the destination unit unwritten.
size_t CEncoder_Japanese_EucJpWindows::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;
	while(iSrc < cchSrc && iDest < cchDest) {
		const uchar*	pszSrc_ = reinterpret_cast<const uchar*>(pszSrc + iSrc);
		const bool		bHasSecond = iSrc + 1 < cchSrc;
		// default: pass the first byte through on its own; a zero second byte marks
		// a single-byte character
		uchar			szSJis[2] = {pszSrc_[0], 0};
		size_t			cchConsumed = 1;

		// cp51932 -> cp932
		if(pszSrc_[0] == 0x8E) {	// half-width kana
			if(!bHasSecond)
				break;
			szSJis[0] = pszSrc_[1];
			cchConsumed = 2;
		} else if(bHasSecond
				&& pszSrc_[0] >= 0xA1 && pszSrc_[0] <= 0xFE
				&& pszSrc_[1] >= 0xA1 && pszSrc_[1] <= 0xFE) {	// double-byte
			const uint	nEuc = (pszSrc_[0] << 8) | pszSrc_[1];
			// EUC -> JIS (clear the high bits) -> Shift_JIS; 0 means "no equivalent",
			// in which case only the first byte is consumed
			if(const uint n = _mbcjistojms(nEuc & 0x7F7F)) {
				szSJis[0] = static_cast<uchar>(n >> 8);
				szSJis[1] = static_cast<uchar>(n);
				cchConsumed = 2;
			}
		}

		// cp932 -> UTF-16
		if(::MultiByteToWideChar(932, 0, reinterpret_cast<char*>(szSJis),
				(szSJis[1] == 0) ? 1 : 2, pwszDest + iDest, 1) == 0)
			CONFIRM_ILLEGAL_CHAR(pwszDest[iDest]);
		iSrc += cchConsumed;
		++iDest;
	}
	return iDest;
}

// Reports 2 as the maximum length of one encoded character
// (presumably in bytes: this encoder writes at most two bytes per character).
uchar CEncoder_Japanese_EucJpWindows::GetMaxCharacterLength() const {
	return 2;
}

/* [EOF] */