// Encoder.h
// (c) 2004 exeal

#ifndef _ENCODER_H_
#define _ENCODER_H_

#include "../AscensionCommon.h"
#include <cassert>
#include <set>
#include <map>


namespace Ascension {
namespace Encodings {

///	Identifier of a character encoding: a Windows code page number or one of the CPEX_* values declared below.
typedef uint CodePage;

//	Named code page identifiers used by the encoders and detectors of this library.
//	NOTE(review): the values from 70000 upward lie outside the range of Windows code page
//	identifiers and appear to be library-defined extensions -- confirm against the encoder sources.
const CodePage
	CPEX_UNICODE_UTF16LE		= 1200,		///< UTF-16
	CPEX_UNICODE_UTF16BE		= 1201,		///< UTF-16 big endian
	CPEX_UNICODE_UTF32LE		= 12000,	///< UTF-32
	CPEX_UNICODE_UTF32BE		= 12001,	///< UTF-32 big endian
	CPEX_AUTODETECT				= 50001,	///< Auto-detect
	CPEX_JAPANESE_AUTODETECT	= 50932,	///< Japanese (auto-detect)
	CPEX_KOREAN_AUTODETECT		= 50949,	///< Korean (auto-detect)
	CPEX_AUTODETECT_SYSTEMLANG	= 70000,	///< Auto-detect (system language)
	CPEX_AUTODETECT_USERLANG	= 70001,	///< Auto-detect (user language)
	CPEX_UNICODE_AUTODETECT		= 70010,	///< Unicode (auto-detect)
	CPEX_UNICODE_UTF5			= 70011,	///< UTF-5
	CPEX_ARMENIAN_AUTODETECT	= 70020,	///< Armenian (auto-detect)
	CPEX_ARMENIAN_ARMSCII7		= 70021,	///< Armenian (ARMSCII-7)
	CPEX_ARMENIAN_ARMSCII8		= 70022,	///< Armenian (ARMSCII-8)
	CPEX_ARMENIAN_ARMSCII8A		= 70023,	///< Armenian (ARMSCII-8A)
	CPEX_VIETNAMESE_AUTODETECT	= 70030,	///< Vietnamese (auto-detect)
	CPEX_VIETNAMESE_TCVN		= 70031,	///< Vietnamese (TCVN)
	CPEX_VIETNAMESE_VISCII		= 70032,	///< Vietnamese (VISCII)
	CPEX_VIETNAMESE_VPS			= 70033,	///< Vietnamese (VPS)
	CPEX_JAPANESE_ISO2022JP		= 70040,	///< Japanese (ISO-2022-JP)
	CPEX_JAPANESE_SHIFTJIS		= 70041,	///< Japanese (Shift JIS)
	CPEX_JAPANESE_ISO2022JP1	= 70042,	///< Japanese (ISO-2022-JP-1)
	CPEX_JAPANESE_ISO2022JP2	= 70043,	///< Japanese (ISO-2022-JP-2)
	CPEX_JAPANESE_EUC			= 70044,	///< Japanese (EUC)
	CPEX_JAPANESE_ISO2022JP2004	= 70045,	///< Japanese (ISO-2022-JP-2004)
	CPEX_JAPANESE_SHIFTJIS2004	= 70046,	///< Japanese (Shift_JIS-2004)
	CPEX_JAPANESE_EUCJIS2004	= 70047,	///< Japanese (EUC-JIS-2004)
	CPEX_MULTILINGUAL_ISO2022	= 70050,	///< Multilingual (ISO-2022)
	CPEX_UNCATEGORIZED_BINARY	= 70060,	///< Binary
	CPEX_THAI_TIS620			= 70070,	///< Thai (TIS 620-2533:1990)
	CPEX_LAO_MULELAO			= 70080,	///< Lao (MuleLao)
	CPEX_LAO_CP1133				= 70081,	///< Lao (ibm-1133)
	CPEX_ISO8859_1	= 28591,	///< Western European (ISO-8859-1)
	CPEX_ISO8859_2	= 28592,	///< Central European (ISO-8859-2)
	CPEX_ISO8859_3	= 28593,	///< Southern European (ISO-8859-3)
	CPEX_ISO8859_4	= 28594,	///< Baltic (ISO-8859-4)
	CPEX_ISO8859_5	= 28595,	///< Cyrillic (ISO-8859-5)
	CPEX_ISO8859_6	= 28596,	///< Arabic (ISO-8859-6)
	CPEX_ISO8859_7	= 28597,	///< Greek (ISO-8859-7)
	CPEX_ISO8859_8	= 28598,	///< Hebrew (ISO-8859-8)
	CPEX_ISO8859_9	= 28599,	///< Turkish (ISO-8859-9)
	CPEX_ISO8859_10	= 28600,	///< Nordic (ISO-8859-10)
	CPEX_ISO8859_11	= 28601,	///< Thai (ISO-8859-11)
	CPEX_ISO8859_13	= 28603,	///< Baltic (ISO-8859-13)
	CPEX_ISO8859_14	= 28604,	///< Celtic (ISO-8859-14)
	CPEX_ISO8859_15	= 28605,	///< Western European (ISO-8859-15)
	CPEX_ISO8859_16	= 28606;	///< Central European (ISO-8859-16)


// Byte order marks (BOM).
// Every array also holds the implicit string-literal terminator after the BOM bytes, so
// sizeof() is one larger than the mark itself. Compare against these with an explicit
// length (2, 4 or 3 bytes); strlen() is unusable because the UTF-32 marks contain zero bytes.
///	UTF-16 little endian BOM (FF FE)
const uchar	szBom_Utf16LE[] = "\xFF\xFE";
///	UTF-16 big endian BOM (FE FF)
const uchar	szBom_Utf16BE[] = "\xFE\xFF";
///	UTF-32 little endian BOM (FF FE 00 00)
const uchar	szBom_Utf32LE[] = "\xFF\xFE\x00\x00";
///	UTF-32 big endian BOM (00 00 FE FF)
const uchar	szBom_Utf32BE[] = "\x00\x00\xFE\xFF";
///	UTF-8 BOM (EF BB BF)
const uchar	szBom_Utf8[] = "\xEF\xBB\xBF";


// CEncoder class definition
/////////////////////////////////////////////////////////////////////////////

// Substitute written in place of a character that cannot be converted.
// NOTE(review): the original comment suggests obtaining this from the user language instead;
// the users of this macro are outside this header.
#define __DEFAULT_CHAR	'?'

// U+FFFD REPLACEMENT CHARACTER: stands for input that maps to no character of the
// character set. __RPCH is a short alias for the same value.
#define __REPLACEMENT_CHARACTER	0xFFFD
#define __RPCH					__REPLACEMENT_CHARACTER

// Parameter list for declarations/definitions of ConvertFromUnicode overrides.
// It repeats the parameters of CEncoder::ConvertFromUnicode; the default arguments appear
// only as comments.
#define CFU_ARGLIST											\
	char* pszDest, std::size_t cchDest,						\
	const wchar_t* pwszSrc, std::size_t cchSrc /* = -1 */,	\
	bool(*pCallBack)(void) /* = 0 */

// Parameter list for declarations/definitions of ConvertToUnicode overrides.
// The source pointer is named pszSrc_ here because CTU_CHECKARGS() declares pszSrc as
// the same pointer viewed as const uchar*.
#define CTU_ARGLIST											\
	wchar_t* pwszDest, std::size_t cchDest,					\
	const char* pszSrc_, std::size_t cchSrc /* = -1 */,		\
	bool(*pCallBack)(void) /* = 0 */

// Prologue for functions whose parameters come from CFU_ARGLIST.
// Asserts that both buffers are non-null and, when cchSrc holds the "length not given"
// sentinel (-1 converted to std::size_t), replaces it with the length of the
// null-terminated source string.
// Expands to several statements, so use it only as a statement of its own.
#define CFU_CHECKARGS()								\
	assert(pszDest != 0 && pwszSrc != 0);			\
	if(cchSrc == static_cast<std::size_t>(-1)) {	\
		cchSrc = wcslen(pwszSrc);					\
	}

// Prologue for functions whose parameters come from CTU_ARGLIST.
// Asserts that both buffers are non-null, replaces the "length not given" sentinel
// (-1 converted to std::size_t) in cchSrc with strlen(pszSrc_), and declares pszSrc,
// the source pointer viewed as unsigned bytes, for the rest of the function.
// Expands to several statements and a declaration, so use it only as a statement of its
// own at function scope.
#define CTU_CHECKARGS()								\
	assert(pwszDest != 0 && pszSrc_ != 0);			\
	if(cchSrc == static_cast<std::size_t>(-1)) {	\
		cchSrc = strlen(pszSrc_);					\
	}												\
	const uchar* const	pszSrc = reinterpret_cast<const uchar*>(pszSrc_);

///	Abstract base class of all encoders: converts text between UTF-16 and one native encoding.
class CEncoder {
	// Constructors: only derived classes may construct; destruction through a base pointer is virtual.
protected:
	CEncoder() {}
public:
	virtual ~CEncoder() {}

	// NOCOPY is a project macro defined outside this header.
	// NOTE(review): presumably it disables copying and may change the current access level,
	// which would explain why "public:" is restated right after it -- confirm.
	NOCOPY(CEncoder);

	// Methods
public:
	/**
	 *	Converts UTF-16 text into the native encoding.
	 *	@param pszDest		[out] destination buffer
	 *	@param cchDest		length of the destination buffer
	 *	@param pwszSrc		source text
	 *	@param cchSrc		length of the source text; the default -1 means "null-terminated"
	 *						(CFU_CHECKARGS() resolves it with wcslen)
	 *	@param pCallBack	callback invoked when the text cannot be converted.
	 *						If it is null, or if it returns true, conversion proceeds using
	 *						default characters; if it returns false the method gives up and
	 *						returns 0 (this is how CWindowsEncoder implements it).
	 *						NOTE(review): the original comment says the callback is invoked
	 *						only on the first failure -- implementation-dependent, confirm.
	 *	@return				length of the converted text, or 0 if the conversion was abandoned
	 */
	virtual std::size_t ConvertFromUnicode(
							char* pszDest, std::size_t cchDest,
							const wchar_t* pwszSrc, std::size_t cchSrc = -1,
							bool(*pCallBack)(void) = 0) = 0;
	/**
	 *	Converts text in the native encoding into UTF-16.
	 *	@param pwszDest		[out] destination buffer
	 *	@param cchDest		length of the destination buffer
	 *	@param pszSrc		source text
	 *	@param cchSrc		length of the source text; the default -1 means "null-terminated"
	 *						(CTU_CHECKARGS() resolves it with strlen)
	 *	@param pCallBack	callback invoked when the text cannot be converted;
	 *						same meaning as in CEncoder::ConvertFromUnicode
	 *	@return				length of the converted text, or 0 if the conversion was abandoned
	 */
	virtual std::size_t ConvertToUnicode(
							wchar_t* pwszDest, std::size_t cchDest,
							const char* pszSrc, std::size_t cchSrc = -1,
							bool(*pCallBack)(void) = 0) = 0;
	///	Returns the maximum number of bytes a single character can occupy in the native encoding.
	virtual uchar GetMaxCharacterLength() const = 0;
};


/// Registry of encoder producers and code page detectors, keyed by code page.
/// The shared object is obtained through GetInstance().
class CEncoderFactory {
	// Types
public:
	/// Function that creates a new encoder object.
	typedef CEncoder*(*EncoderProducer)();
	/// Detection routine; DEFINE_DETECTOR names its parameters
	/// (psz, cch, cpResult, cchConvertable).
	typedef void(*CodePageDetector)(const char*, std::size_t, CodePage&, std::size_t&);

	// Access to the shared object
public:
	static CEncoderFactory&	GetInstance();

	// Registration (each returns whether the entry was newly inserted)
public:
	bool	RegisterDetector(CodePage cp, CodePageDetector factoryMethod);
	bool	RegisterEncoder(CodePage cp, EncoderProducer factoryMethod);

	// Queries defined inline at the end of this header
public:
	CodePageDetector	GetUnicodeDetector() const;
	bool				IsCodePageForAutoDetection(CodePage cp) const;
	bool				IsValidCodePage(CodePage cp) const;

	// Operations defined outside this header
public:
	CEncoder*	CreateEncoder(CodePage cp);
	CodePage	DetectCodePage(const char* psz, std::size_t cch, CodePage cp);
	void		EnumCodePages(std::set<CodePage>& codePages) const;

	// Data members
private:
	typedef std::map<CodePage, EncoderProducer>		EncoderMap;
	typedef std::map<CodePage, CodePageDetector>	DetectorMap;
	EncoderMap	m_registeredEncoders;	///< producers added by RegisterEncoder
	DetectorMap	m_registeredDetectors;	///< detectors added by RegisterDetector
};


// Declares, inside an unnamed namespace, the encoder class CEncoder_<name> derived from
// CEncoder and registers its static Create() function with the factory for code page cp.
// The macro only DECLARES the constructor, ConvertFromUnicode, ConvertToUnicode and
// GetMaxCharacterLength; the translation unit that uses the macro must define them.
// Registration runs in the initializer of the namespace-scope constant b<name>.
// NOTE: DEFINE_DETECTOR also emits a constant called b<name>, so the two macros cannot be
// given the same name within one translation unit.
#define DEFINE_ENCODER_CLASS(cp, name)														\
	namespace {																				\
		class CEncoder_##name : public CEncoder {											\
		private:																			\
			CEncoder_##name();																\
		public:																				\
			std::size_t	ConvertFromUnicode(CFU_ARGLIST);									\
			std::size_t	ConvertToUnicode(CTU_ARGLIST);										\
			uchar		GetMaxCharacterLength() const;										\
			static CEncoder*	Create() {return new CEncoder_##name;}						\
		};																					\
		const bool b##name =																\
			CEncoderFactory::GetInstance().RegisterEncoder(cp, &CEncoder_##name::Create);	\
	}

// Declares, inside an unnamed namespace, the detection function DetectCodePage_<name> and
// registers it with the factory for code page cp. The function is only declared here; the
// translation unit that uses the macro must define it.
// Registration runs in the initializer of the namespace-scope constant b<name>.
// NOTE: DEFINE_ENCODER_CLASS also emits a constant called b<name>, so the two macros cannot
// be given the same name within one translation unit.
#define DEFINE_DETECTOR(cp, name)														\
	namespace {																			\
		void DetectCodePage_##name(const char* psz,										\
			std::size_t cch, CodePage& cpResult, std::size_t& cchConvertable);			\
		const bool b##name =															\
			CEncoderFactory::GetInstance().RegisterDetector(cp, &DetectCodePage_##name);\
	}


/// Encoder that uses the Windows conversion tables as they are, by delegating to
/// ::WideCharToMultiByte and ::MultiByteToWideChar for one code page.
class CWindowsEncoder : public CEncoder {
private:
	/**
	 *	Constructor. It is private; CEncoderFactory is a friend and can create instances.
	 *	@param cp	Windows code page to convert from/to
	 *	@throw std::invalid_argument	::IsValidCodePage rejects @a cp
	 */
	CWindowsEncoder(CodePage cp) : m_nCodePage(cp) {
		if(!toBoolean(::IsValidCodePage(cp)))
			throw std::invalid_argument("Specified code page is not supported.");
	}
public:
	/**
	 *	Converts UTF-16 text into the code page of this encoder.
	 *	A first attempt is made with no flags. If the API reports failure, the callback decides:
	 *	null or a true result retries with WC_DEFAULTCHAR, a false result abandons the conversion.
	 *	@return	value reported by ::WideCharToMultiByte, or 0 for empty input or abandonment
	 */
	std::size_t ConvertFromUnicode(CFU_ARGLIST) {
		// The API fails on a zero-length source. Empty input is an empty result, not a
		// conversion failure, so it must not reach the callback.
		if(cchSrc == 0)
			return 0;
		// The API takes int lengths; the "null-terminated" sentinel becomes -1, which the API accepts.
		const int	cchSource = static_cast<int>(cchSrc);
		const int	cchBuffer = static_cast<int>(cchDest);
		// NOTE(review): with dwFlags == 0 the API substitutes its default character without
		// reporting an error, so the callback is reached only when the call fails outright.
		const int	result = ::WideCharToMultiByte(m_nCodePage, 0, pwszSrc, cchSource, pszDest, cchBuffer, 0, 0);
		if(result != 0)
			return result;
		if(pCallBack != 0 && !(*pCallBack)())
			return 0;	// the caller asked to abandon the conversion
		return ::WideCharToMultiByte(m_nCodePage, WC_DEFAULTCHAR, pwszSrc, cchSource, pszDest, cchBuffer, 0, 0);
	}
	/**
	 *	Converts text in the code page of this encoder into UTF-16.
	 *	A strict attempt (MB_ERR_INVALID_CHARS) is made first. If the API reports failure, the callback
	 *	decides: null or a true result retries leniently, a false result abandons the conversion.
	 *	@return	value reported by ::MultiByteToWideChar, or 0 for empty input or abandonment
	 */
	std::size_t ConvertToUnicode(CTU_ARGLIST) {
		// The API fails on a zero-length source; see ConvertFromUnicode.
		if(cchSrc == 0)
			return 0;
		const int	cchSource = static_cast<int>(cchSrc);
		const int	cchBuffer = static_cast<int>(cchDest);
		// NOTE(review): some code pages reject MB_ERR_INVALID_CHARS, in which case the strict
		// attempt fails for a reason unrelated to the input and the callback is still consulted.
		const int	result = ::MultiByteToWideChar(m_nCodePage, MB_ERR_INVALID_CHARS, pszSrc_, cchSource, pwszDest, cchBuffer);
		if(result != 0)
			return result;
		if(pCallBack != 0 && !(*pCallBack)())
			return 0;	// the caller asked to abandon the conversion
		return ::MultiByteToWideChar(m_nCodePage, 0, pszSrc_, cchSource, pwszDest, cchBuffer);
	}
	/// Returns CPINFO::MaxCharSize for the code page, or 0 if ::GetCPInfo fails.
	uchar GetMaxCharacterLength() const {
		CPINFO	cpi;
		if(!::GetCPInfo(m_nCodePage, &cpi))
			return 0;
		return static_cast<uchar>(cpi.MaxCharSize);
	}
	friend class CEncoderFactory;
protected:
	CodePage	m_nCodePage;	///< code page passed to the Windows conversion functions
};



/// Returns the single shared factory object.
/// The object is a function-local static, so it is constructed the first time this function
/// runs; DEFINE_ENCODER_CLASS and DEFINE_DETECTOR call it from namespace-scope initializers.
inline CEncoderFactory& CEncoderFactory::GetInstance() {
	static CEncoderFactory	theFactory;
	return theFactory;
}

/// Returns the detector registered for CPEX_UNICODE_AUTODETECT, or null if none is registered.
inline CEncoderFactory::CodePageDetector CEncoderFactory::GetUnicodeDetector() const {
	const DetectorMap::const_iterator	found = m_registeredDetectors.find(CPEX_UNICODE_AUTODETECT);
	if(found == m_registeredDetectors.end())
		return 0;
	return found->second;
}

/// Returns true if @a cp is a code page for auto-detection, i.e. a detector is registered for it.
inline bool CEncoderFactory::IsCodePageForAutoDetection(CodePage cp) const {
	return m_registeredDetectors.count(cp) != 0;
}

/// Returns true if @a cp is usable: Windows accepts it, a detector is registered for it,
/// or an encoder is registered for it. The checks run in that order.
inline bool CEncoderFactory::IsValidCodePage(CodePage cp) const {
	if(toBoolean(::IsValidCodePage(cp)))
		return true;
	if(IsCodePageForAutoDetection(cp))
		return true;
	return m_registeredEncoders.count(cp) != 0;
}

/**
 *	Registers a code page detector.
 *	@param cp				code page the detector is registered under
 *	@param factoryMethod	function that performs the detection; must not be null (asserted)
 *	@return					true if the detector was added; false if @a cp already had one,
 *							in which case the existing entry is kept
 */
inline bool CEncoderFactory::RegisterDetector(CodePage cp, CodePageDetector factoryMethod) {
	assert(factoryMethod != 0);
	const std::pair<DetectorMap::iterator, bool>	outcome =
		m_registeredDetectors.insert(DetectorMap::value_type(cp, factoryMethod));
	return outcome.second;
}

/**
 *	Registers an encoder.
 *	@param cp				code page the encoder is registered under
 *	@param factoryMethod	function that creates the encoder; must not be null (asserted)
 *	@return					true if the encoder was added; false if @a cp already had one,
 *							in which case the existing entry is kept
 */
inline bool CEncoderFactory::RegisterEncoder(CodePage cp, EncoderProducer factoryMethod) {
	assert(factoryMethod != 0);
	const std::pair<EncoderMap::iterator, bool>	outcome =
		m_registeredEncoders.insert(EncoderMap::value_type(cp, factoryMethod));
	return outcome.second;
}

} // namespace Encodings
} // namespace Ascension

#endif /* _ENCODER_H_ */

/* [EOF] */