#define STRICT
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include "euc_jp_to_utf16_table.h"
/* Fallback UTF-16 code unit (U+FF1F, FULLWIDTH QUESTION MARK) that
   EucJpToUtf16() stores when a byte sequence has no usable entry in
   euc_jp_to_utf16_table. */
#define DUMMY_CODE (0xff1f)
/* Smaller of two values. Defined only when no earlier header supplied `min`.
   Each argument may be evaluated twice, so pass side-effect-free expressions. */
#ifndef min
#define min(a,b) ((a)<(b)?(a):(b))
#endif
/* Upper bound used by main(): it reads at most this many bytes of input and
   lets the converter write at most this many wchar_t units of output. */
#define DEFAULT_BUFF_LEN 2048
/*
 * Convert `src_size` bytes of EUC-JP text at `src` into wchar_t code units.
 *
 *   dest       Output buffer; may be NULL only when dest_size is 0.
 *   dest_size  Capacity of dest in wchar_t units. When 0, nothing is written
 *              and the function only counts the units the input would need.
 *   src        Input bytes; must not be NULL.
 *   src_size   Number of input bytes; must be at least 1.
 *
 * Returns the number of units converted (or counted). Conversion stops early,
 * returning the count so far, once dest is full. Returns -1 for a NULL
 * argument, an empty input, or a byte sequence outside the accepted ranges.
 * The output is not NUL-terminated by this function.
 */
int EucJpToUtf16(wchar_t *dest, size_t dest_size, char *src, size_t src_size);
/*
 * Read up to DEFAULT_BUFF_LEN bytes of EUC-JP text from "data.txt", convert
 * them with EucJpToUtf16(), and write the result to "out.txt" preceded by a
 * byte-order mark (0xFEFF).
 *
 * Each output unit is written as one wchar_t in host byte order.
 * NOTE(review): the output is UTF-16 only where wchar_t is 16 bits wide
 * (presumably the intended Windows target) - confirm before porting.
 *
 * Returns 0 on success, 1 on any I/O or conversion failure.
 */
int main(int argc, char *argv[])
{
    char inpStr[DEFAULT_BUFF_LEN + 1];
    wchar_t outStr[DEFAULT_BUFF_LEN + 1];
    FILE *pInpFile;
    FILE *pOutFile;
    long sizeInpFile = -1;
    size_t sizeRead;
    int iResult;
    int writeFailed;
    wchar_t bom = 0xfeff;

    (void)argc;
    (void)argv;

    _wsetlocale(LC_ALL, L"japanese");
    memset(inpStr, 0, sizeof(inpStr));

    pInpFile = fopen("data.txt", "rb");
    if (pInpFile == NULL)
    {
        fwprintf(stderr, L"ファイルが開けません: data.txt");
        return 1;
    }

    /* Determine the file size. A failed seek/tell leaves sizeInpFile
       negative, which must never reach fread() as a (huge) size_t. */
    if (fseek(pInpFile, 0, SEEK_END) == 0)
    {
        sizeInpFile = ftell(pInpFile);
    }
    if (sizeInpFile <= 0 || fseek(pInpFile, 0, SEEK_SET) != 0)
    {
        /* An empty file is reported as a read failure, as before. */
        fclose(pInpFile);
        fwprintf(stderr, L"ファイル読み込み失敗: data.txt");
        return 1;
    }

    /* Anything beyond the buffer capacity is silently ignored. */
    sizeRead = (size_t)min(sizeInpFile, DEFAULT_BUFF_LEN);
    if (fread(inpStr, sizeRead, 1, pInpFile) != 1)
    {
        fclose(pInpFile);
        fwprintf(stderr, L"ファイル読み込み失敗: data.txt");
        return 1;
    }
    fclose(pInpFile);

    memset(outStr, 0, sizeof(outStr));
    iResult = EucJpToUtf16(outStr, DEFAULT_BUFF_LEN, inpStr, sizeRead);
    if (iResult == (-1))
    {
        fprintf(stderr, "EucJpToUtf16() Failed.\n");
        return 1;
    }
    wprintf(L"%d 文字変換しました。\n", iResult);

    pOutFile = fopen("out.txt", "wb");
    if (pOutFile == NULL)
    {
        fwprintf(stderr, L"ファイルが開けません: out.txt");
        return 1;
    }

    /* BOM first, then the converted units. fclose() flushes, so its result
       is part of deciding whether the data actually reached the file. */
    writeFailed =
        fwrite(&bom, sizeof(wchar_t), 1, pOutFile) != 1 ||
        fwrite(outStr, sizeof(wchar_t), (size_t)iResult, pOutFile) != (size_t)iResult;
    if (fclose(pOutFile) != 0)
    {
        writeFailed = 1;
    }
    if (writeFailed)
    {
        fwprintf(stderr, L"ファイル書き込み失敗: out.txt");
        return 1;
    }
    return 0;
}
/*
 * Classify the EUC-JP sequence that starts with ch1 (ch2/ch3 are the
 * following bytes, or 0 when the input ends first).
 *
 * Returns the sequence length in bytes (1, 2 or 3), or 0 when the bytes fall
 * outside every accepted range.
 */
static int EucJpSequenceLength(unsigned char ch1, unsigned char ch2, unsigned char ch3)
{
    unsigned long code;

    if (ch1 <= 0x7f)
    {
        return 1;   /* single-byte (ASCII range) */
    }

    if (ch1 != 0x8f)
    {
        /* Two-byte forms: 0x8E-prefixed codes or the main double-byte range. */
        code = ((unsigned long)ch1 << 8) | ch2;
        if ((0x8ea1 <= code && code <= 0x8edf) ||
            (0xa1a1 <= code && code <= 0xf4a6))
        {
            return 2;
        }
        return 0;
    }

    /* Three-byte form introduced by 0x8F. */
    code = ((unsigned long)ch1 << 16) | ((unsigned long)ch2 << 8) | ch3;
    if (0x8fa2af <= code && code <= 0x8fede3)
    {
        return 3;
    }
    return 0;
}

/*
 * Walk euc_jp_to_utf16_table for a sequence of `length` bytes.
 *
 * The walk starts at the entry indexed by the first byte. For every further
 * byte the current entry must have byType 2, and the next entry is found at
 * dwBitmapIndex + that byte. The final entry must have byType 3 and supplies
 * dwUtf16Code. Any other byType yields DUMMY_CODE.
 */
static unsigned long EucJpLookup(const unsigned char *bytes, int length)
{
    unsigned long index = bytes[0];
    int i;

    for (i = 1; i < length; i++)
    {
        if (euc_jp_to_utf16_table[index].byType != 2)
        {
            return DUMMY_CODE;
        }
        index = euc_jp_to_utf16_table[index].dwBitmapIndex + bytes[i];
    }
    if (euc_jp_to_utf16_table[index].byType != 3)
    {
        return DUMMY_CODE;
    }
    return euc_jp_to_utf16_table[index].dwUtf16Code;
}

/*
 * Convert `src_size` bytes of EUC-JP text at `src` into wchar_t code units.
 *
 *   dest       Output buffer; may be NULL only when dest_size is 0.
 *   dest_size  Capacity of dest in wchar_t units. When 0, nothing is written
 *              and the function only counts the units the input would need.
 *   src        Input bytes; must not be NULL.
 *   src_size   Number of input bytes; must be at least 1.
 *
 * Returns the number of units converted (or counted). When dest fills up,
 * conversion stops and the count written so far is returned. Returns -1 for
 * a NULL argument, an empty input, or a byte sequence outside the accepted
 * ranges. Sequences that pass the range check but have no table entry are
 * emitted as DUMMY_CODE. The output is not NUL-terminated.
 *
 * The count is an int, so inputs producing more than INT_MAX units are not
 * supported.
 */
int EucJpToUtf16(wchar_t *dest, size_t dest_size, char *src, size_t src_size)
{
    int countNeedsWords = 0;
    size_t cursor = 0;

    if (dest_size && dest == NULL)
    {
        return (-1);
    }
    if (src == NULL)
    {
        return (-1);
    }
    if (src_size < 1)
    {
        return (-1);
    }

    while (cursor < src_size)
    {
        /* Copy up to three bytes; missing trailing bytes stay zero so the
           classifier never reads past the end of src. */
        unsigned char chBuffer[3] = {0, 0, 0};
        size_t remaining = src_size - cursor;
        size_t nReadDataSize = (remaining < sizeof(chBuffer)) ? remaining : sizeof(chBuffer);
        int sizeBytes;

        memcpy(chBuffer, src + cursor, nReadDataSize);

        /* Validation comes before the capacity check, so malformed input is
           reported as -1 even when dest is already full. */
        sizeBytes = EucJpSequenceLength(chBuffer[0], chBuffer[1], chBuffer[2]);
        if (sizeBytes == 0)
        {
            return (-1);
        }

        if (dest_size)
        {
            if (dest_size < (size_t)countNeedsWords + 1)
            {
                return countNeedsWords;   /* dest is full */
            }
            *dest = (wchar_t)EucJpLookup(chBuffer, sizeBytes);
            dest++;
        }

        countNeedsWords++;
        cursor += (size_t)sizeBytes;
    }
    return countNeedsWords;
}