UTF-16 to CP932 example

[PR] 歯周病
TOP >>UTF-16 to CP932 example
更新日2007年01月30日
UTF-16 to CP932 example
UTF-16 から CP932 への文字コード変換のサンプル実装です。
MinGW + gcc 環境において動作確認してあります。
Example Code
以下は、Utf16ToCp932() の実装例です。 [utf16_to_cp932.zip]
/*
 * file:Utf16ToCp932.c
 */
/** @file
 * @brief UTF-16 → CP932 への変換実装サンプルプログラム。
 * 
 * UTF-16 → CP932 への変換実装サンプルプログラム。
 * UTF-16にて用意された"data.txt"ファイルを読み込み、
 * CP932へと変換、
 * "out.txt"ファイルへと出力します。
 * 変換前の文字列にBOMが見つかれば、除去します。
 */
#define STRICT
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "utf16_to_cp932_table.c"
/* Boolean constants; defined here only if nothing earlier supplied them. */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/*
 * Smaller of two values.
 * NOTE: function-like macro -- one of the arguments is evaluated twice, so
 * do not pass expressions with side effects.
 */
#ifndef min
#define min(a,b) ((a)<(b)?(a):(b))
#endif
/*
 * Size used for the buffers in main(): element count of the wchar_t input
 * buffer and byte count of the char output buffer.
 */
#define DEFAULT_BUFF_LEN 512

/* Forward declaration; the definition follows main(). */
int Utf16ToCp932(char *dest, size_t dest_size, wchar_t *src, size_t src_size);

/**
 * Sample driver: read UTF-16 text from "data.txt", convert it to CP932 and
 * write the converted bytes to "out.txt".
 *
 * At most DEFAULT_BUFF_LEN bytes of the input file are read. A leading BOM
 * (0xfeff) is skipped before conversion.
 *
 * NOTE: the file bytes are read directly into a wchar_t array, so this
 * assumes a 16-bit wchar_t whose byte order matches the file (the page
 * states the sample was verified with MinGW + gcc).
 *
 * @param argc unused
 * @param argv unused
 *
 * @return 0 on success, 1 on any failure (open, read, conversion or write).
 */
int main(int argc, char *argv[])
{
    wchar_t     instr[DEFAULT_BUFF_LEN] = {0};
    char        outstr[DEFAULT_BUFF_LEN] = {0};
    wchar_t     *pInStr = instr;
    FILE        *pFile;
    long        sizeFile;
    size_t      bytesToRead;
    size_t      bytesRead;
    size_t      instr_size;
    int         iResult;

    (void)argc;
    (void)argv;

    /*
     * Read the input text from the file.
     */
    pFile = fopen("data.txt", "rb");
    if (pFile == NULL)
    {
        perror("ファイルがひらけません: data.txt");
        return 1;
    }

    /*
     * Determine the file size. Every step is checked: a failed ftell()
     * returns -1, which must never reach fread() as a (huge) size_t.
     */
    if (fseek(pFile, 0, SEEK_END) != 0
        || (sizeFile = ftell(pFile)) < 0
        || fseek(pFile, 0, SEEK_SET) != 0)
    {
        perror("ファイル読み込み失敗: data.txt");
        fclose(pFile);
        return 1;
    }

    /*
     * Read with an element size of 1 so that fread() reports the number of
     * bytes actually obtained; a short read is then usable instead of being
     * reported as a total failure.
     */
    bytesToRead = (size_t)min(sizeFile, DEFAULT_BUFF_LEN);
    bytesRead = fread(instr, 1, bytesToRead, pFile);
    if (bytesRead == 0)
    {
        if (ferror(pFile))
        {
            perror("ファイル読み込み失敗: data.txt");
        }
        else
        {
            /* Empty file: errno is not meaningful here, so avoid perror(). */
            fputs("ファイル読み込み失敗: data.txt\n", stderr);
        }
        fclose(pFile);
        return 1;
    }
    fclose(pFile);

    /* Number of whole wchar_t elements read (a trailing odd byte is dropped). */
    instr_size = bytesRead / sizeof(wchar_t);

    /*
     * Strip the BOM if present.
     */
    if (instr_size > 0 && *pInStr == 0xfeff)
    {
        pInStr++;
        instr_size--;
    }

    /*
     * Convert utf16:instr -> cp932:outstr.
     */
    iResult = Utf16ToCp932(outstr, sizeof(outstr), pInStr, instr_size);
    if (iResult == FALSE)
    {
        /* Utf16ToCp932() does not set errno, so perror() would be misleading. */
        fputs("Utf16ToCp932() Failed.\n", stderr);
        return 1;
    }
    printf("%d バイト変換しました。\n", iResult);

    /*
     * Write the converted bytes to the output file.
     */
    pFile = fopen("out.txt", "wb");
    if (pFile == NULL)
    {
        perror("ファイルが開けません: out.txt");
        return 1;
    }
    if (fwrite(outstr, 1, (size_t)iResult, pFile) != (size_t)iResult)
    {
        perror("ファイル書き込み失敗: out.txt");
        fclose(pFile);
        return 1;
    }
    /* fclose() flushes buffered output, so its result matters for writes. */
    if (fclose(pFile) != 0)
    {
        perror("ファイル書き込み失敗: out.txt");
        return 1;
    }

    return 0;
}

/**
 * Convert a UTF-16 string to CP932.
 *
 * Each input element is looked up in utf16_to_cp932_table. A table value of
 * 0x00ff or less is emitted as a single byte; a larger value is emitted as
 * two bytes, high byte first. A non-zero input element whose table value is
 * 0 is replaced by '?' (0x3f). No terminating NUL is appended; a NUL byte
 * is written only where the input itself contains 0x0000.
 *
 * @param[out] dest      Output buffer for the CP932 bytes. May be NULL only
 *                       when dest_size is 0.
 * @param[in]  dest_size Size of dest in bytes. Pass 0 to only count the
 *                       bytes required without writing anything.
 * @param[in]  src       Input UTF-16 string (never written through).
 * @param[in]  src_size  Number of elements (not bytes) in src.
 *
 * @return Number of bytes written to dest, or, when dest_size is 0, the
 *         number of bytes needed to hold the converted string.
 *         If dest is too small, conversion stops before the first character
 *         that does not fit and the number of bytes written so far is
 *         returned.
 *         Returns FALSE (0) when src is NULL, or when dest is NULL while
 *         dest_size is non-zero. Note that 0 is also the result for an
 *         empty input.
 */
int Utf16ToCp932(char *dest, size_t dest_size, wchar_t *src, size_t src_size)
{
    const char      dummy_code = 0x3f;      /* '?' for unmappable input */
    size_t          countNeedsBytes = 0;
    size_t          cursor;

    /*
     * Validate the arguments. The sizes are size_t and therefore can never
     * be negative, so only the pointers need checking.
     */
    if (src == NULL)
    {
        /* Error : src is NULL. */
        return FALSE;
    }
    if ((dest_size != 0) && (dest == NULL))
    {
        /* Error : dest is NULL although output was requested. */
        return FALSE;
    }

    for (cursor = 0; cursor < src_size; cursor++)
    {
        /*
         * Widen to an unsigned type first: wchar_t may be signed and/or
         * wider than 16 bits on some platforms.
         */
        unsigned long   unicode = (unsigned long)src[cursor];
        unsigned int    cp932code = 0;
        size_t          sizeBytes;

        /*
         * Only 16-bit values are valid table indices.
         * NOTE(review): assumes utf16_to_cp932_table has 0x10000 entries
         * (one per UTF-16 code unit) -- confirm against the table file.
         * Anything outside that range is treated as unmappable.
         */
        if (unicode <= 0xffffUL)
        {
            cp932code = utf16_to_cp932_table[unicode];
        }
        sizeBytes = (cp932code <= 0x00ff) ? 1 : 2;

        if (dest_size != 0)
        {
            /*
             * countNeedsBytes never exceeds dest_size in this branch, so
             * the subtraction cannot wrap.
             */
            if ((dest_size - countNeedsBytes) < sizeBytes)
            {
                /* dest is full: report what has been written so far. */
                return (int)countNeedsBytes;
            }

            if (cp932code == 0x0000)
            {
                /* Either a real NUL, or a character with no mapping. */
                *dest++ = (unicode == 0x0000) ? (char)0x00 : dummy_code;
            }
            else if (sizeBytes == 1)
            {
                *dest++ = (char)(cp932code & 0x00ff);
            }
            else
            {
                /* Double-byte character: lead byte, then trail byte. */
                *dest++ = (char)((cp932code >> 8) & 0x00ff);
                *dest++ = (char)(cp932code & 0x00ff);
            }
        }
        countNeedsBytes += sizeBytes;
    }

    return (int)countNeedsBytes;
}