Line data Source code
1 : /***************************************************************************//**
2 :
3 : @file utf8test.c
4 :
5 : @author Stephen Brennan
6 :
7 : @date Created Wednesday, 9 July 2014
8 :
9 : @brief Tests the utf8 functions.
10 :
11 : @copyright Copyright (c) 2013-2015, Stephen Brennan. Released under the
12 : Revised BSD License. See the LICENSE.txt file for details.
13 :
14 : *******************************************************************************/
15 :
16 : #include <stdio.h>
17 : #include <wchar.h>
18 :
19 : #include "libstephen/util.h"
20 : #include "libstephen/ut.h"
21 : #include "tests.h"
22 :
/**
   @brief Decode hand-encoded UTF-8 strings and compare against known code
   points.

   One sample is used for each UTF-8 sequence length (4, 3, 2 and 1 bytes),
   followed by all four samples concatenated into a single string.  Each input
   is passed to utf8toucs4() and the output buffer is compared, via wcscmp(),
   with the matching wide string literal.

   @returns 0 when the end of the function is reached.  What happens when a
   TEST_ASSERT() fails is decided by that macro, which is defined elsewhere.
 */
int utf8_test_predetermined(void)
{
#define BFSZ 5
  // Output buffer: room for the four code points of the combined case plus
  // the terminating wide NUL.
  wchar_t result[BFSZ];

  // U+1F602 (face with tears of joy), 17 significant bits -> 4 bytes.
  //   code point: 0001 1111 0110 0000 0010
  //   template:   1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
  //   encoded:    1111 0000 1001 1111 1001 1000 1000 0010  = F0 9F 98 82
  char *four_byte = "\xF0\x9F\x98\x82";

  // U+4EBA (Chinese character ren, 'man'), 15 significant bits -> 3 bytes.
  //   code point: 0100 1110 1011 1010
  //   template:   1110 xxxx 10xx xxxx 10xx xxxx
  //   encoded:    1110 0100 1011 1010 1011 1010            = E4 BA BA
  char *three_byte = "\xE4\xBA\xBA";

  // U+0101 (Latin small letter a with macron), 9 significant bits -> 2 bytes.
  //   code point: 0000 0001 0000 0001
  //   template:   110x xxxx 10xx xxxx
  //   encoded:    1100 0100 1000 0001                      = C4 81
  char *two_byte = "\xC4\x81";

  // U+0061 (Latin small letter a), 7 significant bits -> 1 byte: 61.
  char *one_byte = "\x61";

  // The four sequences above, back to back, in the same order.
  char *combined = "\xF0\x9F\x98\x82\xE4\xBA\xBA\xC4\x81\x61";

  utf8toucs4(result, four_byte, BFSZ);
  TEST_ASSERT(wcscmp(result, L"\U0001F602") == 0);

  utf8toucs4(result, three_byte, BFSZ);
  TEST_ASSERT(wcscmp(result, L"\u4EBA") == 0);

  utf8toucs4(result, two_byte, BFSZ);
  TEST_ASSERT(wcscmp(result, L"\u0101") == 0);

  utf8toucs4(result, one_byte, BFSZ);
  TEST_ASSERT(wcscmp(result, L"a") == 0);

  utf8toucs4(result, combined, BFSZ);
  TEST_ASSERT(wcscmp(result, L"\U0001F602\u4EBA\u0101a") == 0);

  return 0;
}
72 :
73 1 : void utf8_test(void)
74 : {
75 1 : smb_ut_group *group = su_create_test_group("utf8");
76 :
77 1 : smb_ut_test *predetermined = su_create_test("predetermined",
78 : utf8_test_predetermined);
79 1 : su_add_test(group, predetermined);
80 :
81 1 : su_run_group(group);
82 1 : su_delete_group(group);
83 1 : }
|