HTML Tidy
5.7.28
The HTACG Tidy HTML Project
utf8.h
Go to the documentation of this file.
1
#ifndef __UTF8_H__
2
#define __UTF8_H__
3
4
/* utf8.h -- convert characters to/from UTF-8
5
6
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7
See tidy.h for the copyright notice.
8
9
*/
10
11
#include "
tidyplatform.h
"
12
#include "
tidybuffio.h
"
13
14
/* UTF-8 encoding/decoding support
15
** Does not convert character "code points", i.e. to/from ISO/IEC 10646.
16
*/
17
18
int
TY_
(DecodeUTF8BytesToChar)(
uint
* c,
uint
firstByte,
ctmbstr
successorBytes,
19
TidyInputSource
* inp,
int
* count );
20
21
int
TY_
(EncodeCharToUTF8Bytes)(
uint
c,
tmbstr
encodebuf,
22
TidyOutputSink
* outp,
int
* count );
23
24
25
/* Reads from the string str (ctmbstr is const tmbchar *) and writes a uint
** through ch; by its name this presumably decodes one UTF-8 character.
** NOTE(review): the meaning of the uint return value is not visible in
** this header -- confirm against the implementation before relying on it.
*/
uint TY_(GetUTF8)( ctmbstr str, uint *ch );
26
/* Takes a writable buffer buf (tmbstr is tmbchar *) and a character c; by
** its name this presumably stores c into buf as UTF-8.
** NOTE(review): what the returned tmbstr points at, and how much room buf
** must provide, are not visible in this header -- confirm in the
** implementation.
*/
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
27
28
/* Unicode byte-order mark, big-endian form (the default form) */
#define UNICODE_BOM_BE 0xFEFF
29
/* Unqualified BOM name; an alias for the big-endian form */
#define UNICODE_BOM UNICODE_BOM_BE
30
/* Unicode byte-order mark, little-endian form (bytes of 0xFEFF swapped) */
#define UNICODE_BOM_LE 0xFFFE
31
/* Unicode byte-order mark in UTF-8: bytes EF BB BF packed into one value */
#define UNICODE_BOM_UTF8 0xEFBBBF
32
33
34
/* Predicate on a UCS-4 value (tchar is uint). By its name, presumably
** reports whether ucs4 can be represented in UTF-16; the exact validity
** rule lives in the implementation, not in this header.
*/
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
35
/* Predicate on a character value (tchar is uint). By its name, presumably
** reports whether ch is a UTF-16 high (leading) surrogate; the range
** tested is defined in the implementation, not in this header.
*/
Bool TY_(IsHighSurrogate)( tchar ch );
36
/* Predicate on a character value (tchar is uint). By its name, presumably
** reports whether ch is a UTF-16 low (trailing) surrogate; the range
** tested is defined in the implementation, not in this header.
*/
Bool TY_(IsLowSurrogate)( tchar ch );
37
38
/* Predicate on a character value (tchar is uint).
** NOTE(review): "combined" presumably refers to a value produced by
** combining a surrogate pair (see CombineSurrogatePair) -- confirm
** against the implementation; the test applied is not visible here.
*/
Bool TY_(IsCombinedChar)( tchar ch );
39
/* Predicate on a character value (tchar is uint).
** NOTE(review): presumably a stricter companion to IsCombinedChar that
** also checks validity of the combined value -- the precise rule is not
** visible in this header; confirm against the implementation.
*/
Bool TY_(IsValidCombinedChar)( tchar ch );
40
41
/* Takes two character values, high and low, and returns one tchar (uint).
** By its name, presumably merges a UTF-16 surrogate pair into the single
** character it encodes. Behaviour for arguments that are not a proper
** pair is not visible in this header.
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
42
/* Takes a character value utf16 and two out pointers, high and low.
** By its name, presumably the inverse of CombineSurrogatePair: writes the
** two surrogate halves through high and low.
** NOTE(review): what the Bool result signals, and whether the outputs are
** written on failure, are not visible in this header -- confirm.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar *high, tchar *low );
43
44
45
46
#endif
/* __UTF8_H__ */
tmbstr
tmbchar * tmbstr
Definition:
tidyplatform.h:608
tidyplatform.h
TidyInputSource
This type defines an input source capable of delivering raw bytes of input.
Definition:
tidy.h:1079
uint
unsigned int uint
Definition:
tidyplatform.h:569
tidybuffio.h
TidyOutputSink
This type defines an output destination capable of accepting raw bytes of output.
Definition:
tidy.h:1129
tchar
uint tchar
Definition:
tidyplatform.h:605
Bool
Bool
Definition:
tidyplatform.h:647
TY_
#define TY_(str)
Definition:
forward.h:23
ctmbstr
const tmbchar * ctmbstr
Definition:
tidyplatform.h:609
src
utf8.h
Generated by
1.8.18