22 #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
23 #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
25 #include <unordered_map>
40 memset(code_, 0,
sizeof(code_));
44 void Set(
int index,
int value) {
46 if (length_ <= index) length_ = index + 1;
50 void Set3(
int code0,
int code1,
int code2) {
// Accessor: returns the current value of the length_ member.
// Does not modify the object (const member function).
57 int length()
const {
return length_; }
62 return fp->
Serialize(&self_normalized_) &&
73 if (length_ != other.length_)
return false;
74 for (
int i = 0; i < length_; ++i) {
75 if (code_[i] != other.code_[i])
return false;
83 for (
int i = 0; i < code.length_; ++i) {
84 result ^=
static_cast<uint64_t
>(code(i)) << (7 * i);
93 int8_t self_normalized_;
150 STRING* radical_stroke_table);
174 auto it = next_codes_.find(code);
175 return it == next_codes_.end() ? nullptr : it->second;
180 auto it = final_codes_.find(code);
181 return it == final_codes_.end() ? nullptr : it->second;
207 void DefragmentCodeValues(
int encoded_null);
209 void ComputeCodeRange();
219 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash>
225 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
230 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
bool Serialize(const char *data, size_t count=1)
bool DeSerialize(char *data, size_t count=1)
bool DeSerialize(TFile *fp)
bool Serialize(TFile *fp) const
void Truncate(int length)
void Set(int index, int value)
static const int kMaxCodeLen
int operator()(int index) const
void Set3(int code0, int code1, int code2)
bool operator==(const RecodedCharID &other) const
uint64_t operator()(const RecodedCharID &code) const
void SetupDirect(const GenericVector< RecodedCharID > &codes)
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
STRING GetEncodingAsString(const UNICHARSET &unicharset) const
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
bool DeSerialize(TFile *fp)
static const int kNumHangul
UnicharCompress & operator=(const UnicharCompress &src)
static const int kFirstHangul
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
bool IsValidFirstCode(int code) const
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, STRING *radical_stroke_table)
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
void SetupPassThrough(const UNICHARSET &unicharset)
int DecodeUnichar(const RecodedCharID &code) const
bool Serialize(TFile *fp) const