Go to the documentation of this file.
20 #ifndef TESSERACT_TRAINING_VALIDATOR_H_
21 #define TESSERACT_TRAINING_VALIDATOR_H_
82 const std::vector<char32>& src,
83 std::vector<std::vector<char32>>*
dest);
133 using IndicPair = std::pair<CharClass, char32>;
151 const std::vector<char32>& src,
152 std::vector<std::vector<char32>>*
dest);
155 std::vector<std::vector<char32>>*
dest);
160 const std::vector<char32>& utf32);
231 std::vector<IndicPair>
codes_;
233 std::vector<std::vector<char32>>
parts_;
246 #endif // TESSERACT_TRAINING_VALIDATOR_H_
static const char32 kMaxViramaScriptUnicode
static const char32 kKhmerVirama
bool UseMultiCode(unsigned length)
static const char32 kRightToLeftMark
virtual bool ConsumeGraphemeIfValid()=0
static bool IsVirama(char32 unicode)
static ViramaScript MostFrequentViramaScript(const std::vector< char32 > &utf32)
static const char32 kJavaneseVirama
static const char32 kMyanmarVirama
static const char32 kInvalid
static bool IsVedicAccent(char32 unicode)
static const char32 kZeroWidthJoiner
static const char32 kMaxSinhalaUnicode
static const char32 kZeroWidthNonJoiner
static const char32 kLeftToRightMark
static const char32 kZeroWidthSpace
static const char32 kMinIndicUnicode
static std::unique_ptr< Validator > ScriptValidator(ViramaScript script, bool report_errors)
std::pair< CharClass, char32 > IndicPair
static const char32 kMaxJavaneseUnicode
virtual CharClass UnicodeToCharClass(char32 ch) const =0
std::vector< IndicPair > codes_
static const int kIndicCodePageSize
Validator(ViramaScript script, bool report_errors)
static const char32 kSinhalaVirama
void MultiCodePart(unsigned length)
bool ValidateCleanAndSegmentInternal(GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
std::vector< std::vector< char32 > > parts_
bool IsSubscriptScript() const
std::vector< char32 > output_
static bool IsZeroWidthMark(char32 ch)
static bool ValidateCleanAndSegment(GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
void MoveResultsToDest(GraphemeNormMode g_mode, std::vector< std::vector< char32 >> *dest)
void ComputeClassCodes(const std::vector< char32 > &text)