tesseract  4.1.1
tesseract Namespace Reference

Classes

class  AlignedBlob
 
struct  AlignedBlobParams
 
class  AmbigSpec
 
struct  AssociateStats
 
class  AssociateUtils
 
class  BaselineBlock
 
class  BaselineDetect
 
class  BaselineRow
 
class  BBGrid
 
struct  BestChoiceBundle
 Bundle together all the things pertaining to the best choice/state. More...
 
class  BitVector
 
struct  BlobData
 
class  BlobGrid
 
struct  BlockGroup
 
class  BoolParam
 
class  BoxChar
 
struct  BoxCharPtrSort
 
class  BoxWord
 
class  CCNonTextDetect
 
class  CCStruct
 
class  CCUtil
 
class  CCUtilMutex
 
class  ChoiceIterator
 
class  Classify
 
class  ClassPruner
 
struct  ClipFFunc
 
struct  ClipFPrime
 
struct  ClipGFunc
 
struct  ClipGPrime
 
struct  Cluster
 
class  ColPartition
 
class  ColPartitionGrid
 
class  ColPartitionSet
 
class  ColSegment
 
class  ColumnFinder
 
class  Convolve
 
class  CTC
 
class  CUtil
 
class  Dawg
 
struct  DawgArgs
 
class  DawgCache
 
struct  DawgLoader
 
struct  DawgPosition
 
class  DawgPositionVector
 
class  DebugPixa
 
class  DetLineFit
 
class  Dict
 
struct  DocQualCallbacks
 
class  DocumentCache
 
class  DocumentData
 
class  DoubleParam
 
class  DoublePtr
 
class  DPPoint
 
class  EquationDetect
 
class  EquationDetectBase
 
class  ErrorCounter
 
struct  FFunc
 
class  File
 
struct  FloatWordFeature
 
struct  FontInfo
 
class  FontInfoTable
 
struct  FontSet
 
struct  FontSpacingInfo
 
class  FontUtils
 
struct  FPrime
 
class  FRAGMENT
 
class  FullyConnected
 
class  GenericHeap
 
struct  GeometricClassifierState
 
struct  GFunc
 
struct  GPrime
 
class  GridBase
 
class  GridSearch
 
struct  HFunc
 
struct  HPrime
 
class  IcuErrorCode
 
struct  IdentityFunc
 
class  ImageData
 
class  ImageFind
 
class  ImageThresholder
 
class  IndexMap
 
class  IndexMapBiDi
 
class  Input
 
class  InputBuffer
 
struct  Interval
 
class  IntFeatureDist
 
class  IntFeatureMap
 
class  IntFeatureSpace
 
class  IntGrid
 
class  IntParam
 
struct  IntSimdMatrix
 
struct  KDPair
 
struct  KDPairDec
 
struct  KDPairInc
 
class  KDPtrPair
 
struct  KDPtrPairDec
 
struct  KDPtrPairInc
 
class  KDVector
 
class  LanguageModel
 
struct  LanguageModelDawgInfo
 
struct  LanguageModelNgramInfo
 
struct  LanguageModelState
 Struct to store information maintained by various language model components. More...
 
class  LigatureTable
 
class  LineFinder
 
struct  LineHypothesis
 
struct  LMConsistencyInfo
 
class  LMPainPoints
 
class  LSTM
 
class  LSTMRecognizer
 
class  LSTMTester
 
class  LSTMTrainer
 
class  LTRResultIterator
 
class  MasterTrainer
 
class  Maxpool
 
class  MutableIterator
 
class  Network
 
class  NetworkBuilder
 
class  NetworkIO
 
class  NetworkScratch
 
struct  NodeChild
 
class  ObjectCache
 
class  OutputBuffer
 
class  PageIterator
 
class  PangoFontInfo
 
class  ParagraphModelSmearer
 
class  ParagraphTheory
 
class  Parallel
 
class  Param
 
class  ParamsModel
 
class  ParamsTrainingBundle
 
struct  ParamsTrainingHypothesis
 
struct  ParamsVectors
 
class  ParamUtils
 
class  PixelHistogram
 
class  Plumbing
 
class  PointerVector
 
struct  PtrHash
 
class  RecodeBeamSearch
 
class  RecodedCharID
 
struct  RecodeNode
 
class  Reconfig
 
struct  Relu
 
struct  ReluPrime
 
class  ResultIterator
 
class  Reversed
 
class  RowInfo
 
class  RowScratchRegisters
 
class  SampleIterator
 
struct  ScoredFont
 
class  SegSearchPending
 
class  Series
 
class  Shape
 
class  ShapeClassifier
 
struct  ShapeDist
 
struct  ShapeQueueEntry
 
struct  ShapeRating
 
class  ShapeTable
 
class  ShiroRekhaSplitter
 
class  SIMDDetect
 
class  SimpleClusterer
 
struct  SpacingProperties
 
class  SquishedDawg
 
class  StaticShape
 
class  StrideMap
 
class  StringParam
 
class  StringRenderer
 
class  StrokeWidth
 
class  StructuredTable
 
class  TabConstraint
 
class  TabEventHandler
 
class  TabFind
 
class  TableFinder
 
class  TableRecognizer
 
class  TabVector
 
struct  TESS_CHAR
 
class  TessAltoRenderer
 
class  TessBaseAPI
 
class  TessBoxTextRenderer
 
class  TessClassifier
 
class  TessdataManager
 
class  Tesseract
 
struct  TesseractStats
 
class  TessHOcrRenderer
 
class  TessLSTMBoxRenderer
 
class  TessOsdRenderer
 
class  TessPDFRenderer
 
class  TessResultRenderer
 
class  TessTextRenderer
 
class  TessTsvRenderer
 
class  TessUnlvRenderer
 
class  TessWordStrBoxRenderer
 
class  TextlineProjection
 
class  Textord
 
class  TFile
 
class  TFNetworkModel
 
class  TFNetworkModelDefaultTypeInternal
 
class  TrainingSample
 
class  TrainingSampleSet
 
class  TRand
 
class  TransposedArray
 
class  Trie
 
class  UNICHAR
 
class  UnicharAmbigs
 
struct  UnicharAndFonts
 
class  UnicharCompress
 
class  UnicharIdArrayUtils
 
struct  UnicharRating
 
class  UnicodeSpanSkipper
 
struct  UnityFunc
 
class  ValidateGrapheme
 
class  ValidateIndic
 
class  ValidateJavanese
 
class  ValidateKhmer
 
class  ValidateMyanmar
 
class  Validator
 
struct  ViterbiStateEntry
 
class  WeightMatrix
 
struct  WordData
 
class  WordFeature
 
class  Wordrec
 
class  WordWithBox
 
class  WorkingPartSet
 

Typedefs

using DictFunc = int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const
 
using ProbabilityInContextFunc = double(Dict::*)(const char *, const char *, int, const char *, int)
 
using ParamsModelClassifyFunc = float(Dict::*)(const char *, void *)
 
using FillLatticeFunc = void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *)
 
typedef TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
 
using DotProductFunction = double(*)(const double *, const double *, int)
 
using SetOfModels = GenericVectorEqEq< const ParagraphModel * >
 
using WordRecognizer = void(Tesseract::*)(const WordData &, WERD_RES **, PointerVector< WERD_RES > *)
 
using ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis >
 
using UnicharIdVector = GenericVector< UNICHAR_ID >
 
using UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * >
 
using IntKDPair = KDPairInc< int, int >
 
using FileReader = bool(*)(const STRING &, GenericVector< char > *)
 
using FileWriter = bool(*)(const GenericVector< char > &, const STRING &)
 
using char32 = signed int
 
using RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >>
 
using RSCounts = std::unordered_map< int, int >
 
using ShapeQueue = GenericHeap< ShapeQueueEntry >
 
using NodeChildVector = GenericVector< NodeChild >
 
using SuccessorList = GenericVector< int >
 
using SuccessorListsVector = GenericVector< SuccessorList * >
 
using DawgVector = GenericVector< Dawg * >
 
typedef TessResultCallback2< bool, const GenericVector< char > &, LSTMTrainer * > * CheckPointReader
 
typedef TessResultCallback3< bool, SerializeAmount, const LSTMTrainer *, GenericVector< char > * > * CheckPointWriter
 
typedef TessResultCallback4< STRING, int, const double *, const TessdataManager &, int > * TestCallback
 
using RecodePair = KDPairInc< double, RecodeNode >
 
using RecodeHeap = GenericHeap< RecodePair >
 
using BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 
using ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 
using PartSetVector = GenericVector< ColPartitionSet * >
 
using WidthCallback = TessResultCallback1< bool, int >
 
using ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using LigHash = std::unordered_map< std::string, std::string, StringHash >
 
using PainPointHeap = GenericHeap< MatrixCoordPair >
 
using LanguageModelFlagsType = unsigned char
 Used for expressing various language model flags. More...
 

Enumerations

enum  LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }
 
enum  CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }
 
enum  CachingStrategy { CS_SEQUENTIAL, CS_ROUND_ROBIN }
 
enum  NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }
 
enum  kParamsTrainingFeatureType {
  PTRAIN_DIGITS_SHORT, PTRAIN_DIGITS_MED, PTRAIN_DIGITS_LONG, PTRAIN_NUM_SHORT,
  PTRAIN_NUM_MED, PTRAIN_NUM_LONG, PTRAIN_DOC_SHORT, PTRAIN_DOC_MED,
  PTRAIN_DOC_LONG, PTRAIN_DICT_SHORT, PTRAIN_DICT_MED, PTRAIN_DICT_LONG,
  PTRAIN_FREQ_SHORT, PTRAIN_FREQ_MED, PTRAIN_FREQ_LONG, PTRAIN_SHAPE_COST_PER_CHAR,
  PTRAIN_NGRAM_COST_PER_CHAR, PTRAIN_NUM_BAD_PUNC, PTRAIN_NUM_BAD_CASE, PTRAIN_XHEIGHT_CONSISTENCY,
  PTRAIN_NUM_BAD_CHAR_TYPE, PTRAIN_NUM_BAD_SPACING, PTRAIN_NUM_BAD_FONT, PTRAIN_RATING_PER_CHAR,
  PTRAIN_NUM_FEATURE_TYPES
}
 
enum  Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }
 
enum  WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }
 
enum  TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }
 
enum  PageSegMode {
  PSM_OSD_ONLY = 0, PSM_AUTO_OSD = 1, PSM_AUTO_ONLY = 2, PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4, PSM_SINGLE_BLOCK_VERT_TEXT = 5, PSM_SINGLE_BLOCK = 6, PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8, PSM_CIRCLE_WORD = 9, PSM_SINGLE_CHAR = 10, PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12, PSM_RAW_LINE = 13, PSM_COUNT
}
 
enum  PageIteratorLevel {
  RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD,
  RIL_SYMBOL
}
 
enum  ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }
 
enum  OcrEngineMode {
  OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT,
  OEM_COUNT
}
 
enum  ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }
 
enum  AmbigType {
  NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG,
  CASE_AMBIG, AMBIG_TYPE_COUNT
}
 
enum  SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }
 
enum  TessdataType {
  TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP,
  TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG,
  TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET,
  TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG,
  TESSDATA_PARAMS_MODEL, TESSDATA_LSTM, TESSDATA_LSTM_PUNC_DAWG, TESSDATA_LSTM_SYSTEM_DAWG,
  TESSDATA_LSTM_NUMBER_DAWG, TESSDATA_LSTM_UNICHARSET, TESSDATA_LSTM_RECODER, TESSDATA_VERSION,
  TESSDATA_NUM_ENTRIES
}
 
enum  CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }
 
enum  DawgType {
  DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN,
  DAWG_TYPE_COUNT
}
 
enum  XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }
 
enum  TrainingFlags { TF_INT_MODE = 1, TF_COMPRESS_UNICHARSET = 64 }
 
enum  ErrorTypes {
  ET_RMS, ET_DELTA, ET_WORD_RECERR, ET_CHAR_ERROR,
  ET_SKIP_RATIO, ET_COUNT
}
 
enum  Trainability {
  TRAINABLE, PERFECT, UNENCODABLE, HI_PRECISION_ERR,
  NOT_BOXED
}
 
enum  SerializeAmount { LIGHT, NO_BEST_TRAINER, FULL }
 
enum  SubTrainerResult { STR_NONE, STR_UPDATED, STR_REPLACED }
 
enum  NetworkType {
  NT_NONE, NT_INPUT, NT_CONVOLVE, NT_MAXPOOL,
  NT_PARALLEL, NT_REPLICATED, NT_PAR_RL_LSTM, NT_PAR_UD_LSTM,
  NT_PAR_2D_LSTM, NT_SERIES, NT_RECONFIG, NT_XREVERSED,
  NT_YREVERSED, NT_XYTRANSPOSE, NT_LSTM, NT_LSTM_SUMMARY,
  NT_LOGISTIC, NT_POSCLIP, NT_SYMCLIP, NT_TANH,
  NT_RELU, NT_LINEAR, NT_SOFTMAX, NT_SOFTMAX_NO_CTC,
  NT_LSTM_SOFTMAX, NT_LSTM_SOFTMAX_ENCODED, NT_TENSORFLOW, NT_COUNT
}
 
enum  NetworkFlags { NF_LAYER_SPECIFIC_LR = 64, NF_ADAM = 128 }
 
enum  TrainingState { TS_DISABLED, TS_ENABLED, TS_TEMP_DISABLE, TS_RE_ENABLE }
 
enum  NodeContinuation { NC_ANYTHING, NC_ONLY_DUP, NC_NO_DUP, NC_COUNT }
 
enum  TopNState { TN_TOP2, TN_TOPN, TN_ALSO_RAN, TN_COUNT }
 
enum  LossType { LT_NONE, LT_CTC, LT_SOFTMAX, LT_LOGISTIC }
 
enum  FlexDimensions { FD_BATCH, FD_HEIGHT, FD_WIDTH, FD_DIMSIZE }
 
enum  ColumnSpanningType {
  CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT,
  CST_COUNT
}
 
enum  NeighbourPartitionType {
  NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT,
  NPT_IMAGE, NPT_COUNT
}
 
enum  LeftOrRight { LR_LEFT, LR_RIGHT }
 
enum  PartitionFindResult { PFR_OK, PFR_SKEW, PFR_NOISE }
 
enum  ColSegType {
  COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED,
  COL_COUNT
}
 
enum  TabAlignment {
  TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT
}
 
enum  FactorNames {
  FN_INCOLOR, FN_Y0, FN_Y1, FN_Y2,
  FN_Y3, FN_X0, FN_X1, FN_SHEAR,
  FN_NUM_FACTORS
}
 
enum  CountTypes {
  CT_UNICHAR_TOP_OK, CT_UNICHAR_TOP1_ERR, CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR,
  CT_UNICHAR_TOPTOP_ERR, CT_OK_MULTI_UNICHAR, CT_OK_JOINED, CT_OK_BROKEN,
  CT_REJECT, CT_FONT_ATTR_ERR, CT_OK_MULTI_FONT, CT_NUM_RESULTS,
  CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK, CT_SIZE
}
 
enum  UnicodeNormMode { UnicodeNormMode::kNFD, UnicodeNormMode::kNFC, UnicodeNormMode::kNFKD, UnicodeNormMode::kNFKC }
 
enum  OCRNorm { OCRNorm::kNone, OCRNorm::kNormalize }
 
enum  GraphemeNorm { GraphemeNorm::kNone, GraphemeNorm::kNormalize }
 
enum  GraphemeNormMode { GraphemeNormMode::kSingleString, GraphemeNormMode::kCombined, GraphemeNormMode::kGlyphSplit, GraphemeNormMode::kIndividualUnicodes }
 
enum  ViramaScript : char32 {
  ViramaScript::kNonVirama = 0, ViramaScript::kDevanagari = 0x900, ViramaScript::kBengali = 0x980, ViramaScript::kGurmukhi = 0xa00,
  ViramaScript::kGujarati = 0xa80, ViramaScript::kOriya = 0xb00, ViramaScript::kTamil = 0xb80, ViramaScript::kTelugu = 0xc00,
  ViramaScript::kKannada = 0xc80, ViramaScript::kMalayalam = 0xd00, ViramaScript::kSinhala = 0xd80, ViramaScript::kMyanmar = 0x1000,
  ViramaScript::kKhmer = 0x1780, ViramaScript::kJavanese = 0xa980
}
 
enum  LMPainPointsType {
  LM_PPTYPE_BLAMER, LM_PPTYPE_AMBIG, LM_PPTYPE_PATH, LM_PPTYPE_SHAPE,
  LM_PPTYPE_NUM
}
 

Functions

STRING HOcrEscape (const char *text)
 
double DotProductNative (const double *u, const double *v, int n)
 
double DotProductAVX (const double *u, const double *v, int n)
 
double DotProductFMA (const double *u, const double *v, int n)
 
double DotProductSSE (const double *u, const double *v, int n)
 
bool IsTextOrEquationType (PolyBlockType type)
 
bool IsLeftIndented (const EquationDetect::IndentType type)
 
bool IsRightIndented (const EquationDetect::IndentType type)
 
bool AsciiLikelyListItem (const STRING &word)
 
int UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
 
void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
bool ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
 
void RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)
 
int InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
 
bool RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
 
void CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
 
void DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
 
void DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)
 
bool StrongModel (const ParagraphModel *model)
 
bool CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)
 
bool CompareFontSet (const FontSet &fs1, const FontSet &fs2)
 
void FontInfoDeleteCallback (FontInfo f)
 
void FontSetDeleteCallback (FontSet fs)
 
bool read_info (TFile *f, FontInfo *fi)
 
bool write_info (FILE *f, const FontInfo &fi)
 
bool read_spacing_info (TFile *f, FontInfo *fi)
 
bool write_spacing_info (FILE *f, const FontInfo &fi)
 
bool read_set (TFile *f, FontSet *fs)
 
bool write_set (FILE *f, const FontSet &fs)
 
void * ReCachePagesFunc (void *data)
 
int OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
 
void HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
 
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 
int ParamsTrainingFeatureByName (const char *name)
 
bool PSM_OSD_ENABLED (int pageseg_mode)
 
bool PSM_ORIENTATION_ENABLED (int pageseg_mode)
 
bool PSM_COL_FIND_ENABLED (int pageseg_mode)
 
bool PSM_SPARSE (int pageseg_mode)
 
bool PSM_BLOCK_FIND_ENABLED (int pageseg_mode)
 
bool PSM_LINE_FIND_ENABLED (int pageseg_mode)
 
bool PSM_WORD_FIND_ENABLED (int pageseg_mode)
 
const char * ScriptPosToString (enum ScriptPos script_pos)
 
bool LoadDataFromFile (const char *filename, GenericVector< char > *data)
 
bool LoadDataFromFile (const STRING &filename, GenericVector< char > *data)
 
bool SaveDataToFile (const GenericVector< char > &data, const STRING &filename)
 
template<typename T >
bool cmp_eq (T const &t1, T const &t2)
 
template<typename T >
int sort_cmp (const void *t1, const void *t2)
 
template<typename T >
int sort_ptr_cmp (const void *t1, const void *t2)
 
bool DeSerialize (FILE *fp, char *data, size_t n)
 
bool DeSerialize (FILE *fp, float *data, size_t n)
 
bool DeSerialize (FILE *fp, int8_t *data, size_t n)
 
bool DeSerialize (FILE *fp, int16_t *data, size_t n)
 
bool DeSerialize (FILE *fp, int32_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint8_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint16_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint32_t *data, size_t n)
 
bool Serialize (FILE *fp, const char *data, size_t n)
 
bool Serialize (FILE *fp, const float *data, size_t n)
 
bool Serialize (FILE *fp, const int8_t *data, size_t n)
 
bool Serialize (FILE *fp, const int16_t *data, size_t n)
 
bool Serialize (FILE *fp, const int32_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint8_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint16_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint32_t *data, size_t n)
 
template<typename T , size_t N>
constexpr size_t countof (T const (&)[N]) noexcept
 
void ExtractFontName (const STRING &filename, STRING *fontname)
 
TrainingSample * BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
 
void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)
 
double Tanh (double x)
 
double Logistic (double x)
 
template<class Func >
void FuncInplace (int n, double *inout)
 
template<class Func >
void FuncMultiply (const double *u, const double *v, int n, double *out)
 
template<typename T >
void SoftmaxInPlace (int n, T *inout)
 
void CopyVector (int n, const double *src, double *dest)
 
void AccumulateVector (int n, const double *src, double *dest)
 
void MultiplyVectorsInPlace (int n, const double *src, double *inout)
 
void MultiplyAccumulate (int n, const double *u, const double *v, double *out)
 
void SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum)
 
template<typename T >
void ZeroVector (int n, T *vec)
 
template<typename T >
void ClipVector (int n, T lower, T upper, T *vec)
 
void CodeInBinary (int n, int nf, double *vec)
 
Pix * TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)
 
Pix * TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)
 
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortRightToLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortByBoxBottom (const void *void1, const void *void2)
 
template<typename T >
void DeleteObject (T *object)
 
void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
 
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
void ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)
 
ShapeTable * LoadShapeTable (const STRING &file_prefix)
 
void WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
Pix * DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation)
 
Pix * PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
 
void GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
 
int ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
 
bool LoadFileLinesToStrings (const char *filename, GenericVector< STRING > *lines)
 
bool WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)
 
STRING ReadFile (const std::string &filename, FileReader reader)
 
bool WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
 
bool WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
 
int CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
 
bool NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 OCRNormalize (char32 ch)
 
bool IsOCREquivalent (char32 ch1, char32 ch2)
 
bool IsValidCodepoint (const char32 ch)
 
bool IsWhitespace (const char32 ch)
 
bool IsUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8NotWhitespace (const char *text)
 
bool IsInterchangeValid (const char32 ch)
 
bool IsInterchangeValid7BitAscii (const char32 ch)
 
char32 FullwidthToHalfwidth (const char32 ch)
 
void SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)
 
void SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)
 
std::string GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)
 
void SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
 
void SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)
 
template<class BLOB_CHOICE >
int SortByUnicharID (const void *void1, const void *void2)
 
template<class BLOB_CHOICE >
int SortByRating (const void *void1, const void *void2)
 

Variables

const int kMinRectSize = 10
 
const char kTesseractReject = '~'
 
const char kUNLVReject = '~'
 
const char kUNLVSuspect = '^'
 
const int kMaxIntSize = 22
 
const int kNumbersPerBlob = 5
 
const int kBytesPerNumber = 5
 
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1
 
const int kBytesPer64BitNumber = 20
 
const int kMaxBytesPerLine
 
const int kUniChs []
 
const int kLatinChs []
 
constexpr int kNumOutputsPerRegister = 8
 
constexpr int kMaxOutputRegisters = 8
 
constexpr int kNumInputsPerRegister = 32
 
constexpr int kNumInputsPerGroup = 4
 
constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup
 
DotProductFunction DotProduct
 
const float kMathDigitDensityTh1 = 0.25
 
const float kMathDigitDensityTh2 = 0.1
 
const float kMathItalicDensityTh = 0.5
 
const float kUnclearDensityTh = 0.25
 
const int kSeedBlobsCountTh = 10
 
const int kLeftIndentAlignmentCountTh = 1
 
const int kMaxCharTopRange = 48
 
const float kCertaintyScale = 7.0f
 
const float kWorstDictCertainty = -25.0f
 
const int kMaxCircleErosions = 8
 
const ParagraphModel * kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))
 
const ParagraphModel * kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))
 
const int16_t kMaxBoxEdgeDiff = 2
 
const int kBoxClipTolerance = 2
 
const int kNumEndPoints = 3
 
const int kMinPointsForErrorCount = 16
 
const int kMaxRealDistance = 2.0
 
const int kFeaturePadding = 2
 
const int kImagePadding = 4
 
const int kHistogramSize = 256
 
const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)
 
const int kRadicalRadix = 29
 
const char *const kLRM = "\u200E"
 Left-to-Right Mark. More...
 
const char *const kRLM = "\u200F"
 Right-to-Left Mark. More...
 
const char *const kRLE = "\u202A"
 Right-to-Left Embedding. More...
 
const char *const kPDF = "\u202C"
 Pop Directional Formatting. More...
 
const char kUniversalAmbigsFile []
 
const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)
 
const int kMaxOffsetDist = 32
 
const int kRandomizingCenter = 128
 
const int kTestChar = -1
 
const int kSquareLimit = 25
 
const int kPrime1 = 17
 
const int kPrime2 = 13
 
const int case_state_table [6][4]
 
const char kDoNotReverse [] = "RRP_DO_NO_REVERSE"
 
const char kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"
 
const char kForceReverse [] = "RRP_FORCE_REVERSE"
 
const char *const RTLReversePolicyNames []
 
const double TanhTable []
 
const double LogisticTable []
 
constexpr int kTableSize = 4096
 
constexpr double kScaleFactor = 256.0
 
const int kMaxInputHeight = 48
 
const double kStateClip = 100.0
 
const double kErrClip = 1.0f
 
const double kDictRatio = 2.25
 
const double kCertOffset = -0.085
 
const double kMinDivergenceRate = 50.0
 
const int kMinStallIterations = 10000
 
const double kSubTrainerMarginFraction = 3.0 / 128
 
const double kLearningRateDecay = M_SQRT1_2
 
const int kNumAdjustmentIterations = 100
 
const int kErrorGraphInterval = 1000
 
const int kNumPagesPerBatch = 100
 
const int kMinStartedErrorRate = 75
 
const double kStageTransitionThreshold = 10.0
 
const double kHighConfidence = 0.9375
 
const double kImprovementFraction = 15.0 / 16.0
 
const double kBestCheckpointFraction = 31.0 / 32.0
 
const int kTargetXScale = 5
 
const int kTargetYScale = 100
 
const int kMinWinSize = 500
 
const int kMaxWinSize = 2000
 
const int kXWinFrameSize = 30
 
const int kYWinFrameSize = 80
 
const float kMinCertainty = -20.0f
 
const float kMinProb = exp(kMinCertainty)
 
class tesseract::TFNetworkModelDefaultTypeInternal _TFNetworkModel_default_instance_
 
const int kAdamCorrectionIterations = 200000
 
const double kAdamEpsilon = 1e-8
 
const int kInt8Flag = 1
 
const int kAdamFlag = 4
 
const int kDoubleFlag = 128
 
const int kHistogramBuckets = 16
 
const double kAlignedFraction = 0.03125
 
const double kRaggedFraction = 2.5
 
const double kAlignedGapFraction = 0.75
 
const double kRaggedGapFraction = 1.0
 
const int kVLineAlignment = 3
 
const int kVLineGutter = 1
 
const int kVLineSearchSize = 150
 
const int kMinRaggedTabs = 5
 
const int kMinAlignedTabs = 4
 
const int kVLineMinLength = 500
 
const double kMinTabGradient = 4.0
 
const int kMaxSkewFactor = 15
 
const double kMaxSmallNeighboursPerPix = 1.0 / 32
 
const int kMaxLargeOverlapsWithSmall = 3
 
const int kMaxMediumOverlapsWithSmall = 12
 
const int kMaxLargeOverlapsWithMedium = 12
 
const int kOriginalNoiseMultiple = 8
 
const int kNoisePadding = 4
 
const double kPhotoOffsetFraction = 0.375
 
const double kMinGoodTextPARatio = 1.5
 
const int kMaxIncompatibleColumnCount = 2
 
const double kHorizontalGapMergeFraction = 0.5
 
const double kMinGutterWidthGrid = 0.5
 
const double kMaxDistToPartSizeRatio = 1.5
 
const double kMaxSpacingDrift = 1.0 / 72
 
const double kMaxTopSpacingFraction = 0.25
 
const double kMaxSameBlockLineSpacing = 3
 
const double kMaxSizeRatio = 1.5
 
const double kMaxLeaderGapFractionOfMax = 0.25
 
const double kMaxLeaderGapFractionOfMin = 0.5
 
const int kMinLeaderCount = 5
 
const int kMinStrongTextValue = 6
 
const int kMinChainTextValue = 3
 
const int kHorzStrongTextlineCount = 8
 
const int kHorzStrongTextlineHeight = 10
 
const int kHorzStrongTextlineAspect = 5
 
const double kMaxBaselineError = 0.4375
 
const double kMinBaselineCoverage = 0.5
 
const int kMaxRMSColorNoise = 128
 
const int kMaxColorDistance = 900
 
const int kRGBRMSColors = 4
 
const int kMaxPadFactor = 6
 
const int kMaxNeighbourDistFactor = 4
 
const int kMaxCaptionLines = 7
 
const double kMinCaptionGapRatio = 2.0
 
const double kMinCaptionGapHeightRatio = 0.5
 
const double kMarginOverlapFraction = 0.25
 
const double kBigPartSizeRatio = 1.75
 
const double kTinyEnoughTextlineOverlapFraction = 0.25
 
const double kMaxPartitionSpacing = 1.75
 
const int kSmoothDecisionMargin = 4
 
const double kMinColumnWidth = 2.0 / 3
 
const double kMinRectangularFraction = 0.125
 
const double kMaxRectangularFraction = 0.75
 
const double kMaxRectangularGradient = 0.1
 
const int kMinImageFindSize = 100
 
const double kRMSFitScaling = 8.0
 
const int kMinColorDifference = 16
 
const int kThinLineFraction = 20
 Denominator of resolution makes max pixel width to allow thin lines. More...
 
const int kMinLineLengthFraction = 4
 Denominator of resolution makes min pixels to demand line lengths to be. More...
 
const int kCrackSpacing = 100
 Spacing of cracks across the page to break up tall vertical lines. More...
 
const int kLineFindGridSize = 50
 Grid size used by line finder. Not very critical. More...
 
const int kMinThickLineWidth = 12
 
const int kMaxLineResidue = 6
 
const double kThickLengthMultiple = 0.75
 
const double kMaxNonLineDensity = 0.25
 
const double kMaxStaveHeight = 1.0
 
const double kMinMusicPixelFraction = 0.75
 
const double kStrokeWidthFractionTolerance = 0.125
 
const double kStrokeWidthTolerance = 1.5
 
const double kStrokeWidthFractionCJK = 0.25
 
const double kStrokeWidthCJK = 2.0
 
const int kCJKRadius = 2
 
const double kCJKBrokenDistanceFraction = 0.25
 
const int kCJKMaxComponents = 8
 
const double kCJKAspectRatio = 1.25
 
const double kCJKAspectRatioIncrease = 1.0625
 
const int kMaxCJKSizeRatio = 5
 
const double kBrokenCJKIterationFraction = 0.125
 
const double kDiacriticXPadRatio = 7.0
 
const double kDiacriticYPadRatio = 1.75
 
const double kMinDiacriticSizeRatio = 1.0625
 
const double kMaxDiacriticDistanceRatio = 1.25
 
const double kMaxDiacriticGapToBaseCharHeight = 1.0
 
const int kLineTrapLongest = 4
 
const int kLineTrapShortest = 2
 
const int kMostlyOneDirRatio = 3
 
const double kLineResidueAspectRatio = 8.0
 
const int kLineResiduePadRatio = 3
 
const double kLineResidueSizeRatio = 1.75
 
const float kSizeRatioToReject = 2.0
 
const double kNeighbourSearchFactor = 2.5
 
const double kNoiseOverlapGrowthFactor = 4.0
 
const double kNoiseOverlapAreaFactor = 1.0 / 512
 
const int kTabRadiusFactor = 5
 
const int kMinVerticalSearch = 3
 
const int kMaxVerticalSearch = 12
 
const int kMaxRaggedSearch = 25
 
const int kMinLinesInColumn = 10
 
const double kMinFractionalLinesInColumn = 0.125
 
const double kMaxGutterWidthAbsolute = 2.00
 
const int kRaggedGutterMultiple = 5
 
const double kLineFragmentAspectRatio = 10.0
 
const int kMinEvaluatedTabs = 3
 
const double kCosMaxSkewAngle = 0.866025
 
const int kColumnWidthFactor = 20
 
const int kMaxVerticalSpacing = 500
 
const int kMaxBlobWidth = 500
 
const double kSplitPartitionSize = 2.0
 
const double kAllowTextHeight = 0.5
 
const double kAllowTextWidth = 0.6
 
const double kAllowTextArea = 0.8
 
const double kAllowBlobHeight = 0.3
 
const double kAllowBlobWidth = 0.4
 
const double kAllowBlobArea = 0.05
 
const int kMinBoxesInTextPartition = 10
 
const int kMaxBoxesInDataPartition = 20
 
const double kMaxGapInTextPartition = 4.0
 
const double kMinMaxGapInTextPartition = 0.5
 
const double kMaxBlobOverlapFactor = 4.0
 
const double kMaxTableCellXheight = 2.0
 
const int kMaxColumnHeaderDistance = 4
 
const double kTableColumnThreshold = 3.0
 
const double kMinOverlapWithTable = 0.6
 
const int kSideSpaceMargin = 10
 
const double kSmallTableProjectionThreshold = 0.35
 
const double kLargeTableProjectionThreshold = 0.45
 
const int kLargeTableRowCount = 6
 
const int kMinRowsInTable = 3
 
const int kAdjacentLeaderSearchPadding = 2
 
const double kParagraphEndingPreviousLineRatio = 1.3
 
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0
 
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0
 
const double kMaxXProjectionGapFactor = 2.0
 
const double kStrokeWidthFractionalTolerance = 0.25
 
const double kStrokeWidthConstantTolerance = 2.0
 
const double kHorizontalSpacing = 0.30
 
const double kVerticalSpacing = -0.2
 
const int kCellSplitRowThreshold = 0
 
const int kCellSplitColumnThreshold = 0
 
const int kLinedTableMinVerticalLines = 3
 
const int kLinedTableMinHorizontalLines = 3
 
const double kRequiredColumns = 0.7
 
const double kMarginFactor = 1.1
 
const double kMaxRowSize = 2.5
 
const double kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }
 
const int kGoodRowNumberOfColumnsSmallSize
 
const double kGoodRowNumberOfColumnsLarge = 0.7
 
const double kMinFilledArea = 0.35
 
const int kGutterMultiple = 4
 
const int kGutterToNeighbourRatio = 3
 
const int kSimilarVectorDist = 10
 
const int kSimilarRaggedDist = 50
 
const int kMaxFillinMultiple = 11
 
const double kMinGutterFraction = 0.5
 
const double kLineCountReciprocal = 4.0
 
const double kMinAlignedGutter = 0.25
 
const double kMinRaggedGutter = 1.5
 
double textord_tabvector_vertical_gap_fraction = 0.5
 
double textord_tabvector_vertical_box_ratio = 0.5
 
const int kMaxLineLength = 1024
 
const float kRotationRange = 0.02f
 
const int kExposureFactor = 16
 
const int kSaltnPepper = 5
 
const int kMinRampSize = 1000
 
const double kRatingEpsilon = 1.0 / 32
 
const int kMinLigature = 0xfb00
 
const int kMaxLigature = 0xfb17
 
const int kMinClusteredShapes = 1
 
const int kMaxUnicharsPerCluster = 2000
 
const float kFontMergeDistance = 0.025
 
const float kInfiniteDist = 999.0f
 
const int kDefaultResolution = 300
 

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

*   <UTF8 str> <left> <bottom> <right> <top> <page id>
* 

and for word/line-level boxes:

*   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
* 

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. Eg.

* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
* 

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, eg Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters
page_res: recognise blobs
selection_box: within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window (Just call the appropriate command handler)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings


Include Files and Type Defines


Public Function Prototypes


Include Files and Type Defines


Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

Definition at line 31 of file blobgrid.h.

◆ char32

using tesseract::char32 = typedef signed int

Definition at line 51 of file unichar.h.

◆ CheckPointReader

Definition at line 73 of file lstmtrainer.h.

◆ CheckPointWriter

Definition at line 78 of file lstmtrainer.h.

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

Definition at line 934 of file colpartition.h.

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 116 of file tablefind.h.

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 119 of file tablefind.h.

◆ DawgVector

Definition at line 53 of file dict.h.

◆ DictFunc

using tesseract::DictFunc = typedef int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const

Definition at line 76 of file baseapi.h.

◆ DotProductFunction

using tesseract::DotProductFunction = typedef double (*)(const double*, const double*, int)

Definition at line 25 of file simddetect.h.

◆ FileReader

using tesseract::FileReader = typedef bool (*)(const STRING&, GenericVector<char>*)

Definition at line 49 of file serialis.h.

◆ FileWriter

using tesseract::FileWriter = typedef bool (*)(const GenericVector<char>&, const STRING&)

Definition at line 52 of file serialis.h.

◆ FillLatticeFunc

using tesseract::FillLatticeFunc = typedef void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *)

Definition at line 79 of file baseapi.h.

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

Definition at line 179 of file kdpair.h.

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

Definition at line 37 of file lm_state.h.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash>

Definition at line 55 of file ligature_table.h.

◆ NodeChildVector

Definition at line 64 of file dawg.h.

◆ PainPointHeap

Definition at line 38 of file lm_pain_points.h.

◆ ParamsModelClassifyFunc

using tesseract::ParamsModelClassifyFunc = typedef float (Dict::*)(const char *, void *)

Definition at line 78 of file baseapi.h.

◆ ParamsTrainingHypothesisList

◆ PartSetVector

Definition at line 34 of file colpartitionset.h.

◆ ProbabilityInContextFunc

using tesseract::ProbabilityInContextFunc = typedef double (Dict::*)(const char *, const char *, int, const char *, int)

Definition at line 77 of file baseapi.h.

◆ RecodeHeap

Definition at line 176 of file recodebeam.h.

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

Definition at line 175 of file recodebeam.h.

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

Definition at line 48 of file unicharcompress.cpp.

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

Definition at line 46 of file unicharcompress.cpp.

◆ SetOfModels

Definition at line 99 of file paragraphs_internal.h.

◆ ShapeQueue

Definition at line 155 of file shapetable.h.

◆ SuccessorList

Definition at line 65 of file dawg.h.

◆ SuccessorListsVector

Definition at line 66 of file dawg.h.

◆ TestCallback

typedef TessResultCallback4<STRING, int, const double*, const TessdataManager&, int>* tesseract::TestCallback

Definition at line 83 of file lstmtrainer.h.

◆ TruthCallback

Definition at line 81 of file baseapi.h.

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef GenericVector<AmbigSpec_LIST *>

Definition at line 134 of file ambigs.h.

◆ UnicharIdVector

Definition at line 35 of file ambigs.h.

◆ WidthCallback

using tesseract::WidthCallback = typedef TessResultCallback1<bool, int>

Definition at line 36 of file tabfind.h.

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 65 of file textord.h.

◆ WordRecognizer

using tesseract::WordRecognizer = typedef void (Tesseract::*)(const WordData&, WERD_RES**, PointerVector<WERD_RES>*)

Definition at line 171 of file tesseractclass.h.

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 66 of file textord.h.

Enumeration Type Documentation

◆ AmbigType

Enumerator
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 

Definition at line 37 of file ambigs.h.

37  {
38  NOT_AMBIG, // the ngram pair is not ambiguous
39  REPLACE_AMBIG, // ocred ngram should always be substituted with correct
40  DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1)
41  SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1)
42  CASE_AMBIG, // this is a case ambiguity (1-1)
43 
44  AMBIG_TYPE_COUNT // number of enum entries
45 };

◆ CachingStrategy

Enumerator
CS_SEQUENTIAL 
CS_ROUND_ROBIN 

Definition at line 42 of file imagedata.h.

42  {
43  // Reads all of one file before moving on to the next. Requires samples to be
44  // shuffled across files. Uses the count of samples in the first file as
45  // the count in all the files to achieve high-speed random access. As a
46  // consequence, if subsequent files are smaller, they get entries used more
47  // than once, and if subsequent files are larger, some entries are not used.
48  // Best for larger data sets that don't fit in memory.
50  // Reads one sample from each file in rotation. Does not require shuffled
51  // samples, but is extremely disk-intensive. Samples in smaller files also
52  // get used more often than samples in larger files.
53  // Best for smaller data sets that mostly fit in memory.
55 };

◆ CharSegmentationType

Enumerator
CST_FRAGMENT 
CST_WHOLE 
CST_IMPROPER 
CST_NGRAM 

Definition at line 96 of file classify.h.

96  {
97  CST_FRAGMENT, // A partial character.
98  CST_WHOLE, // A correctly segmented character.
99  CST_IMPROPER, // More than one but less than 2 characters.
100  CST_NGRAM // Multiple characters.
101 };

◆ CMD_EVENTS

Enumerator
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 

Definition at line 487 of file tessedit.cpp.

◆ ColSegType

Enumerator
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 

Definition at line 30 of file tablefind.h.

30  {
32  COL_TEXT,
33  COL_TABLE,
34  COL_MIXED,
35  COL_COUNT
36 };

◆ ColumnSpanningType

Enumerator
CST_NOISE 
CST_FLOWING 
CST_HEADING 
CST_PULLOUT 
CST_COUNT 

Definition at line 48 of file colpartition.h.

48  {
49  CST_NOISE, // Strictly between columns.
50  CST_FLOWING, // Strictly within a single column.
51  CST_HEADING, // Spans multiple columns.
52  CST_PULLOUT, // Touches multiple columns, but doesn't span them.
53  CST_COUNT // Number of entries.
54 };

◆ CountTypes

Enumerator
CT_UNICHAR_TOP_OK 
CT_UNICHAR_TOP1_ERR 
CT_UNICHAR_TOP2_ERR 
CT_UNICHAR_TOPN_ERR 
CT_UNICHAR_TOPTOP_ERR 
CT_OK_MULTI_UNICHAR 
CT_OK_JOINED 
CT_OK_BROKEN 
CT_REJECT 
CT_FONT_ATTR_ERR 
CT_OK_MULTI_FONT 
CT_NUM_RESULTS 
CT_RANK 
CT_REJECTED_JUNK 
CT_ACCEPTED_JUNK 
CT_SIZE 

Definition at line 69 of file errorcounter.h.

69  {
70  CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id.
71  // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
72  // kRatingEpsilon from the first result in each group. The real top choice
73  // is measured using TOPTOP.
74  CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id.
75  CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id.
76  CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id.
77  CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct.
78  CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others.
79  CT_OK_JOINED, // Top shape id is correct but marked joined.
80  CT_OK_BROKEN, // Top shape id is correct but marked broken.
81  CT_REJECT, // Classifier hates this.
82  CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect.
83  CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs.
84  CT_NUM_RESULTS, // Number of answers produced.
85  CT_RANK, // Rank of correct answer.
86  CT_REJECTED_JUNK, // Junk that was correctly rejected.
87  CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise.
88 
89  CT_SIZE // Number of types for array sizing.
90 };

◆ DawgType

Enumerator
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_WORD 
DAWG_TYPE_NUMBER 
DAWG_TYPE_PATTERN 
DAWG_TYPE_COUNT 

Definition at line 68 of file dawg.h.

68  {
73 
74  DAWG_TYPE_COUNT // number of enum entries
75 };

◆ ErrorTypes

Enumerator
ET_RMS 
ET_DELTA 
ET_WORD_RECERR 
ET_CHAR_ERROR 
ET_SKIP_RATIO 
ET_COUNT 

Definition at line 37 of file lstmtrainer.h.

37  {
38  ET_RMS, // RMS activation error.
39  ET_DELTA, // Number of big errors in deltas.
40  ET_WORD_RECERR, // Output text string word recall error.
41  ET_CHAR_ERROR, // Output text string total char error.
42  ET_SKIP_RATIO, // Fraction of samples skipped.
43  ET_COUNT // For array sizing.
44 };

◆ FactorNames

Enumerator
FN_INCOLOR 
FN_Y0 
FN_Y1 
FN_Y2 
FN_Y3 
FN_X0 
FN_X1 
FN_SHEAR 
FN_NUM_FACTORS 

Definition at line 59 of file degradeimage.cpp.

92  {

◆ FlexDimensions

Enumerator
FD_BATCH 
FD_HEIGHT 
FD_WIDTH 
FD_DIMSIZE 

Definition at line 32 of file stridemap.h.

32  {
33  FD_BATCH, // Index of multiple images.
34  FD_HEIGHT, // y-coordinate in image.
35  FD_WIDTH, // x-coordinate in image.
36  FD_DIMSIZE, // Number of flexible non-depth dimensions.
37 };

◆ GraphemeNorm

Enumerator
kNone 
kNormalize 

Definition at line 67 of file normstrngs.h.

◆ GraphemeNormMode

Enumerator
kSingleString 
kCombined 
kGlyphSplit 
kIndividualUnicodes 

Definition at line 50 of file validator.h.

52  : char32 {
53  kNonVirama = 0,
54  kDevanagari = 0x900,
55  kBengali = 0x980,
56  kGurmukhi = 0xa00,
57  kGujarati = 0xa80,
58  kOriya = 0xb00,
59  kTamil = 0xb80,
60  kTelugu = 0xc00,
61  kKannada = 0xc80,
62  kMalayalam = 0xd00,
63  kSinhala = 0xd80,

◆ kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT 
PTRAIN_DIGITS_MED 
PTRAIN_DIGITS_LONG 
PTRAIN_NUM_SHORT 
PTRAIN_NUM_MED 
PTRAIN_NUM_LONG 
PTRAIN_DOC_SHORT 
PTRAIN_DOC_MED 
PTRAIN_DOC_LONG 
PTRAIN_DICT_SHORT 
PTRAIN_DICT_MED 
PTRAIN_DICT_LONG 
PTRAIN_FREQ_SHORT 
PTRAIN_FREQ_MED 
PTRAIN_FREQ_LONG 
PTRAIN_SHAPE_COST_PER_CHAR 
PTRAIN_NGRAM_COST_PER_CHAR 
PTRAIN_NUM_BAD_PUNC 
PTRAIN_NUM_BAD_CASE 
PTRAIN_XHEIGHT_CONSISTENCY 
PTRAIN_NUM_BAD_CHAR_TYPE 
PTRAIN_NUM_BAD_SPACING 
PTRAIN_NUM_BAD_FONT 
PTRAIN_RATING_PER_CHAR 
PTRAIN_NUM_FEATURE_TYPES 

Definition at line 39 of file params_training_featdef.h.

39  {
40  // Digits
42  PTRAIN_DIGITS_MED, // 1
43  PTRAIN_DIGITS_LONG, // 2
44  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
45  PTRAIN_NUM_SHORT, // 3
46  PTRAIN_NUM_MED, // 4
47  PTRAIN_NUM_LONG, // 5
48  // Document word (DOC_DAWG_PERM)
49  PTRAIN_DOC_SHORT, // 6
50  PTRAIN_DOC_MED, // 7
51  PTRAIN_DOC_LONG, // 8
52  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
53  PTRAIN_DICT_SHORT, // 9
54  PTRAIN_DICT_MED, // 10
55  PTRAIN_DICT_LONG, // 11
56  // Frequent word (FREQ_DAWG_PERM)
57  PTRAIN_FREQ_SHORT, // 12
58  PTRAIN_FREQ_MED, // 13
59  PTRAIN_FREQ_LONG, // 14
62  PTRAIN_NUM_BAD_PUNC, // 17
63  PTRAIN_NUM_BAD_CASE, // 18
67  PTRAIN_NUM_BAD_FONT, // 22
69 
71 };

◆ LeftOrRight

Enumerator
LR_LEFT 
LR_RIGHT 

Definition at line 39 of file strokewidth.h.

39  {
40  LR_LEFT,
41  LR_RIGHT
42 };

◆ LineType

Enumerator
LT_START 
LT_BODY 
LT_UNKNOWN 
LT_MULTIPLE 

Definition at line 50 of file paragraphs_internal.h.

50  {
51  LT_START = 'S', // First line of a paragraph.
52  LT_BODY = 'C', // Continuation line of a paragraph.
53  LT_UNKNOWN = 'U', // No clues.
54  LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY.
55 };

◆ LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER 
LM_PPTYPE_AMBIG 
LM_PPTYPE_PATH 
LM_PPTYPE_SHAPE 
LM_PPTYPE_NUM 

Definition at line 41 of file lm_pain_points.h.

41  {
46 
48 };

◆ LossType

Enumerator
LT_NONE 
LT_CTC 
LT_SOFTMAX 
LT_LOGISTIC 

Definition at line 29 of file static_shape.h.

29  {
30  LT_NONE, // Undefined.
31  LT_CTC, // Softmax with standard CTC for training/decoding.
32  LT_SOFTMAX, // Outputs sum to 1 in fixed positions.
33  LT_LOGISTIC, // Logistic outputs with independent values.
34 };

◆ NeighbourPartitionType

Enumerator
NPT_HTEXT 
NPT_VTEXT 
NPT_WEAK_HTEXT 
NPT_WEAK_VTEXT 
NPT_IMAGE 
NPT_COUNT 

Definition at line 1505 of file colpartitiongrid.cpp.

1505  {
1506  NPT_HTEXT, // Definite horizontal text.
1507  NPT_VTEXT, // Definite vertical text.
1508  NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
1509  // image for image and VTEXT.
1510  NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
1511  // image for image and HTEXT.
1512  NPT_IMAGE, // Definite non-text.
1513  NPT_COUNT // Number of array elements.
1514 };

◆ NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR 
NF_ADAM 

Definition at line 85 of file network.h.

85  {
86  // Network forward/backprop behavior.
87  NF_LAYER_SPECIFIC_LR = 64, // Separate learning rate for each layer.
88  NF_ADAM = 128, // Weight-specific learning rate.
89 };

◆ NetworkType

Enumerator
NT_NONE 
NT_INPUT 
NT_CONVOLVE 
NT_MAXPOOL 
NT_PARALLEL 
NT_REPLICATED 
NT_PAR_RL_LSTM 
NT_PAR_UD_LSTM 
NT_PAR_2D_LSTM 
NT_SERIES 
NT_RECONFIG 
NT_XREVERSED 
NT_YREVERSED 
NT_XYTRANSPOSE 
NT_LSTM 
NT_LSTM_SUMMARY 
NT_LOGISTIC 
NT_POSCLIP 
NT_SYMCLIP 
NT_TANH 
NT_RELU 
NT_LINEAR 
NT_SOFTMAX 
NT_SOFTMAX_NO_CTC 
NT_LSTM_SOFTMAX 
NT_LSTM_SOFTMAX_ENCODED 
NT_TENSORFLOW 
NT_COUNT 

Definition at line 43 of file network.h.

43  {
44  NT_NONE, // The naked base class.
45  NT_INPUT, // Inputs from an image.
46  // Plumbing networks combine other networks or rearrange the inputs.
47  NT_CONVOLVE, // Duplicates inputs in a sliding window neighborhood.
48  NT_MAXPOOL, // Chooses the max result from a rectangle.
49  NT_PARALLEL, // Runs networks in parallel.
50  NT_REPLICATED, // Runs identical networks in parallel.
51  NT_PAR_RL_LSTM, // Runs LTR and RTL LSTMs in parallel.
52  NT_PAR_UD_LSTM, // Runs Up and Down LSTMs in parallel.
53  NT_PAR_2D_LSTM, // Runs 4 LSTMs in parallel.
54  NT_SERIES, // Executes a sequence of layers.
55  NT_RECONFIG, // Scales the time/y size but makes the output deeper.
56  NT_XREVERSED, // Reverses the x direction of the inputs/outputs.
57  NT_YREVERSED, // Reverses the y-direction of the inputs/outputs.
58  NT_XYTRANSPOSE, // Transposes x and y (for just a single op).
59  // Functional networks actually calculate stuff.
60  NT_LSTM, // Long-Short-Term-Memory block.
61  NT_LSTM_SUMMARY, // LSTM that only keeps its last output.
62  NT_LOGISTIC, // Fully connected logistic nonlinearity.
63  NT_POSCLIP, // Fully connected rect lin version of logistic.
64  NT_SYMCLIP, // Fully connected rect lin version of tanh.
65  NT_TANH, // Fully connected with tanh nonlinearity.
66  NT_RELU, // Fully connected with rectifier nonlinearity.
67  NT_LINEAR, // Fully connected with no nonlinearity.
68  NT_SOFTMAX, // Softmax uses exponential normalization, with CTC.
69  NT_SOFTMAX_NO_CTC, // Softmax uses exponential normalization, no CTC.
70  // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
71  // the outputs fed back to the input of the LSTM at the next timestep.
72  // The ENCODED version binary encodes the softmax outputs, providing log2 of
73  // the number of outputs as additional inputs, and the other version just
74  // provides all the softmax outputs as additional inputs.
75  NT_LSTM_SOFTMAX, // 1-d LSTM with built-in fully connected softmax.
76  NT_LSTM_SOFTMAX_ENCODED, // 1-d LSTM with built-in binary encoded softmax.
77  // A TensorFlow graph encapsulated as a Tesseract network.
79 
80  NT_COUNT // Array size.
81 };

◆ NodeContinuation

Enumerator
NC_ANYTHING 
NC_ONLY_DUP 
NC_NO_DUP 
NC_COUNT 

Definition at line 72 of file recodebeam.h.

72  {
73  NC_ANYTHING, // This node used just its own score, so anything can follow.
74  NC_ONLY_DUP, // The current node combined another score with the score for
75  // itself, without a stand-alone duplicate before, so must be
76  // followed by a stand-alone duplicate.
77  NC_NO_DUP, // The current node combined another score with the score for
78  // itself, after a stand-alone, so can only be followed by
79  // something other than a duplicate of the current node.
80  NC_COUNT
81 };

◆ NormalizationMode

Enumerator
NM_BASELINE 
NM_CHAR_ISOTROPIC 
NM_CHAR_ANISOTROPIC 

Definition at line 42 of file normalis.h.

42  {
43  NM_BASELINE = -3, // The original BL normalization mode.
44  NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
45  NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
46 };

◆ OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY 
OEM_LSTM_ONLY 
OEM_TESSERACT_LSTM_COMBINED 
OEM_DEFAULT 
OEM_COUNT 

Definition at line 268 of file publictypes.h.

268  {
269  OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
270  OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
271  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
272  // to Tesseract when things get difficult.
273  // deprecated
274  OEM_DEFAULT, // Specify this mode when calling init_*(),
275  // to indicate that any of the above modes
276  // should be automatically inferred from the
277  // variables in the language-specific config,
278  // command-line configs, or if not specified
279  // in any of the above should be set to the
280  // default OEM_TESSERACT_ONLY.
281  OEM_COUNT // Number of OEMs
282 };

◆ OCRNorm

enum tesseract::OCRNorm
strong
Enumerator
kNone 
kNormalize 

Definition at line 59 of file normstrngs.h.

◆ Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |  anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP 
ORIENTATION_PAGE_RIGHT 
ORIENTATION_PAGE_DOWN 
ORIENTATION_PAGE_LEFT 

Definition at line 120 of file publictypes.h.

120  {
125 };

◆ PageIteratorLevel

enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK 
RIL_PARA 
RIL_TEXTLINE 
RIL_WORD 
RIL_SYMBOL 

Definition at line 219 of file publictypes.h.

219  {
220  RIL_BLOCK, // Block of text/image/separator line.
221  RIL_PARA, // Paragraph within a block.
222  RIL_TEXTLINE, // Line within a paragraph.
223  RIL_WORD, // Word within a textline.
224  RIL_SYMBOL // Symbol/character within a word.
225 };

◆ PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY 

Orientation and script detection only.

PSM_AUTO_OSD 

Automatic page segmentation with orientation and script detection. (OSD)

PSM_AUTO_ONLY 

Automatic page segmentation, but no OSD, or OCR.

PSM_AUTO 

Fully automatic page segmentation, but no OSD.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK_VERT_TEXT 

Assume a single uniform block of vertically aligned text.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_CIRCLE_WORD 

Treat the image as a single word in a circle.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_SPARSE_TEXT 

Find as much text as possible in no particular order.

PSM_SPARSE_TEXT_OSD 

Sparse text with orientation and script det.

PSM_RAW_LINE 

Treat the image as a single text line, bypassing hacks that are Tesseract-specific.

PSM_COUNT 

Number of enum entries.

Definition at line 163 of file publictypes.h.

163  {
164  PSM_OSD_ONLY = 0,
165  PSM_AUTO_OSD = 1,
166  PSM_AUTO_ONLY = 2,
168  PSM_AUTO = 3,
169  PSM_SINGLE_COLUMN = 4,
171  PSM_SINGLE_BLOCK = 6,
173  PSM_SINGLE_LINE = 7,
174  PSM_SINGLE_WORD = 8,
175  PSM_CIRCLE_WORD = 9,
176  PSM_SINGLE_CHAR = 10,
177  PSM_SPARSE_TEXT = 11,
178  PSM_SPARSE_TEXT_OSD = 12,
179  PSM_RAW_LINE = 13,
180 
182  PSM_COUNT
183 };

◆ ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN 
JUSTIFICATION_LEFT 
JUSTIFICATION_CENTER 
JUSTIFICATION_RIGHT 

Definition at line 251 of file publictypes.h.

◆ PartitionFindResult

Enumerator
PFR_OK 
PFR_SKEW 
PFR_NOISE 

Definition at line 46 of file strokewidth.h.

46  {
47  PFR_OK, // Everything is OK.
48  PFR_SKEW, // Skew was detected and rotated.
49  PFR_NOISE // Noise was detected and removed.
50 };

◆ ScriptPos

Enumerator
SP_NORMAL 
SP_SUBSCRIPT 
SP_SUPERSCRIPT 
SP_DROPCAP 

Definition at line 252 of file ratngs.h.

252  {
253  SP_NORMAL,
254  SP_SUBSCRIPT,
256  SP_DROPCAP
257 };

◆ SerializeAmount

Enumerator
LIGHT 
NO_BEST_TRAINER 
FULL 

Definition at line 56 of file lstmtrainer.h.

56  {
57  LIGHT, // Minimal data for remote training.
58  NO_BEST_TRAINER, // Save an empty vector in place of best_trainer_.
59  FULL, // All data including best_trainer_.
60 };

◆ SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE 
SET_PARAM_CONSTRAINT_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_INIT_ONLY 

Definition at line 51 of file params.h.

51  :
52  // Reads a file of parameter definitions and set/modify the values therein.
53  // If the filename begins with a + or -, the BoolVariables will be
54  // ORed or ANDed with any current values.
55  // Blank lines and lines beginning # are ignored.
56  // Values may have any whitespace after the name and are the rest of line.

◆ SubTrainerResult

Enumerator
STR_NONE 
STR_UPDATED 
STR_REPLACED 

Definition at line 63 of file lstmtrainer.h.

63  {
64  STR_NONE, // Did nothing as not good enough.
65  STR_UPDATED, // Subtrainer was updated, but didn't replace *this.
66  STR_REPLACED // Subtrainer replaced *this.
67 };

◆ TabAlignment

Enumerator
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER_JUSTIFIED 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 

Definition at line 44 of file tabvector.h.

◆ TessdataType

Enumerator
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_FIXED_LENGTH_DAWGS 
TESSDATA_CUBE_UNICHARSET 
TESSDATA_CUBE_SYSTEM_DAWG 
TESSDATA_SHAPE_TABLE 
TESSDATA_BIGRAM_DAWG 
TESSDATA_UNAMBIG_DAWG 
TESSDATA_PARAMS_MODEL 
TESSDATA_LSTM 
TESSDATA_LSTM_PUNC_DAWG 
TESSDATA_LSTM_SYSTEM_DAWG 
TESSDATA_LSTM_NUMBER_DAWG 
TESSDATA_LSTM_UNICHARSET 
TESSDATA_LSTM_RECODER 
TESSDATA_VERSION 
TESSDATA_NUM_ENTRIES 

Definition at line 56 of file tessdatamanager.h.

56  {
59  TESSDATA_AMBIGS, // 2
60  TESSDATA_INTTEMP, // 3
61  TESSDATA_PFFMTABLE, // 4
62  TESSDATA_NORMPROTO, // 5
63  TESSDATA_PUNC_DAWG, // 6
66  TESSDATA_FREQ_DAWG, // 9
67  TESSDATA_FIXED_LENGTH_DAWGS, // 10 // deprecated
68  TESSDATA_CUBE_UNICHARSET, // 11 // deprecated
69  TESSDATA_CUBE_SYSTEM_DAWG, // 12 // deprecated
74  TESSDATA_LSTM, // 17
80  TESSDATA_VERSION, // 23
81 
83 };

◆ TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT 
TEXTLINE_ORDER_RIGHT_TO_LEFT 
TEXTLINE_ORDER_TOP_TO_BOTTOM 

Definition at line 152 of file publictypes.h.

◆ TopNState

Enumerator
TN_TOP2 
TN_TOPN 
TN_ALSO_RAN 
TN_COUNT 

Definition at line 84 of file recodebeam.h.

84  {
85  TN_TOP2, // Winner or 2nd.
86  TN_TOPN, // Runner up in top-n, but not 1st or 2nd.
87  TN_ALSO_RAN, // Not in the top-n.
88  TN_COUNT
89 };

◆ Trainability

Enumerator
TRAINABLE 
PERFECT 
UNENCODABLE 
HI_PRECISION_ERR 
NOT_BOXED 

Definition at line 47 of file lstmtrainer.h.

47  {
48  TRAINABLE, // Non-zero delta error.
49  PERFECT, // Zero delta error.
50  UNENCODABLE, // Not trainable due to coding/alignment trouble.
51  HI_PRECISION_ERR, // Hi confidence disagreement.
52  NOT_BOXED, // Early in training and has no character boxes.
53 };

◆ TrainingFlags

Enumerator
TF_INT_MODE 
TF_COMPRESS_UNICHARSET 

Definition at line 47 of file lstmrecognizer.h.

47  {
48  TF_INT_MODE = 1,
50 };

◆ TrainingState

Enumerator
TS_DISABLED 
TS_ENABLED 
TS_TEMP_DISABLE 
TS_RE_ENABLE 

Definition at line 92 of file network.h.

92  {
93  // Valid states of training_.
94  TS_DISABLED, // Disabled permanently.
95  TS_ENABLED, // Enabled for backprop and to write a training dump.
96  // Re-enable from ANY disabled state.
97  TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump.
98  // Valid only for SetEnableTraining.
99  TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
100 };

◆ UnicodeNormMode

Enumerator
kNFD 
kNFC 
kNFKD 
kNFKC 

Definition at line 50 of file normstrngs.h.

◆ ViramaScript

Enumerator
kNonVirama 
kDevanagari 
kBengali 
kGurmukhi 
kGujarati 
kOriya 
kTamil 
kTelugu 
kKannada 
kMalayalam 
kSinhala 
kMyanmar 
kKhmer 
kJavanese 

Definition at line 69 of file validator.h.

71  {
72  public:
73  // Validates and cleans the src vector of unicodes to the *dest, according to
74  // g_mode. In the case of kSingleString, a single vector containing the whole
75  // result is added to *dest. With kCombined, multiple vectors are added to
76  // *dest with one grapheme in each. With kGlyphSplit, multiple vectors are
77  // added to *dest with a smaller unit representing a glyph in each.
78  // In case of validation error, returns false and as much as possible of the
79  // input, without discarding invalid text.
80  static bool ValidateCleanAndSegment(GraphemeNormMode g_mode,
81  bool report_errors,
82  const std::vector<char32>& src,
83  std::vector<std::vector<char32>>* dest);
84 

◆ WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT 
WRITING_DIRECTION_RIGHT_TO_LEFT 
WRITING_DIRECTION_TOP_TO_BOTTOM 

Definition at line 135 of file publictypes.h.

◆ XHeightConsistencyEnum

Enumerator
XH_GOOD 
XH_SUBNORMAL 
XH_INCONSISTENT 

Definition at line 78 of file dict.h.

Function Documentation

◆ AccumulateVector()

void tesseract::AccumulateVector ( int  n,
const double *  src,
double *  dest 
)
inline

Definition at line 174 of file functions.h.

174  {
175  for (int i = 0; i < n; ++i) dest[i] += src[i];
176 }

◆ AsciiLikelyListItem()

bool tesseract::AsciiLikelyListItem ( const STRING word)

Definition at line 297 of file paragraphs.cpp.

297  :
298  UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word)
299  : u_(unicharset), word_(word) { wordlen_ = word->length(); }

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 168 of file tordmain.cpp.

170  { // output list
171  BLOCK *block; // current block
172  BLOBNBOX *newblob; // created blob
173  C_BLOB *blob; // current blob
174  BLOCK_IT block_it = blocks;
175  C_BLOB_IT blob_it; // iterator
176  BLOBNBOX_IT port_box_it; // iterator
177  // destination iterator
178  TO_BLOCK_IT port_block_it = port_blocks;
179  TO_BLOCK *port_block; // created block
180 
181  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
182  block = block_it.data();
183  port_block = new TO_BLOCK(block);
184 
185  // Convert the good outlines to block->blob_list
186  port_box_it.set_to_list(&port_block->blobs);
187  blob_it.set_to_list(block->blob_list());
188  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
189  blob = blob_it.extract();
190  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
191  SetBlobStrokeWidth(pix, newblob);
192  port_box_it.add_after_then_move(newblob);
193  }
194 
195  // Put the rejected outlines in block->noise_blobs, which allows them to
196  // be reconsidered and sorted back into rows and recover outlines mistakenly
197  // rejected.
198  port_box_it.set_to_list(&port_block->noise_blobs);
199  blob_it.set_to_list(block->reject_blobs());
200  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
201  blob = blob_it.extract();
202  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
203  SetBlobStrokeWidth(pix, newblob);
204  port_box_it.add_after_then_move(newblob);
205  }
206 
207  port_block_it.add_after_then_move(port_block);
208  }
209 }

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample ( const TBLOB &  blob,
bool  nonlinear_norm,
INT_FX_RESULT_STRUCT *  fx_info,
GenericVector< INT_FEATURE_STRUCT > *  bl_features 
)

Definition at line 77 of file intfx.cpp.

79  {
81  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
82  &cn_features, fx_info, nullptr);
83  // TODO(rays) Use blob->PreciseBoundingBox() instead.
84  TBOX box = blob.bounding_box();
85  TrainingSample* sample = nullptr;
86  int num_features = fx_info->NumCN;
87  if (num_features > 0) {
88  sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
89  num_features);
90  }
91  if (sample != nullptr) {
92  // Set the bounding box (in original image coordinates) in the sample.
93  TPOINT topleft, botright;
94  topleft.x = box.left();
95  topleft.y = box.top();
96  botright.x = box.right();
97  botright.y = box.bottom();
98  TPOINT original_topleft, original_botright;
99  blob.denorm().DenormTransform(nullptr, topleft, &original_topleft);
100  blob.denorm().DenormTransform(nullptr, botright, &original_botright);
101  sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y,
102  original_botright.x, original_topleft.y));
103  }
104  return sample;
105 }

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults ( GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs 
)

Definition at line 2253 of file paragraphs.cpp.

2273  {

◆ ClearFeatureSpaceWindow()

void tesseract::ClearFeatureSpaceWindow ( NORM_METHOD  norm_method,
ScrollView *  window 
)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

Definition at line 987 of file intproto.cpp.

987  {
988  window->Clear();
989 
990  window->Pen(ScrollView::GREY);
991  // Draw the feature space limit rectangle.
992  window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
993  if (norm_method == baseline) {
994  window->SetCursor(0, INT_DESCENDER);
995  window->DrawTo(INT_MAX_X, INT_DESCENDER);
996  window->SetCursor(0, INT_BASELINE);
997  window->DrawTo(INT_MAX_X, INT_BASELINE);
998  window->SetCursor(0, INT_XHEIGHT);
999  window->DrawTo(INT_MAX_X, INT_XHEIGHT);
1000  window->SetCursor(0, INT_CAPHEIGHT);
1001  window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
1002  } else {
1005  }
1006 }

◆ ClipVector()

template<typename T >
void tesseract::ClipVector ( int  n,
T  lower,
T  upper,
T *  vec 
)
inline

Definition at line 208 of file functions.h.

208  {
209  for (int i = 0; i < n; ++i) vec[i] = ClipToRange(vec[i], lower, upper);
210 }

◆ cmp_eq()

template<typename T >
bool tesseract::cmp_eq ( T const &  t1,
T const &  t2 
)

Definition at line 414 of file genericvector.h.

414  {
415  return t1 == t2;
416 }

◆ CodeInBinary()

void tesseract::CodeInBinary ( int  n,
int  nf,
double *  vec 
)
inline

Definition at line 214 of file functions.h.

214  {
215  if (nf <= 0 || n < nf) return;
216  int index = 0;
217  double best_score = vec[0];
218  for (int i = 1; i < n; ++i) {
219  if (vec[i] > best_score) {
220  best_score = vec[i];
221  index = i;
222  }
223  }
224  int mask = 1;
225  for (int i = 0; i < nf; ++i, mask *= 2) {
226  vec[i] = (index & mask) ? 1.0 : 0.0;
227  }
228 }

◆ CombineLangModel()

int tesseract::CombineLangModel ( const UNICHARSET unicharset,
const std::string &  script_dir,
const std::string &  version_str,
const std::string &  output_dir,
const std::string &  lang,
bool  pass_through_recoder,
const GenericVector< STRING > &  words,
const GenericVector< STRING > &  puncs,
const GenericVector< STRING > &  numbers,
bool  lang_is_rtl,
FileReader  reader,
FileWriter  writer 
)

Definition at line 185 of file lang_model_helpers.cpp.

191  {
192  // Build the traineddata file.
193  TessdataManager traineddata;
194  if (!version_str.empty()) {
195  traineddata.SetVersionString(traineddata.VersionString() + ":" +
196  version_str);
197  }
198  // Unicharset and recoder.
199  if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
200  tprintf("Error writing unicharset!!\n");
201  return EXIT_FAILURE;
202  } else {
203  tprintf("Config file is optional, continuing...\n");
204  }
205  // If there is a config file, read it and add to traineddata.
206  std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config";
207  STRING config_file = ReadFile(config_filename, reader);
208  if (config_file.length() > 0) {
209  traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0],
210  config_file.length());
211  }
212  std::string radical_filename = script_dir + "/radical-stroke.txt";
213  STRING radical_data = ReadFile(radical_filename, reader);
214  if (radical_data.length() == 0) {
215  tprintf("Error reading radical code table %s\n", radical_filename.c_str());
216  return EXIT_FAILURE;
217  }
218  if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer,
219  &radical_data, &traineddata)) {
220  tprintf("Error writing recoder!!\n");
221  }
222  if (!words.empty() || !puncs.empty() || !numbers.empty()) {
223  if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset,
224  &traineddata)) {
225  tprintf("Error during conversion of wordlists to DAWGs!!\n");
226  return EXIT_FAILURE;
227  }
228  }
229 
230  // Traineddata file.
231  GenericVector<char> traineddata_data;
232  traineddata.Serialize(&traineddata_data);
233  if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
234  tprintf("Error writing output traineddata file!!\n");
235  return EXIT_FAILURE;
236  }
237  return EXIT_SUCCESS;
238 }

◆ CompareFontInfo()

bool tesseract::CompareFontInfo ( const FontInfo fi1,
const FontInfo fi2 
)

Definition at line 119 of file fontinfo.cpp.

119  {
120  // The font properties are required to be the same for two font with the same
121  // name, so there is no need to test them.
122  // Consequently, querying the table with only its font name as information is
123  // enough to retrieve its properties.
124  return strcmp(fi1.name, fi2.name) == 0;
125 }

◆ CompareFontSet()

bool tesseract::CompareFontSet ( const FontSet fs1,
const FontSet fs2 
)

Definition at line 127 of file fontinfo.cpp.

127  {
128  if (fs1.size != fs2.size)
129  return false;
130  for (int i = 0; i < fs1.size; ++i) {
131  if (fs1.configs[i] != fs2.configs[i])
132  return false;
133  }
134  return true;
135 }

◆ CopyVector()

void tesseract::CopyVector ( int  n,
const double *  src,
double *  dest 
)
inline

Definition at line 169 of file functions.h.

169  {
170  memcpy(dest, src, n * sizeof(dest[0]));
171 }

◆ countof()

template<typename T , size_t N>
constexpr size_t tesseract::countof ( T   const(&)[N])
constexpr noexcept

Definition at line 43 of file serialis.h.

43  {
44  return N;
45 }

◆ CrownCompatible()

bool tesseract::CrownCompatible ( const GenericVector< RowScratchRegisters > *  rows,
int  a,
int  b,
const ParagraphModel model 
)

Definition at line 1315 of file paragraphs.cpp.

1323  : theory_(theory), rows_(rows), row_start_(row_start),
1324  row_end_(row_end) {
1325  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
1326  row_start_ = 0;
1327  row_end_ = 0;
1328  return;
1329  }
1330  SetOfModels no_models;
1331  for (int row = row_start - 1; row <= row_end; row++) {

◆ DegradeImage()

struct Pix * tesseract::DegradeImage ( Pix *  input,
int  exposure,
TRand *  randomizer,
float *  rotation 
)

Definition at line 109 of file degradeimage.cpp.

112  {
113  float radians_clockwise = 0.0f;
114  if (*rotation) {
115  radians_clockwise = *rotation;
116  } else if (randomizer != nullptr) {
117  radians_clockwise = randomizer->SignedRand(kRotationRange);
118  }
119 
120  input = pixRotate(pix, radians_clockwise,
121  L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
122  0, 0);
123  // Rotate the boxes to match.
124  *rotation = radians_clockwise;
125  pixDestroy(&pix);
126  } else {
127  input = pix;
128  }
129 
130  if (exposure >= 3 || exposure == 1) {
131  // Erosion after the convolution is not as heavy as before, so it is
132  // good for level 1 and in addition as a level 3.
133  // This is backwards to binary morphology,
134  // see http://www.leptonica.com/grayscale-morphology.html
135  pix = input;
136  input = pixErodeGray(pix, 3, 3);
137  pixDestroy(&pix);
138  }
139  // The convolution really needed to be 2x2 to be realistic enough, but
140  // we only have 3x3, so we have to bias the image darker or lose thin
141  // strokes.
142  int erosion_offset = 0;
143  // For light and 0 exposure, there is no dilation, so compensate for the
144  // convolution with a big darkening bias which is undone for lighter
145  // exposures.
146  if (exposure <= 0)
147  erosion_offset = -3 * kExposureFactor;
148  // Add in a general offset of the greyscales for the exposure level so
149  // a threshold of 128 gives a reasonable binary result.
150  erosion_offset -= exposure * kExposureFactor;
151  // Add a gradual fade over the page and a small amount of salt and pepper
152  // noise to simulate noise in the sensor/paper fibres and varying
153  // illumination.
154  l_uint32* data = pixGetData(input);
155  for (int y = 0; y < height; ++y) {
156  for (int x = 0; x < width; ++x) {
157  int pixel = GET_DATA_BYTE(data, x);
158  if (randomizer != nullptr)
159  pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
160  if (height + width > kMinRampSize)
161  pixel -= (2*x + y) * 32 / (height + width);
162  pixel += erosion_offset;
163  if (pixel < 0)
164  pixel = 0;
165  if (pixel > 255)
166  pixel = 255;
167  SET_DATA_BYTE(data, x, pixel);
168  }
169  data += input->wpl;
170  }
171  return input;
172 }
173 
174 // Creates and returns a Pix distorted by various means according to the bool
175 // flags. If boxes is not nullptr, the boxes are resized/positioned according to
176 // any spatial distortion and also by the integer reduction factor box_scale
177 // so they will match what the network will output.
178 // Returns nullptr on error. The returned Pix must be pixDestroyed.
179 Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
180  bool white_noise, bool smooth_noise, bool blur,
181  int box_reduction, TRand* randomizer,
182  GenericVector<TBOX>* boxes) {
183  Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
184  // Things to do to synthetic training data.
185  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
186  // TODO(rays) Cook noise in a more thread-safe manner than rand().
187  // Attempt to make the sequences reproducible.
188  srand(randomizer->IntRand());
189  Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
190  pixDestroy(&distorted);

◆ DeleteObject()

template<typename T >
void tesseract::DeleteObject ( T *  object)

Definition at line 155 of file tablefind.cpp.

155  {
156  delete object;
157 }

◆ DeSerialize() [1/8]

bool tesseract::DeSerialize ( FILE *  fp,
char *  data,
size_t  n 
)

Definition at line 45 of file serialis.cpp.

48  {

◆ DeSerialize() [2/8]

bool tesseract::DeSerialize ( FILE *  fp,
float *  data,
size_t  n 
)

Definition at line 49 of file serialis.cpp.

52  {

◆ DeSerialize() [3/8]

bool tesseract::DeSerialize ( FILE *  fp,
int16_t *  data,
size_t  n 
)

Definition at line 57 of file serialis.cpp.

60  {

◆ DeSerialize() [4/8]

bool tesseract::DeSerialize ( FILE *  fp,
int32_t *  data,
size_t  n 
)

Definition at line 61 of file serialis.cpp.

64  {

◆ DeSerialize() [5/8]

bool tesseract::DeSerialize ( FILE *  fp,
int8_t *  data,
size_t  n 
)

Definition at line 53 of file serialis.cpp.

56  {

◆ DeSerialize() [6/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint16_t *  data,
size_t  n 
)

Definition at line 69 of file serialis.cpp.

72  {

◆ DeSerialize() [7/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint32_t *  data,
size_t  n 
)

Definition at line 73 of file serialis.cpp.

76  {

◆ DeSerialize() [8/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint8_t *  data,
size_t  n 
)

Definition at line 65 of file serialis.cpp.

68  {

◆ DetectParagraphs() [1/2]

void tesseract::DetectParagraphs ( int  debug_level,
bool  after_text_recognition,
const MutableIterator block_start,
GenericVector< ParagraphModel * > *  models 
)

Definition at line 2528 of file paragraphs.cpp.

2531  {
2532  if (!row.PageResIt()->row())
2533  continue; // empty row.
2534  row.PageResIt()->row()->row->set_para(nullptr);
2535  row_infos.push_back(RowInfo());
2536  RowInfo &ri = row_infos.back();
2537  InitializeRowInfo(after_text_recognition, row, &ri);
2538  } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) &&
2539  row.Next(RIL_TEXTLINE));
2540 
2541  // If we're called before text recognition, we might not have
2542  // tight block bounding boxes, so trim by the minimum on each side.
2543  if (!row_infos.empty()) {
2544  int min_lmargin = row_infos[0].pix_ldistance;
2545  int min_rmargin = row_infos[0].pix_rdistance;
2546  for (int i = 1; i < row_infos.size(); i++) {
2547  if (row_infos[i].pix_ldistance < min_lmargin)
2548  min_lmargin = row_infos[i].pix_ldistance;
2549  if (row_infos[i].pix_rdistance < min_rmargin)
2550  min_rmargin = row_infos[i].pix_rdistance;
2551  }
2552  if (min_lmargin > 0 || min_rmargin > 0) {
2553  for (int i = 0; i < row_infos.size(); i++) {
2554  row_infos[i].pix_ldistance -= min_lmargin;
2555  row_infos[i].pix_rdistance -= min_rmargin;
2556  }
2557  }
2558  }
2559 
2560  // Run the paragraph detection algorithm.
2561  GenericVector<PARA *> row_owners;
2562  GenericVector<PARA *> the_paragraphs;
2563  if (!is_image_block) {
2564  DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(),
2565  models);
2566  } else {
2567  row_owners.init_to_size(row_infos.size(), nullptr);
2568  CanonicalizeDetectionResults(&row_owners, block->para_list());
2569  }
2570 
2571  // Now stitch in the row_owners into the rows.
2572  row = *block_start;
2573  for (int i = 0; i < row_owners.size(); i++) {
2574  while (!row.PageResIt()->row())
2575  row.Next(RIL_TEXTLINE);
2576  row.PageResIt()->row()->row->set_para(row_owners[i]);
2577  row.Next(RIL_TEXTLINE);
2578  }
2579 }
2580 
2581 } // namespace

◆ DetectParagraphs() [2/2]

void tesseract::DetectParagraphs ( int  debug_level,
GenericVector< RowInfo > *  row_infos,
GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs,
GenericVector< ParagraphModel * > *  models 
)

Definition at line 2285 of file paragraphs.cpp.

2297  {
2298  // Pass 2a:
2299  // Find any strongly evidenced start-of-paragraph lines. If they're
2300  // followed by two lines that look like body lines, make a paragraph
2301  // model for that and see if that model applies throughout the text
2302  // (that is, "smear" it).
2303  StrongEvidenceClassify(debug_level, &rows,
2304  leftovers[i].begin, leftovers[i].end, &theory);
2305 
2306  // Pass 2b:
2307  // If we had any luck in pass 2a, we got part of the page and didn't
2308  // know how to classify a few runs of rows. Take the segments that
2309  // didn't find a model and reprocess them individually.
2310  GenericVector<Interval> leftovers2;
2311  LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);
2312  bool pass2a_was_useful = leftovers2.size() > 1 ||
2313  (leftovers2.size() == 1 &&
2314  (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size()));
2315  if (pass2a_was_useful) {
2316  for (int j = 0; j < leftovers2.size(); j++) {
2317  StrongEvidenceClassify(debug_level, &rows,
2318  leftovers2[j].begin, leftovers2[j].end,
2319  &theory);
2320  }
2321  }
2322  }
2323 
2324  DebugDump(debug_level > 1, "End of Pass 2", theory, rows);
2325 
2326  // Pass 3:
2327  // These are the dregs for which we didn't have enough strong textual
2328  // and geometric clues to form matching models for. Let's see if
2329  // the geometric clues are simple enough that we could just use those.
2330  LeftoverSegments(rows, &leftovers, 0, rows.size());
2331  for (int i = 0; i < leftovers.size(); i++) {
2332  GeometricClassify(debug_level, &rows,
2333  leftovers[i].begin, leftovers[i].end, &theory);
2334  }
2335 
2336  // Undo any flush models for which there's little evidence.
2337  DowngradeWeakestToCrowns(debug_level, &theory, &rows);
2338 
2339  DebugDump(debug_level > 1, "End of Pass 3", theory, rows);
2340 
2341  // Pass 4:
2342  // Take everything that's still not marked up well and clear all markings.
2343  LeftoverSegments(rows, &leftovers, 0, rows.size());
2344  for (int i = 0; i < leftovers.size(); i++) {
2345  for (int j = leftovers[i].begin; j < leftovers[i].end; j++) {
2346  rows[j].SetUnknown();
2347  }
2348  }
2349 
2350  DebugDump(debug_level > 1, "End of Pass 4", theory, rows);
2351 
2352  // Convert all of the unique hypothesis runs to PARAs.
2353  ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners,
2354  &theory);
2355 
2356  DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);
2357 
2358  // Finally, clean up any dangling nullptr row paragraph parents.
2359  CanonicalizeDetectionResults(row_owners, paragraphs);
2360 }
2361 
2362 // ============ Code interfacing with the rest of Tesseract ==================
2363 
2364 static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
2365  RowInfo *info) {
2366  // Set up text, lword_text, and rword_text (mostly for debug printing).
2367  STRING fake_text;
2368  PageIterator pit(static_cast<const PageIterator&>(it));
2369  bool first_word = true;
2370  if (!pit.Empty(RIL_WORD)) {
2371  do {
2372  fake_text += "x";
2373  if (first_word) info->lword_text += "x";
2374  info->rword_text += "x";
2375  if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&
2376  !pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL)) {

◆ DotProductAVX()

double tesseract::DotProductAVX ( const double *  u,
const double *  v,
int  n 
)

Definition at line 30 of file dotproductavx.cpp.

30  {
31  const unsigned quot = n / 8;
32  const unsigned rem = n % 8;
33  __m256d t0 = _mm256_setzero_pd();
34  __m256d t1 = _mm256_setzero_pd();
35  for (unsigned k = 0; k < quot; k++) {
36  __m256d f0 = _mm256_loadu_pd(u);
37  __m256d f1 = _mm256_loadu_pd(v);
38  f0 = _mm256_mul_pd(f0, f1);
39  t0 = _mm256_add_pd(t0, f0);
40  u += 4;
41  v += 4;
42  __m256d f2 = _mm256_loadu_pd(u);
43  __m256d f3 = _mm256_loadu_pd(v);
44  f2 = _mm256_mul_pd(f2, f3);
45  t1 = _mm256_add_pd(t1, f2);
46  u += 4;
47  v += 4;
48  }
49  t0 = _mm256_hadd_pd(t0, t1);
50  alignas(32) double tmp[4];
51  _mm256_store_pd(tmp, t0);
52  double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
53  for (unsigned k = 0; k < rem; k++) {
54  result += *u++ * *v++;
55  }
56  return result;
57 }

◆ DotProductFMA()

double tesseract::DotProductFMA ( const double *  u,
const double *  v,
int  n 
)

Definition at line 30 of file dotproductfma.cpp.

30  {
31  const unsigned quot = n / 8;
32  const unsigned rem = n % 8;
33  __m256d t0 = _mm256_setzero_pd();
34  __m256d t1 = _mm256_setzero_pd();
35  for (unsigned k = 0; k < quot; k++) {
36  __m256d f0 = _mm256_loadu_pd(u);
37  __m256d f1 = _mm256_loadu_pd(v);
38  t0 = _mm256_fmadd_pd(f0, f1, t0);
39  u += 4;
40  v += 4;
41  __m256d f2 = _mm256_loadu_pd(u);
42  __m256d f3 = _mm256_loadu_pd(v);
43  t1 = _mm256_fmadd_pd(f2, f3, t1);
44  u += 4;
45  v += 4;
46  }
47  t0 = _mm256_hadd_pd(t0, t1);
48  alignas(32) double tmp[4];
49  _mm256_store_pd(tmp, t0);
50  double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
51  for (unsigned k = 0; k < rem; k++) {
52  result += *u++ * *v++;
53  }
54  return result;
55 }

◆ DotProductNative()

double tesseract::DotProductNative ( const double *  u,
const double *  v,
int  n 
)

Definition at line 22 of file dotproduct.cpp.

22  {
23  double total = 0.0;
24  for (int k = 0; k < n; ++k) total += u[k] * v[k];
25  return total;
26 }

◆ DotProductSSE()

double tesseract::DotProductSSE ( const double *  u,
const double *  v,
int  n 
)

Definition at line 31 of file dotproductsse.cpp.

31  {
32  int max_offset = n - 2;
33  int offset = 0;
34  // Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
35  // v, and multiplying them together in parallel.
36  __m128d sum = _mm_setzero_pd();
37  if (offset <= max_offset) {
38  offset = 2;
39  // Aligned load is reputedly faster but requires 16 byte aligned input.
40  if ((reinterpret_cast<uintptr_t>(u) & 15) == 0 &&
41  (reinterpret_cast<uintptr_t>(v) & 15) == 0) {
42  // Use aligned load.
43  sum = _mm_load_pd(u);
44  __m128d floats2 = _mm_load_pd(v);
45  // Multiply.
46  sum = _mm_mul_pd(sum, floats2);
47  while (offset <= max_offset) {
48  __m128d floats1 = _mm_load_pd(u + offset);
49  floats2 = _mm_load_pd(v + offset);
50  offset += 2;
51  floats1 = _mm_mul_pd(floats1, floats2);
52  sum = _mm_add_pd(sum, floats1);
53  }
54  } else {
55  // Use unaligned load.
56  sum = _mm_loadu_pd(u);
57  __m128d floats2 = _mm_loadu_pd(v);
58  // Multiply.
59  sum = _mm_mul_pd(sum, floats2);
60  while (offset <= max_offset) {
61  __m128d floats1 = _mm_loadu_pd(u + offset);
62  floats2 = _mm_loadu_pd(v + offset);
63  offset += 2;
64  floats1 = _mm_mul_pd(floats1, floats2);
65  sum = _mm_add_pd(sum, floats1);
66  }
67  }
68  }
69  // Add the 2 sums in sum horizontally.
70  sum = _mm_hadd_pd(sum, sum);
71  // Extract the low result.
72  double result = _mm_cvtsd_f64(sum);
73  // Add on any left-over products.
74  while (offset < n) {
75  result += u[offset] * v[offset];
76  ++offset;
77  }
78  return result;
79 }

◆ ExtractFontName()

void tesseract::ExtractFontName ( const STRING &  filename,
STRING *  fontname 
)

Public Code

Definition at line 45 of file blobclass.cpp.

45  {
46  *fontname = classify_font_name;
47  if (*fontname == kUnknownFontName) {
48  // filename is expected to be of the form [lang].[fontname].exp[num]
49  // The [lang], [fontname] and [num] fields should not have '.' characters.
50  const char *basename = strrchr(filename.string(), '/');
51  const char *firstdot = strchr(basename ? basename : filename.string(), '.');
52  const char *lastdot = strrchr(filename.string(), '.');
53  if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
54  ++firstdot;
55  *fontname = firstdot;
56  fontname->truncate_at(lastdot - firstdot);
57  }
58  }
59 }

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after 
)

Definition at line 1672 of file paragraphs.cpp.

1672  {
1673  if (before.ri_->ltr) {
1674  return before.ri_->rword_likely_ends_idea &&
1675  after.ri_->lword_likely_starts_idea;
1676  } else {
1677  return before.ri_->lword_likely_ends_idea &&
1678  after.ri_->rword_likely_starts_idea;
1679  }
1680 }
1681 
1682 static bool LikelyParagraphStart(const RowScratchRegisters &before,
1683  const RowScratchRegisters &after,
1685  return before.ri_->num_words == 0 ||

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after,
tesseract::ParagraphJustification  justification 
)

Definition at line 1647 of file paragraphs.cpp.

1657  {
1658  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
1659  return true;
1660 
1661  int available_space = before.lindent_;
1662  if (before.rindent_ > available_space)
1663  available_space = before.rindent_;
1664  available_space -= before.ri_->average_interword_space;
1665 
1666  if (before.ri_->ltr)
1667  return after.ri_->lword_box.width() < available_space;

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo  f)

Definition at line 138 of file fontinfo.cpp.

138  {
139  if (f.spacing_vec != nullptr) {
140  f.spacing_vec->delete_data_pointers();
141  delete f.spacing_vec;
142  f.spacing_vec = nullptr;
143  }
144  delete[] f.name;
145  f.name = nullptr;
146 }

◆ FontSetDeleteCallback()

void tesseract::FontSetDeleteCallback ( FontSet  fs)

Definition at line 147 of file fontinfo.cpp.

147  {
148  delete[] fs.configs;
149 }

◆ FullwidthToHalfwidth()

char32 tesseract::FullwidthToHalfwidth ( const char32  ch)

Definition at line 300 of file normstrngs.cpp.

◆ FuncInplace()

template<class Func >
void tesseract::FuncInplace ( int  n,
double *  inout 
)
inline

Definition at line 129 of file functions.h.

129  {
130  Func f;
131  for (int i = 0; i < n; ++i) {
132  inout[i] = f(inout[i]);
133  }
134 }

◆ FuncMultiply()

template<class Func >
void tesseract::FuncMultiply ( const double *  u,
const double *  v,
int  n,
double *  out 
)
inline

Definition at line 138 of file functions.h.

138  {
139  Func f;
140  for (int i = 0; i < n; ++i) {
141  out[i] = f(u[i]) * v[i];
142  }
143 }

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion ( int  width,
int  height,
TRand randomizer,
Pix **  pix,
GenericVector< TBOX > *  boxes 
)

Definition at line 238 of file degradeimage.cpp.

240  {
241  // Transform the boxes.
242  for (int b = 0; b < boxes->size(); ++b) {
243  int x1, y1, x2, y2;
244  const TBOX& box = (*boxes)[b];
245  projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
246  &y1);
247  projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
248  &x2, &y2);
249  TBOX new_box1(x1, height - y2, x2, height - y1);
250  projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
251  &x1, &y1);
252  projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
253  &y2);
254  TBOX new_box2(x1, height - y1, x2, height - y2);
255  (*boxes)[b] = new_box1.bounding_union(new_box2);
256  }
257  }
258  free(im_coeffs);
259  free(box_coeffs);
260 }
261 
262 // Computes the coefficients of a randomized projective transformation.
263 // The image transform requires backward transformation coefficient, and the
264 // box transform the forward coefficients.
265 // Returns the incolor arg to pixProjective.
266 int ProjectiveCoeffs(int width, int height, TRand* randomizer,
267  float** im_coeffs, float** box_coeffs) {
268  // Setup "from" points.
269  Pta* src_pts = ptaCreate(4);
270  ptaAddPt(src_pts, 0.0f, 0.0f);
271  ptaAddPt(src_pts, width, 0.0f);
272  ptaAddPt(src_pts, width, height);
273  ptaAddPt(src_pts, 0.0f, height);
274  // Extract factors from pseudo-random sequence.
275  float factors[FN_NUM_FACTORS];
276  float shear = 0.0f; // Shear is signed.
277  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
278  // Everything is squared to make wild values rarer.

◆ GetXheightString()

std::string tesseract::GetXheightString ( const std::string &  script_dir,
const UNICHARSET unicharset 
)

Definition at line 164 of file unicharset_training_utils.cpp.

165  {
166  std::string xheights_str;
167  for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
168  // Load the xheights for the script if available.
169  std::string filename = script_dir + "/" +
170  unicharset.get_script_from_script_id(s) + ".xheights";
171  std::string script_heights;
172  if (File::ReadFileToString(filename, &script_heights))
173  xheights_str += script_heights;
174  }
175  return xheights_str;
176 }

◆ HistogramRect()

void tesseract::HistogramRect ( Pix *  src_pix,
int  channel,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)

Definition at line 167 of file otsuthr.cpp.

171  {
172  int H = 0;
173  double mu_T = 0.0;
174  for (int i = 0; i < kHistogramSize; ++i) {
175  H += histogram[i];
176  mu_T += static_cast<double>(i) * histogram[i];
177  }
178 
179  // Now maximize sig_sq_B over t.
180  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
181  int best_t = -1;
182  int omega_0, omega_1;
183  int best_omega_0 = 0;

◆ HOcrEscape()

STRING tesseract::HOcrEscape ( const char *  text)

Escape a char string - replace <>&"' with HTML codes.

Escape a char string - replace &<>"' with HTML codes.

Definition at line 2310 of file baseapi.cpp.

2310  {
2311  STRING ret;
2312  const char *ptr;
2313  for (ptr = text; *ptr; ptr++) {
2314  switch (*ptr) {
2315  case '<': ret += "&lt;"; break;
2316  case '>': ret += "&gt;"; break;
2317  case '&': ret += "&amp;"; break;
2318  case '"': ret += "&quot;"; break;
2319  case '\'': ret += "&#39;"; break;
2320  default: ret += *ptr;
2321  }
2322  }
2323  return ret;
2324 }

◆ InterwordSpace()

int tesseract::InterwordSpace ( const GenericVector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)

Definition at line 1624 of file paragraphs.cpp.

1626  : minimum_reasonable_space;
1627 }
1628 
1629 // Return whether the first word on the after line can fit in the space at
1630 // the end of the before line (knowing which way the text is aligned and read).
1631 bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
1632  const RowScratchRegisters &after,
1633  tesseract::ParagraphJustification justification) {
1634  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
1635  return true;
1636 
1637  if (justification == JUSTIFICATION_UNKNOWN) {
1638  tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
1639  }
1640  int available_space;
1641  if (justification == JUSTIFICATION_CENTER) {
1642  available_space = before.lindent_ + before.rindent_;
1643  } else {

◆ IsInterchangeValid()

bool tesseract::IsInterchangeValid ( const char32  ch)

Definition at line 271 of file normstrngs.cpp.

276  {
277  return IsValidCodepoint(ch) && ch <= 128 &&
278  (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
279  ch == '\f' || ch == '\t' || ch == '\r');
280 }
281 
283  // Return unchanged if not in the fullwidth-halfwidth Unicode block.
284  if (ch < 0xFF00 || ch > 0xFFEF || !IsValidCodepoint(ch)) {
285  if (ch != 0x3000) return ch;
286  }
287  // Special case for fullwidth left and right "white parentheses".
288  if (ch == 0xFF5F) return 0x2985;
289  if (ch == 0xFF60) return 0x2986;
290  // Construct a full-to-half width transliterator.
291  IcuErrorCode error_code;
292  icu::UnicodeString uch_str(static_cast<UChar32>(ch));

◆ IsInterchangeValid7BitAscii()

bool tesseract::IsInterchangeValid7BitAscii ( const char32  ch)

Definition at line 294 of file normstrngs.cpp.

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 92 of file equationdetect.cpp.

92  {
93  return type == EquationDetect::LEFT_INDENT ||
94  type == EquationDetect::BOTH_INDENT;
95 }

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent ( char32  ch1,
char32  ch2 
)

Definition at line 232 of file normstrngs.cpp.

233  {
234  int n_white = 0;

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 97 of file equationdetect.cpp.

97  {
98  return type == EquationDetect::RIGHT_INDENT ||
99  type == EquationDetect::BOTH_INDENT;
100 }

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType  type)
inline

Definition at line 88 of file equationdetect.cpp.

88  {
89  return PTIsTextType(type) || type == PT_EQUATION;
90 }

◆ IsUTF8Whitespace()

bool tesseract::IsUTF8Whitespace ( const char *  text)

Definition at line 247 of file normstrngs.cpp.

253  {

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32  ch)

Definition at line 236 of file normstrngs.cpp.

236  {
237  if (!IsWhitespace(*it)) break;
238  n_white += it.utf8_len();
239  }

◆ IsWhitespace()

bool tesseract::IsWhitespace ( const char32  ch)

Definition at line 241 of file normstrngs.cpp.

243  {
244  int n_notwhite = 0;
245  for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));

◆ LeftWordAttributes()

void tesseract::LeftWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 424 of file paragraphs.cpp.

425  {
426  *starts_idea = true;
427  }
428  if (unicharset->get_ispunctuation(werd->unichar_id(0))) {
429  *starts_idea = true;
430  *ends_idea = true;
431  }
432  } else { // Assume utf8 is mostly ASCII
433  if (AsciiLikelyListItem(utf8)) {
434  *is_list = true;
435  *starts_idea = true;
436  }
437  int start_letter = utf8[0];
438  if (IsOpeningPunct(start_letter)) {
439  *starts_idea = true;
440  }
441  if (IsTerminalPunct(start_letter)) {
442  *ends_idea = true;
443  }
444  if (start_letter >= 'A' && start_letter <= 'Z') {
445  *starts_idea = true;
446  }
447  }
448 }
449 
450 // Given the rightmost word of a line either as a Tesseract unicharset + werd
451 // or a utf8 string, set the following attributes for it:
452 // is_list - this word might be a list number or bullet.
453 // starts_idea - this word is likely to start a sentence.
454 // ends_idea - this word is likely to end a sentence.
455 void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
456  const STRING &utf8,
457  bool *is_list, bool *starts_idea, bool *ends_idea) {
458  *is_list = false;
459  *starts_idea = false;
460  *ends_idea = false;
461  if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) { // Empty
462  *ends_idea = true;
463  return;
464  }

◆ LoadDataFromFile() [1/2]

bool tesseract::LoadDataFromFile ( const char *  filename,
GenericVector< char > *  data 
)
inline

Definition at line 375 of file genericvector.h.

375  {
376  bool result = false;
377  FILE* fp = fopen(filename, "rb");
378  if (fp != nullptr) {
379  fseek(fp, 0, SEEK_END);
380  auto size = std::ftell(fp);
381  fseek(fp, 0, SEEK_SET);
382  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
383  if (size > 0 && size < LONG_MAX) {
384  // reserve an extra byte in case caller wants to append a '\0' character
385  data->reserve(size + 1);
386  data->resize_no_init(size);
387  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
388  }
389  fclose(fp);
390  }
391  return result;
392 }

◆ LoadDataFromFile() [2/2]

bool tesseract::LoadDataFromFile ( const STRING filename,
GenericVector< char > *  data 
)
inline

Definition at line 394 of file genericvector.h.

395  {
396  return LoadDataFromFile(filename.string(), data);
397 }

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings ( const char *  filename,
GenericVector< STRING > *  lines 
)
inline

Definition at line 45 of file fileio.h.

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const STRING file_prefix)

Definition at line 154 of file commontraining.cpp.

154  {
155  ShapeTable* shape_table = nullptr;
156  STRING shape_table_file = file_prefix;
157  shape_table_file += kShapeTableFileSuffix;
158  TFile shape_fp;
159  if (shape_fp.Open(shape_table_file.string(), nullptr)) {
160  shape_table = new ShapeTable;
161  if (!shape_table->DeSerialize(&shape_fp)) {
162  delete shape_table;
163  shape_table = nullptr;
164  tprintf("Error: Failed to read shape table %s\n",
165  shape_table_file.string());
166  } else {
167  int num_shapes = shape_table->NumShapes();
168  tprintf("Read shape table %s of %d shapes\n",
169  shape_table_file.string(), num_shapes);
170  }
171  } else {
172  tprintf("Warning: No shape table file present: %s\n",
173  shape_table_file.string());
174  }
175  return shape_table;
176 }

◆ LoadTrainingData()

MasterTrainer * tesseract::LoadTrainingData ( int  argc,
const char *const *  argv,
bool  replication,
ShapeTable **  shape_table,
STRING file_prefix 
)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

  • Loads font info from -F option.
  • Loads xheights from -X option.
  • Loads samples from .tr files in remaining command-line args.
  • Deletes outliers and computes canonical samples.
  • If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it.

Computes canonical and cloud features. If shape_table is not nullptr but the shape table failed to load, makes a fake flat one, as shape clustering was not run.

Definition at line 211 of file commontraining.cpp.

214  {
216  InitIntegerFX();
217  *file_prefix = "";
218  if (!FLAGS_D.empty()) {
219  *file_prefix += FLAGS_D.c_str();
220  *file_prefix += "/";
221  }
222  // If we are shape clustering (nullptr shape_table) or we successfully load
223  // a shape_table written by a previous shape clustering, then
224  // shape_analysis will be true, meaning that the MasterTrainer will replace
225  // some members of the unicharset with their fragments.
226  bool shape_analysis = false;
227  if (shape_table != nullptr) {
228  *shape_table = LoadShapeTable(*file_prefix);
229  if (*shape_table != nullptr) shape_analysis = true;
230  } else {
231  shape_analysis = true;
232  }
233  MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC,
234  shape_analysis,
235  replication,
236  FLAGS_debug_level);
237  IntFeatureSpace fs;
239  trainer->LoadUnicharset(FLAGS_U.c_str());
240  // Get basic font information from font_properties.
241  if (!FLAGS_F.empty()) {
242  if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
243  delete trainer;
244  return nullptr;
245  }
246  }
247  if (!FLAGS_X.empty()) {
248  if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
249  delete trainer;
250  return nullptr;
251  }
252  }
253  trainer->SetFeatureSpace(fs);
254  const char* page_name;
255  // Load training data from .tr files on the command line.
256  while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
257  tprintf("Reading %s ...\n", page_name);
258  trainer->ReadTrainingSamples(page_name, feature_defs, false);
259 
260  // If there is a file with [lang].[fontname].exp[num].fontinfo present,
261  // read font spacing information in to fontinfo_table.
262  int pagename_len = strlen(page_name);
263  char* fontinfo_file_name = new char[pagename_len + 7];
264  strncpy(fontinfo_file_name, page_name, pagename_len - 2); // remove "tr"
265  strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo"); // +"fontinfo"
266  trainer->AddSpacingInfo(fontinfo_file_name);
267  delete[] fontinfo_file_name;
268 
269  // Load the images into memory if required by the classifier.
270  if (FLAGS_load_images) {
271  STRING image_name = page_name;
272  // Chop off the tr and replace with tif. Extension must be tif!
273  image_name.truncate_at(image_name.length() - 2);
274  image_name += "tif";
275  trainer->LoadPageImages(image_name.string());
276  }
277  }
278  trainer->PostLoadCleanup();
279  // Write the master trainer if required.
280  if (!FLAGS_output_trainer.empty()) {
281  FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
282  if (fp == nullptr) {
283  tprintf("Can't create saved trainer data!\n");
284  } else {
285  trainer->Serialize(fp);
286  fclose(fp);
287  }
288  }
289  trainer->PreTrainingSetup();
290  if (!FLAGS_O.empty() &&
291  !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
292  fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
293  delete trainer;
294  return nullptr;
295  }
296  if (shape_table != nullptr) {
297  // If we previously failed to load a shapetable, then shape clustering
298  // wasn't run so make a flat one now.
299  if (*shape_table == nullptr) {
300  *shape_table = new ShapeTable;
301  trainer->SetupFlatShapeTable(*shape_table);
302  tprintf("Flat shape table summary: %s\n",
303  (*shape_table)->SummaryStr().string());
304  }
305  (*shape_table)->set_unicharset(trainer->unicharset());
306  }
307  return trainer;
308 }

◆ Logistic()

double tesseract::Logistic ( double  x)
inline

Definition at line 54 of file functions.h.

54  {
55  if (x < 0.0) return 1.0 - Logistic(-x);
56  x *= kScaleFactor;
57  unsigned index = static_cast<unsigned>(x);
58  if (index >= (kTableSize - 1)) return 1.0;
59  double l0 = LogisticTable[index];
60  double l1 = LogisticTable[index + 1];
61  // Linear interpolation.
62  return l0 + (l1 - l0) * (x - index);
63 }

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate ( int  n,
const double *  u,
const double *  v,
double *  out 
)
inline

Definition at line 184 of file functions.h.

185  {
186  for (int i = 0; i < n; i++) {
187  out[i] += u[i] * v[i];
188  }
189 }

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace ( int  n,
const double *  src,
double *  inout 
)
inline

Definition at line 179 of file functions.h.

179  {
180  for (int i = 0; i < n; ++i) inout[i] *= src[i];
181 }

◆ NormalizeCleanAndSegmentUTF8()

bool tesseract::NormalizeCleanAndSegmentUTF8 ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNormMode  g_mode,
bool  report_errors,
const char *  str8,
std::vector< std::string > *  graphemes 
)

Definition at line 190 of file normstrngs.cpp.

197  : graphemes32) {
198  graphemes->push_back(UNICHAR::UTF32ToUTF8(grapheme));
199  }
200  return success;
201 }
202 
203 // Apply just the OCR-specific normalizations and return the normalized char.
205  if (is_hyphen_punc(ch))
206  return '-';
207  else if (is_single_quote(ch))
208  return '\'';
209  else if (is_double_quote(ch))
210  return '"';
211  return ch;
212 }
213 
214 bool IsOCREquivalent(char32 ch1, char32 ch2) {
215  return OCRNormalize(ch1) == OCRNormalize(ch2);
216 }
217 
218 bool IsValidCodepoint(const char32 ch) {
219  // In the range [0, 0xD800) or [0xE000, 0x10FFFF]

◆ NormalizeUTF8String()

bool tesseract::NormalizeUTF8String ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNorm  grapheme_normalize,
const char *  str8,
std::string *  normalized 
)

Definition at line 165 of file normstrngs.cpp.

175  {
176  std::vector<char32> normed32;
177  NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32);
178  StripJoiners(&normed32);
179  std::vector<std::vector<char32>> graphemes32;
180  bool success = Validator::ValidateCleanAndSegment(g_mode, report_errors,
181  normed32, &graphemes32);
182  if (g_mode != GraphemeNormMode::kSingleString && success) {
183  // If we modified the string to clean it up, the segmentation may not be
184  // correct, so check for changes and do it again.

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32  ch)

Definition at line 222 of file normstrngs.cpp.

223  {
224  ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n",
225  ch);
226  return u_isUWhiteSpace(static_cast<UChar32>(ch));
227 }
228 
229 bool IsUTF8Whitespace(const char* text) {
230  return SpanUTF8Whitespace(text) == strlen(text);

◆ OtsuStats()

int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)

Definition at line 188 of file otsuthr.cpp.

188  {
189  omega_0 += histogram[t];
190  mu_t += t * static_cast<double>(histogram[t]);
191  if (omega_0 == 0)
192  continue;
193  omega_1 = H - omega_0;
194  if (omega_1 == 0)
195  break;
196  mu_0 = mu_t / omega_0;
197  mu_1 = (mu_T - mu_t) / omega_1;
198  double sig_sq_B = mu_1 - mu_0;
199  sig_sq_B *= sig_sq_B * omega_0 * omega_1;
200  if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
201  best_sig_sq_B = sig_sq_B;
202  best_t = t;
203  best_omega_0 = omega_0;
204  }
205  }
206  if (H_out != nullptr) *H_out = H;
207  if (omega0_out != nullptr) *omega0_out = best_omega_0;
208  return best_t;
209 }
210 
211 } // namespace tesseract.

◆ OtsuThreshold()

int tesseract::OtsuThreshold ( Pix *  src_pix,
int  left,
int  top,
int  width,
int  height,
int **  thresholds,
int **  hi_values 
)

Definition at line 57 of file otsuthr.cpp.

60  {
61  od.HistogramRectOCL(pixGetData(src_pix), num_channels,
62  pixGetWpl(src_pix) * 4, left, top, width, height,
63  kHistogramSize, histogramAllChannels);
64 
65  // Calculate Threshold from Histogram on cpu
66  for (int ch = 0; ch < num_channels; ++ch) {
67  (*thresholds)[ch] = -1;
68  (*hi_values)[ch] = -1;
69  int *histogram = &histogramAllChannels[kHistogramSize * ch];
70  int H;
71  int best_omega_0;
72  int best_t = OtsuStats(histogram, &H, &best_omega_0);
73  if (best_omega_0 == 0 || best_omega_0 == H) {
74  // This channel is empty.
75  continue;
76  }
77  // To be a convincing foreground we must have a small fraction of H
78  // or to be a convincing background we must have a large fraction of H.
79  // In between we assume this channel contains no thresholding information.
80  int hi_value = best_omega_0 < H * 0.5;
81  (*thresholds)[ch] = best_t;
82  if (best_omega_0 > H * 0.75) {
83  any_good_hivalue = true;
84  (*hi_values)[ch] = 0;
85  } else if (best_omega_0 < H * 0.25) {
86  any_good_hivalue = true;
87  (*hi_values)[ch] = 1;
88  } else {
89  // In case all channels are like this, keep the best of the bad lot.
90  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
91  if (hi_dist > best_hi_dist) {
92  best_hi_dist = hi_dist;
93  best_hi_value = hi_value;
94  best_hi_index = ch;
95  }
96  }
97  }
98  } else {
99 #endif
100  for (int ch = 0; ch < num_channels; ++ch) {
101  (*thresholds)[ch] = -1;
102  (*hi_values)[ch] = -1;
103  // Compute the histogram of the image rectangle.
104  int histogram[kHistogramSize];
105  HistogramRect(src_pix, ch, left, top, width, height, histogram);
106  int H;
107  int best_omega_0;
108  int best_t = OtsuStats(histogram, &H, &best_omega_0);
109  if (best_omega_0 == 0 || best_omega_0 == H) {
110  // This channel is empty.
111  continue;
112  }
113  // To be a convincing foreground we must have a small fraction of H
114  // or to be a convincing background we must have a large fraction of H.
115  // In between we assume this channel contains no thresholding information.
116  int hi_value = best_omega_0 < H * 0.5;
117  (*thresholds)[ch] = best_t;
118  if (best_omega_0 > H * 0.75) {
119  any_good_hivalue = true;
120  (*hi_values)[ch] = 0;
121  } else if (best_omega_0 < H * 0.25) {
122  any_good_hivalue = true;
123  (*hi_values)[ch] = 1;
124  } else {
125  // In case all channels are like this, keep the best of the bad lot.
126  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
127  if (hi_dist > best_hi_dist) {
128  best_hi_dist = hi_dist;
129  best_hi_value = hi_value;
130  best_hi_index = ch;
131  }
132  }
133  }
134 #ifdef USE_OPENCL
135  }
136  delete[] histogramAllChannels;
137 #endif // USE_OPENCL
138 
139  if (!any_good_hivalue) {
140  // Use the best of the ones that were not good enough.
141  (*hi_values)[best_hi_index] = best_hi_value;
142  }
143  return num_channels;
144 }
145 
146 // Computes the histogram for the given image rectangle, and the given
147 // single channel. Each channel is always one byte per pixel.
148 // Histogram is always a kHistogramSize(256) element array to count
149 // occurrences of each pixel value.
150 void HistogramRect(Pix* src_pix, int channel,
151  int left, int top, int width, int height,
152  int* histogram) {
153  int num_channels = pixGetDepth(src_pix) / 8;
154  channel = ClipToRange(channel, 0, num_channels - 1);
155  int bottom = top + height;
156  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
157  int src_wpl = pixGetWpl(src_pix);
158  l_uint32* srcdata = pixGetData(src_pix);
159  for (int y = top; y < bottom; ++y) {
160  const l_uint32* linedata = srcdata + y * src_wpl;
161  for (int x = 0; x < width; ++x) {

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char *  name)

Definition at line 26 of file params_training_featdef.cpp.

26  {
27  if (name == nullptr)
28  return -1;
29  int array_size = sizeof(kParamsTrainingFeatureTypeName) /
30  sizeof(kParamsTrainingFeatureTypeName[0]);
31  for (int i = 0; i < array_size; i++) {
32  if (kParamsTrainingFeatureTypeName[i] == nullptr)
33  continue;
34  if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0)
35  return i;
36  }
37  return -1;
38 }

◆ ParseCommandLineFlags()

void tesseract::ParseCommandLineFlags ( const char *  usage,
int *  argc,
char ***  argv,
const bool  remove_flags 
)

Definition at line 166 of file commandlineflags.cpp.

168  {
169  if (*argc == 1) {
170  printf("USAGE: %s\n", usage);
171  PrintCommandLineFlags();
172  exit(0);
173  }
174 
175  if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) {
176  printf("%s\n", TessBaseAPI::Version());
177  exit(0);
178  }
179 
180  int i;
181  for (i = 1; i < *argc; ++i) {
182  const char* current_arg = (*argv)[i];
183  // If argument does not start with a hyphen then break.
184  if (current_arg[0] != '-') {
185  break;
186  }
187  // Position current_arg after startings hyphens. We treat a sequence of
188  // one or two consecutive hyphens identically.
189  ++current_arg;
190  if (current_arg[0] == '-') {
191  ++current_arg;
192  }
193  // If this is asking for usage, print the help message and abort.
194  if (!strcmp(current_arg, "help")) {
195  printf("Usage:\n %s [OPTION ...]\n\n", usage);
196  PrintCommandLineFlags();
197  exit(0);
198  }
199  // Find the starting position of the value if it was specified in this
200  // string.
201  const char* equals_position = strchr(current_arg, '=');
202  const char* rhs = nullptr;
203  if (equals_position != nullptr) {
204  rhs = equals_position + 1;
205  }
206  // Extract the flag name.
207  STRING lhs;
208  if (equals_position == nullptr) {
209  lhs = current_arg;
210  } else {
211  lhs.assign(current_arg, equals_position - current_arg);
212  }
213  if (!lhs.length()) {
214  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
215  exit(1);
216  }
217 
218  // Find the flag name in the list of global flags.
219  // int32_t flag
220  int32_t int_val;
221  if (IntFlagExists(lhs.string(), &int_val)) {
222  if (rhs != nullptr) {
223  if (!strlen(rhs)) {
224  // Bad input of the format --int_flag=
225  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
226  exit(1);
227  }
228  if (!SafeAtoi(rhs, &int_val)) {
229  tprintf("ERROR: Could not parse int from %s in flag %s\n",
230  rhs, (*argv)[i]);
231  exit(1);
232  }
233  } else {
234  // We need to parse the next argument
235  if (i + 1 >= *argc) {
236  tprintf("ERROR: Could not find value argument for flag %s\n",
237  lhs.string());
238  exit(1);
239  } else {
240  ++i;
241  if (!SafeAtoi((*argv)[i], &int_val)) {
242  tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]);
243  exit(1);
244  }
245  }
246  }
247  SetIntFlagValue(lhs.string(), int_val);
248  continue;
249  }
250 
251  // double flag
252  double double_val;
253  if (DoubleFlagExists(lhs.string(), &double_val)) {
254  if (rhs != nullptr) {
255  if (!strlen(rhs)) {
256  // Bad input of the format --double_flag=
257  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
258  exit(1);
259  }
260  if (!SafeAtod(rhs, &double_val)) {
261  tprintf("ERROR: Could not parse double from %s in flag %s\n",
262  rhs, (*argv)[i]);
263  exit(1);
264  }
265  } else {
266  // We need to parse the next argument
267  if (i + 1 >= *argc) {
268  tprintf("ERROR: Could not find value argument for flag %s\n",
269  lhs.string());
270  exit(1);
271  } else {
272  ++i;
273  if (!SafeAtod((*argv)[i], &double_val)) {
274  tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]);
275  exit(1);
276  }
277  }
278  }
279  SetDoubleFlagValue(lhs.string(), double_val);
280  continue;
281  }
282 
283  // Bool flag. Allow input forms --flag (equivalent to --flag=true),
284  // --flag=false, --flag=true, --flag=0 and --flag=1
285  bool bool_val;
286  if (BoolFlagExists(lhs.string(), &bool_val)) {
287  if (rhs == nullptr) {
288  // --flag form
289  bool_val = true;
290  } else {
291  if (!strlen(rhs)) {
292  // Bad input of the format --bool_flag=
293  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
294  exit(1);
295  }
296  if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
297  bool_val = false;
298  } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) {
299  bool_val = true;
300  } else {
301  tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]);
302  exit(1);
303  }
304  }
305  SetBoolFlagValue(lhs.string(), bool_val);
306  continue;
307  }
308 
309  // string flag
310  const char* string_val;
311  if (StringFlagExists(lhs.string(), &string_val)) {
312  if (rhs != nullptr) {
313  string_val = rhs;
314  } else {
315  // Pick the next argument
316  if (i + 1 >= *argc) {
317  tprintf("ERROR: Could not find string value for flag %s\n",
318  lhs.string());
319  exit(1);
320  } else {
321  string_val = (*argv)[++i];
322  }
323  }
324  SetStringFlagValue(lhs.string(), string_val);
325  continue;
326  }
327 
328  // Flag was not found. Exit with an error message.
329  tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]);
330  exit(1);
331  } // for each argv
332  if (remove_flags) {
333  (*argv)[i - 1] = (*argv)[0];
334  (*argv) += (i - 1);
335  (*argc) -= (i - 1);
336  }
337 }

◆ PrepareDistortedPix()

Pix * tesseract::PrepareDistortedPix ( const Pix *  pix,
bool  perspective,
bool  invert,
bool  white_noise,
bool  smooth_noise,
bool  blur,
int  box_reduction,
TRand randomizer,
GenericVector< TBOX > *  boxes 
)

Definition at line 197 of file degradeimage.cpp.

198  {
199  Pix* blurred = pixBlockconv(distorted, 1, 1);
200  pixDestroy(&distorted);
201  distorted = blurred;
202  }
203  if (perspective)
204  GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
205  if (boxes != nullptr) {
206  for (int b = 0; b < boxes->size(); ++b) {
207  (*boxes)[b].scale(1.0f / box_reduction);
208  if ((*boxes)[b].width() <= 0)
209  (*boxes)[b].set_right((*boxes)[b].left() + 1);
210  }
211  }
212  if (invert && randomizer->SignedRand(1.0) < -0)
213  pixInvert(distorted, distorted);
214  return distorted;
215 }
216 
217 // Distorts anything that has a non-null pointer with the same pseudo-random
218 // perspective distortion. Width and height only need to be set if there
219 // is no pix. If there is a pix, then they will be taken from there.
220 void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
221  Pix** pix, GenericVector<TBOX>* boxes) {
222  if (pix != nullptr && *pix != nullptr) {
223  width = pixGetWidth(*pix);
224  height = pixGetHeight(*pix);
225  }
226  float* im_coeffs = nullptr;
227  float* box_coeffs = nullptr;
228  l_int32 incolor =
229  ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
230  if (pix != nullptr && *pix != nullptr) {
231  // Transform the image.
232  Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
233  if (transformed == nullptr) {

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs ( int  width,
int  height,
TRand randomizer,
float **  im_coeffs,
float **  box_coeffs 
)

Definition at line 284 of file degradeimage.cpp.

287  {
288  factors[i] = fabs(randomizer->SignedRand(1.0));
289  if (i <= FN_Y3)
290  factors[i] *= 5.0 / 8.0;
291  else
292  factors[i] *= 0.5;
293  factors[i] *= factors[i];
294  }
295  }
296  // Setup "to" points.
297  Pta* dest_pts = ptaCreate(4);
298  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
299  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
300  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
301  (1 - factors[FN_Y2]) * height);
302  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
303  (1 - factors[FN_Y3]) * height);
304  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306  ptaDestroy(&src_pts);
307  ptaDestroy(&dest_pts);
308  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309 }
310 
311 } // namespace tesseract

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 203 of file publictypes.h.

203  {
204  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
205 }

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 197 of file publictypes.h.

197  {
198  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
199 }

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 206 of file publictypes.h.

206  {
207  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
208 }

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int  pageseg_mode)
inline

Definition at line 194 of file publictypes.h.

194  {
195  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
196 }

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int  pageseg_mode)
inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. They depend critically on the order of the elements of PageSegMode. NOTE that the argument is an int for compatibility with INT_PARAM.

Definition at line 191 of file publictypes.h.

191  {
192  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
193 }

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int  pageseg_mode)
inline

Definition at line 200 of file publictypes.h.

200  {
201  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
202 }

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 209 of file publictypes.h.

209  {
210  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
211  pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
212 }

◆ read_info()

bool tesseract::read_info ( TFile f,
FontInfo fi 
)

Definition at line 153 of file fontinfo.cpp.

153  {
154  uint32_t size;
155  if (!f->DeSerialize(&size)) return false;
156  char* font_name = new char[size + 1];
157  fi->name = font_name;
158  if (!f->DeSerialize(font_name, size)) return false;
159  font_name[size] = '\0';
160  return f->DeSerialize(&fi->properties);
161 }

◆ read_set()

bool tesseract::read_set ( TFile f,
FontSet fs 
)

Definition at line 226 of file fontinfo.cpp.

226  {
227  if (!f->DeSerialize(&fs->size)) return false;
228  fs->configs = new int[fs->size];
229  return f->DeSerialize(&fs->configs[0], fs->size);
230 }

◆ read_spacing_info()

bool tesseract::read_spacing_info ( TFile f,
FontInfo fi 
)

Definition at line 170 of file fontinfo.cpp.

170  {
171  int32_t vec_size, kern_size;
172  if (!f->DeSerialize(&vec_size)) return false;
173  ASSERT_HOST(vec_size >= 0);
174  if (vec_size == 0) return true;
175  fi->init_spacing(vec_size);
176  for (int i = 0; i < vec_size; ++i) {
177  auto *fs = new FontSpacingInfo();
178  if (!f->DeSerialize(&fs->x_gap_before) ||
179  !f->DeSerialize(&fs->x_gap_after) ||
180  !f->DeSerialize(&kern_size)) {
181  delete fs;
182  return false;
183  }
184  if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec
185  delete fs;
186  continue;
187  }
188  if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) ||
189  !fs->kerned_x_gaps.DeSerialize(f))) {
190  delete fs;
191  return false;
192  }
193  fi->add_spacing(i, fs);
194  }
195  return true;
196 }

◆ ReadFile()

STRING tesseract::ReadFile ( const std::string &  filename,
FileReader  reader 
)

Definition at line 57 of file lang_model_helpers.cpp.

57  {
58  if (filename.empty()) return STRING();
60  bool read_result;
61  if (reader == nullptr)
62  read_result = LoadDataFromFile(filename.c_str(), &data);
63  else
64  read_result = (*reader)(filename.c_str(), &data);
65  if (read_result) return STRING(&data[0], data.size());
66  tprintf("Failed to read data from: %s\n", filename.c_str());
67  return STRING();
68 }

◆ ReCachePagesFunc()

void* tesseract::ReCachePagesFunc ( void *  data)

Definition at line 377 of file imagedata.cpp.

377  {
378  auto* document_data = static_cast<DocumentData*>(data);
379  document_data->ReCachePages();
380  return nullptr;
381 }

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses ( GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  percentile 
)

Definition at line 1584 of file paragraphs.cpp.

1587  {
1588  RowScratchRegisters &sr = (*rows)[i];
1589  if (sr.ri_->num_words == 0)
1590  continue;
1591  lefts.add(sr.lmargin_ + sr.lindent_, 1);
1592  rights.add(sr.rmargin_ + sr.rindent_, 1);
1593  }
1594  int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0);
1595  int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0);
1596  for (int i = start; i < end; i++) {
1597  RowScratchRegisters &sr = (*rows)[i];
1598  int ldelta = ignorable_left - sr.lmargin_;
1599  sr.lmargin_ += ldelta;
1600  sr.lindent_ -= ldelta;
1601  int rdelta = ignorable_right - sr.rmargin_;
1602  sr.rmargin_ += rdelta;
1603  sr.rindent_ -= rdelta;
1604  }
1605 }
1606 
1607 // Return the median inter-word space in rows[row_start, row_end).
1609  int row_start, int row_end) {
1610  if (row_end < row_start + 1) return 1;
1611  int word_height = (rows[row_start].ri_->lword_box.height() +
1612  rows[row_end - 1].ri_->lword_box.height()) / 2;
1613  int word_width = (rows[row_start].ri_->lword_box.width() +
1614  rows[row_end - 1].ri_->lword_box.width()) / 2;
1615  STATS spacing_widths(0, 5 + word_width);
1616  for (int i = row_start; i < row_end; i++) {
1617  if (rows[i].ri_->num_words > 1) {
1618  spacing_widths.add(rows[i].ri_->average_interword_space, 1);
1619  }
1620  }
1621  int minimum_reasonable_space = word_height / 3;

◆ RightWordAttributes()

void tesseract::RightWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 471 of file paragraphs.cpp.

472  {
473  *ends_idea = true;
474  }
475  } else { // Assume utf8 is mostly ASCII
476  if (AsciiLikelyListItem(utf8)) {
477  *is_list = true;
478  *starts_idea = true;
479  }
480  int last_letter = utf8[utf8.size() - 1];
481  if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) {
482  *ends_idea = true;
483  }
484  }
485 }
486 
487 // =============== Implementation of RowScratchRegisters =====================
488 /* static */
489 void RowScratchRegisters::AppendDebugHeaderFields(
490  GenericVector<STRING> *header) {
491  header->push_back("[lmarg,lind;rind,rmarg]");
492  header->push_back("model");
493 }
494 
495 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
496  GenericVector<STRING> *dbg) const {
497  char s[30];
498  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]",
499  lmargin_, lindent_, rindent_, rmargin_);
500  dbg->push_back(s);
501  STRING model_string;

◆ RowsFitModel()

bool tesseract::RowsFitModel ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
const ParagraphModel model 
)

Definition at line 1827 of file paragraphs.cpp.

1834  {
1835  // Record patently obvious body text.
1836  for (int i = row_start + 1; i < row_end; i++) {

◆ SaveDataToFile()

bool tesseract::SaveDataToFile ( const GenericVector< char > &  data,
const STRING filename 
)
inline

Definition at line 401 of file genericvector.h.

402  {
403  FILE* fp = fopen(filename.string(), "wb");
404  if (fp == nullptr) {
405  return false;
406  }
407  bool result =
408  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
409  fclose(fp);
410  return result;
411 }

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos  script_pos)

Definition at line 204 of file ratngs.cpp.

204  {
205  switch (script_pos) {
206  case SP_NORMAL: return "NORM";
207  case SP_SUBSCRIPT: return "SUB";
208  case SP_SUPERSCRIPT: return "SUPER";
209  case SP_DROPCAP: return "DROPC";
210  }
211  return "SP_UNKNOWN";
212 }

◆ Serialize() [1/8]

bool tesseract::Serialize ( FILE *  fp,
const char *  data,
size_t  n 
)

Definition at line 77 of file serialis.cpp.

80  {

◆ Serialize() [2/8]

bool tesseract::Serialize ( FILE *  fp,
const float *  data,
size_t  n 
)

Definition at line 81 of file serialis.cpp.

84  {

◆ Serialize() [3/8]

bool tesseract::Serialize ( FILE *  fp,
const int16_t *  data,
size_t  n 
)

Definition at line 89 of file serialis.cpp.

◆ Serialize() [4/8]

bool tesseract::Serialize ( FILE *  fp,
const int32_t *  data,
size_t  n 
)

Definition at line 93 of file serialis.cpp.

93  : offset_(0),
94  data_(nullptr),
95  data_is_owned_(false),

◆ Serialize() [5/8]

bool tesseract::Serialize ( FILE *  fp,
const int8_t *  data,
size_t  n 
)

Definition at line 85 of file serialis.cpp.

88  {

◆ Serialize() [6/8]

bool tesseract::Serialize ( FILE *  fp,
const uint16_t *  data,
size_t  n 
)

Definition at line 101 of file serialis.cpp.

◆ Serialize() [7/8]

bool tesseract::Serialize ( FILE *  fp,
const uint32_t *  data,
size_t  n 
)

Definition at line 105 of file serialis.cpp.

◆ Serialize() [8/8]

bool tesseract::Serialize ( FILE *  fp,
const uint8_t *  data,
size_t  n 
)

Definition at line 97 of file serialis.cpp.

97  {}
98 
99 TFile::~TFile() {

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX blob 
)

Definition at line 69 of file tordmain.cpp.

69  {
70  // Cut the blob rectangle into a Pix.
71  int pix_height = pixGetHeight(pix);
72  const TBOX& box = blob->bounding_box();
73  int width = box.width();
74  int height = box.height();
75  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
76  width, height);
77  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
78  boxDestroy(&blob_pix_box);
79  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
80  pixDestroy(&pix_blob);
81  // Compute the stroke widths.
82  uint32_t* data = pixGetData(dist_pix);
83  int wpl = pixGetWpl(dist_pix);
84  // Horizontal width of stroke.
85  STATS h_stats(0, width + 1);
86  for (int y = 0; y < height; ++y) {
87  uint32_t* pixels = data + y*wpl;
88  int prev_pixel = 0;
89  int pixel = GET_DATA_BYTE(pixels, 0);
90  for (int x = 1; x < width; ++x) {
91  int next_pixel = GET_DATA_BYTE(pixels, x);
92  // We are looking for a pixel that is equal to its vertical neighbours,
93  // yet greater than its left neighbour.
94  if (prev_pixel < pixel &&
95  (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
96  (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
97  if (pixel > next_pixel) {
98  // Single local max, so an odd width.
99  h_stats.add(pixel * 2 - 1, 1);
100  } else if (pixel == next_pixel && x + 1 < width &&
101  pixel > GET_DATA_BYTE(pixels, x + 1)) {
102  // Double local max, so an even width.
103  h_stats.add(pixel * 2, 1);
104  }
105  }
106  prev_pixel = pixel;
107  pixel = next_pixel;
108  }
109  }
110  // Vertical width of stroke.
111  STATS v_stats(0, height + 1);
112  for (int x = 0; x < width; ++x) {
113  int prev_pixel = 0;
114  int pixel = GET_DATA_BYTE(data, x);
115  for (int y = 1; y < height; ++y) {
116  uint32_t* pixels = data + y*wpl;
117  int next_pixel = GET_DATA_BYTE(pixels, x);
118  // We are looking for a pixel that is equal to its horizontal neighbours,
119  // yet greater than its upper neighbour.
120  if (prev_pixel < pixel &&
121  (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
122  (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
123  if (pixel > next_pixel) {
124  // Single local max, so an odd width.
125  v_stats.add(pixel * 2 - 1, 1);
126  } else if (pixel == next_pixel && y + 1 < height &&
127  pixel > GET_DATA_BYTE(pixels + wpl, x)) {
128  // Double local max, so an even width.
129  v_stats.add(pixel * 2, 1);
130  }
131  }
132  prev_pixel = pixel;
133  pixel = next_pixel;
134  }
135  }
136  pixDestroy(&dist_pix);
137  // Store the horizontal and vertical width in the blob, keeping both
138  // widths if there is enough information, otherwise only the one with
139  // the most samples.
140  // If there are insufficient samples, store zero, rather than using
141  // 2*area/perimeter, as the numbers that gives do not match the numbers
142  // from the distance method.
143  if (h_stats.get_total() >= (width + height) / 4) {
144  blob->set_horz_stroke_width(h_stats.ile(0.5f));
145  if (v_stats.get_total() >= (width + height) / 4)
146  blob->set_vert_stroke_width(v_stats.ile(0.5f));
147  else
148  blob->set_vert_stroke_width(0.0f);
149  } else {
150  if (v_stats.get_total() >= (width + height) / 4 ||
151  v_stats.get_total() > h_stats.get_total()) {
152  blob->set_horz_stroke_width(0.0f);
153  blob->set_vert_stroke_width(v_stats.ile(0.5f));
154  } else {
155  blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
156  : 0.0f);
157  blob->set_vert_stroke_width(0.0f);
158  }
159  }
160 }

◆ SetPropertiesForInputFile()

void tesseract::SetPropertiesForInputFile ( const std::string &  script_dir,
const std::string &  input_unicharset_file,
const std::string &  output_unicharset_file,
const std::string &  output_xheights_file 
)

Definition at line 183 of file unicharset_training_utils.cpp.

186  {
187  UNICHARSET unicharset;
188 
189  // Load the input unicharset
190  unicharset.load_from_file(input_unicharset_file.c_str());
191  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
192  input_unicharset_file.c_str());
193 
194  // Set unichar properties
195  tprintf("Setting unichar properties\n");
196  SetupBasicProperties(true, false, &unicharset);
197  tprintf("Setting script properties\n");
198  SetScriptProperties(script_dir, &unicharset);
199  if (!output_xheights_file.empty()) {
200  std::string xheights_str = GetXheightString(script_dir, unicharset);
201  File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
202  }
203 
204  // Write the output unicharset
205  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
206  unicharset.save_to_file(output_unicharset_file.c_str());
207 }

◆ SetScriptProperties()

void tesseract::SetScriptProperties ( const std::string &  script_dir,
UNICHARSET *  unicharset 
)

Definition at line 143 of file unicharset_training_utils.cpp.

143  {
144  for (int s = 0; s < unicharset->get_script_table_size(); ++s) {
145  // Load the unicharset for the script if available.
146  std::string filename = script_dir + "/" +
147  unicharset->get_script_from_script_id(s) + ".unicharset";
148  UNICHARSET script_set;
149  if (script_set.load_from_file(filename.c_str())) {
150  unicharset->SetPropertiesFromOther(script_set);
151  } else if (s != unicharset->common_sid() && s != unicharset->null_sid()) {
152  tprintf("Failed to load script unicharset from:%s\n", filename.c_str());
153  }
154  }
155  for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) {
156  if (unicharset->PropertiesIncomplete(c)) {
157  tprintf("Warning: properties incomplete for index %d = %s\n", c,
158  unicharset->id_to_unichar(c));
159  }
160  }
161 }

◆ SetupBasicProperties() [1/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
bool  decompose,
UNICHARSET *  unicharset 
)

Definition at line 40 of file unicharset_training_utils.cpp.

41  {
42  for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
43  // Convert any custom ligatures.
44  const char* unichar_str = unicharset->id_to_unichar(unichar_id);
45  for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
46  if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
47  unichar_str = UNICHARSET::kCustomLigatures[i][0];
48  break;
49  }
50  }
51 
52  // Convert the unichar to UTF32 representation
53  std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
54 
55  // Assume that if the property is true for any character in the string,
56  // then it holds for the whole "character".
57  bool unichar_isalpha = false;
58  bool unichar_islower = false;
59  bool unichar_isupper = false;
60  bool unichar_isdigit = false;
61  bool unichar_ispunct = false;
62 
63  for (char32 u_ch : uni_vector) {
64  if (u_isalpha(u_ch)) unichar_isalpha = true;
65  if (u_islower(u_ch)) unichar_islower = true;
66  if (u_isupper(u_ch)) unichar_isupper = true;
67  if (u_isdigit(u_ch)) unichar_isdigit = true;
68  if (u_ispunct(u_ch)) unichar_ispunct = true;
69  }
70 
71  unicharset->set_isalpha(unichar_id, unichar_isalpha);
72  unicharset->set_islower(unichar_id, unichar_islower);
73  unicharset->set_isupper(unichar_id, unichar_isupper);
74  unicharset->set_isdigit(unichar_id, unichar_isdigit);
75  unicharset->set_ispunctuation(unichar_id, unichar_ispunct);
76 
78  unicharset->set_script(unichar_id, uscript_getName(
79  uscript_getScript(uni_vector[0], err)));
80 
81  const int num_code_points = uni_vector.size();
82  // Obtain the lower/upper case if needed and record it in the properties.
83  unicharset->set_other_case(unichar_id, unichar_id);
84  if (unichar_islower || unichar_isupper) {
85  std::vector<char32> other_case(num_code_points, 0);
86  for (int i = 0; i < num_code_points; ++i) {
87  // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
88  // However since they deal with UChars (so need a conversion function
89  // from char32 or UTF8string) and require a meaningful locale string,
90  // for now u_tolower()/u_toupper() are used.
91  other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
92  u_tolower(uni_vector[i]);
93  }
94  std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
95  UNICHAR_ID other_case_id =
96  unicharset->unichar_to_id(other_case_uch.c_str());
97  if (other_case_id != INVALID_UNICHAR_ID) {
98  unicharset->set_other_case(unichar_id, other_case_id);
99  } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
100  tprintf("Other case %s of %s is not in unicharset\n",
101  other_case_uch.c_str(), unichar_str);
102  }
103  }
104 
105  // Set RTL property and obtain mirror unichar ID from ICU.
106  std::vector<char32> mirrors(num_code_points, 0);
107  for (int i = 0; i < num_code_points; ++i) {
108  mirrors[i] = u_charMirror(uni_vector[i]);
109  if (i == 0) { // set directionality to that of the 1st code point
110  unicharset->set_direction(unichar_id,
111  static_cast<UNICHARSET::Direction>(
112  u_charDirection(uni_vector[i])));
113  }
114  }
115  std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
116  UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
117  if (mirror_uch_id != INVALID_UNICHAR_ID) {
118  unicharset->set_mirror(unichar_id, mirror_uch_id);
119  } else if (report_errors) {
120  tprintf("Mirror %s of %s is not in unicharset\n",
121  mirror_uch.c_str(), unichar_str);
122  }
123 
124  // Record normalized version of this unichar.
125  std::string normed_str;
126  if (unichar_id != 0 &&
131  unichar_str, &normed_str) &&
132  !normed_str.empty()) {
133  unicharset->set_normed(unichar_id, normed_str.c_str());
134  } else {
135  unicharset->set_normed(unichar_id, unichar_str);
136  }
137  ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
138  }
139  unicharset->post_load_setup();
140 }

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
UNICHARSET *  unicharset 
)
inline

Definition at line 38 of file unicharset_training_utils.h.

38  {
39  SetupBasicProperties(report_errors, false, unicharset);
40 }

◆ SoftmaxInPlace()

template<typename T >
void tesseract::SoftmaxInPlace ( int  n,
T *  inout 
)
inline

Definition at line 146 of file functions.h.

146  {
147  if (n <= 0) return;
148  // A limit on the negative range input to exp to guarantee non-zero output.
149  const T kMaxSoftmaxActivation = 86.0f;
150 
151  T max_output = inout[0];
152  for (int i = 1; i < n; i++) {
153  T output = inout[i];
154  if (output > max_output) max_output = output;
155  }
156  T prob_total = 0.0;
157  for (int i = 0; i < n; i++) {
158  T prob = inout[i] - max_output;
159  prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
160  prob_total += prob;
161  inout[i] = prob;
162  }
163  if (prob_total > 0.0) {
164  for (int i = 0; i < n; i++) inout[i] /= prob_total;
165  }
166 }

◆ sort_cmp()

template<typename T >
int tesseract::sort_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 423 of file genericvector.h.

423  {
424  const T* a = static_cast<const T*>(t1);
425  const T* b = static_cast<const T*>(t2);
426  if (*a < *b) {
427  return -1;
428  }
429  if (*b < *a) {
430  return 1;
431  }
432  return 0;
433 }

◆ sort_ptr_cmp()

template<typename T >
int tesseract::sort_ptr_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 440 of file genericvector.h.

440  {
441  const T* a = *static_cast<T* const*>(t1);
442  const T* b = *static_cast<T* const*>(t2);
443  if (*a < *b) {
444  return -1;
445  }
446  if (*b < *a) {
447  return 1;
448  }
449  return 0;
450 }

◆ SortByBoxBottom()

template<class BBC >
int tesseract::SortByBoxBottom ( const void *  void1,
const void *  void2 
)

Definition at line 407 of file bbgrid.h.

407  {
408  // The void*s are actually doubly indirected, so get rid of one level.
409  const BBC* p1 = *static_cast<const BBC* const*>(void1);
410  const BBC* p2 = *static_cast<const BBC* const*>(void2);
411  int result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
412  if (result != 0)
413  return result;
414  result = p1->bounding_box().top() - p2->bounding_box().top();
415  if (result != 0)
416  return result;
417  result = p1->bounding_box().left() - p2->bounding_box().left();
418  if (result != 0)
419  return result;
420  return p1->bounding_box().right() - p2->bounding_box().right();
421 }

◆ SortByBoxLeft()

template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
)

Definition at line 371 of file bbgrid.h.

371  {
372  // The void*s are actually doubly indirected, so get rid of one level.
373  const BBC* p1 = *static_cast<const BBC* const*>(void1);
374  const BBC* p2 = *static_cast<const BBC* const*>(void2);
375  int result = p1->bounding_box().left() - p2->bounding_box().left();
376  if (result != 0)
377  return result;
378  result = p1->bounding_box().right() - p2->bounding_box().right();
379  if (result != 0)
380  return result;
381  result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
382  if (result != 0)
383  return result;
384  return p1->bounding_box().top() - p2->bounding_box().top();
385 }

◆ SortByRating()

template<class BLOB_CHOICE >
int tesseract::SortByRating ( const void *  void1,
const void *  void2 
)

Definition at line 85 of file pieces.cpp.

◆ SortByUnicharID()

template<class BLOB_CHOICE >
int tesseract::SortByUnicharID ( const void *  void1,
const void *  void2 
)

Definition at line 77 of file pieces.cpp.

79  {
80  const BLOB_CHOICE *p1 = *static_cast<const BLOB_CHOICE *const *>(void1);
81  const BLOB_CHOICE *p2 = *static_cast<const BLOB_CHOICE *const *>(void2);
82 

◆ SortRightToLeft()

template<class BBC >
int tesseract::SortRightToLeft ( const void *  void1,
const void *  void2 
)

Definition at line 389 of file bbgrid.h.

389  {
390  // The void*s are actually doubly indirected, so get rid of one level.
391  const BBC* p1 = *static_cast<const BBC* const*>(void1);
392  const BBC* p2 = *static_cast<const BBC* const*>(void2);
393  int result = p2->bounding_box().right() - p1->bounding_box().right();
394  if (result != 0)
395  return result;
396  result = p2->bounding_box().left() - p1->bounding_box().left();
397  if (result != 0)
398  return result;
399  result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
400  if (result != 0)
401  return result;
402  return p1->bounding_box().top() - p2->bounding_box().top();
403 }

◆ SpanUTF8NotWhitespace()

unsigned int tesseract::SpanUTF8NotWhitespace ( const char *  text)

Definition at line 261 of file normstrngs.cpp.

276  {

◆ SpanUTF8Whitespace()

unsigned int tesseract::SpanUTF8Whitespace ( const char *  text)

Definition at line 251 of file normstrngs.cpp.

253  {
254  return IsValidCodepoint(ch) &&
255  !(ch >= 0xFDD0 && ch <= 0xFDEF) && // Noncharacters.
256  !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
257  !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&
258  !(ch >= 0x3FFFE && ch <= 0x3FFFF) &&
259  !(ch >= 0x4FFFE && ch <= 0x4FFFF) &&

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel model)
inline

Definition at line 71 of file paragraphs_internal.h.

71  {
72  return model != nullptr && model != kCrownLeft && model != kCrownRight;
73 }

◆ SumVectors()

void tesseract::SumVectors ( int  n,
const double *  v1,
const double *  v2,
const double *  v3,
const double *  v4,
const double *  v5,
double *  sum 
)
inline

Definition at line 192 of file functions.h.

194  {
195  for (int i = 0; i < n; ++i) {
196  sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
197  }
198 }

◆ Tanh()

double tesseract::Tanh ( double  x)
inline

Definition at line 43 of file functions.h.

43  {
44  if (x < 0.0) return -Tanh(-x);
45  x *= kScaleFactor;
46  unsigned index = static_cast<unsigned>(x);
47  if (index >= (kTableSize - 1)) return 1.0;
48  double tanh_i0 = TanhTable[index];
49  double tanh_i1 = TanhTable[index + 1];
50  // Linear interpolation.
51  return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
52 }

◆ TraceBlockOnReducedPix()

Pix * tesseract::TraceBlockOnReducedPix ( BLOCK *  block,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 254 of file bbgrid.cpp.

255  {
256  const TBOX& box = block->pdblk.bounding_box();
257  Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
258  int wpl = pixGetWpl(pix);
259  l_uint32* data = pixGetData(pix);
260  ICOORDELT_IT it(block->pdblk.poly_block()->points());
261  for (it.mark_cycle_pt(); !it.cycled_list();) {
262  ICOORD pos = *it.data();
263  it.forward();
264  ICOORD next_pos = *it.data();
265  ICOORD line_vector = next_pos - pos;
266  int major, minor;
267  ICOORD major_step, minor_step;
268  line_vector.setup_render(&major_step, &minor_step, &major, &minor);
269  int accumulator = major / 2;
270  while (pos != next_pos) {
271  int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
272  int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
273  SET_DATA_BIT(data + grid_y * wpl, grid_x);
274  pos += major_step;
275  accumulator += minor;
276  if (accumulator >= major) {
277  accumulator -= major;
278  pos += minor_step;
279  }
280  }
281  }
282  return pix;
283 }

◆ TraceOutlineOnReducedPix()

Pix * tesseract::TraceOutlineOnReducedPix ( C_OUTLINE *  outline,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 228 of file bbgrid.cpp.

229  {
230  const TBOX& box = outline->bounding_box();
231  Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
232  int wpl = pixGetWpl(pix);
233  l_uint32* data = pixGetData(pix);
234  int length = outline->pathlength();
235  ICOORD pos = outline->start_pos();
236  for (int i = 0; i < length; ++i) {
237  int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
238  int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
239  SET_DATA_BIT(data + grid_y * wpl, grid_x);
240  pos += outline->step(i);
241  }
242  return pix;
243 }

◆ UnicodeFor()

int tesseract::UnicodeFor ( const UNICHARSET u,
const WERD_CHOICE werd,
int  pos 
)

Definition at line 304 of file paragraphs.cpp.

310  :
311  const UNICHARSET *u_;

◆ ValidBodyLine()

bool tesseract::ValidBodyLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1304 of file paragraphs.cpp.

1307  {
1308  return NearlyEqual(row_a.rindent_ + row_a.rmargin_,
1309  row_b.rindent_ + row_b.rmargin_,
1310  Epsilon(row_a.ri_->average_interword_space));
1311  }
1312  return NearlyEqual(row_a.lindent_ + row_a.lmargin_,
1313  row_b.lindent_ + row_b.lmargin_,

◆ ValidFirstLine()

bool tesseract::ValidFirstLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1293 of file paragraphs.cpp.

1300  {
1301  if (model != kCrownRight && model != kCrownLeft) {
1302  tprintf("CrownCompatible() should only be called with crown models!\n");

◆ write_info()

bool tesseract::write_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 163 of file fontinfo.cpp.

163  {
164  int32_t size = strlen(fi.name);
165  return tesseract::Serialize(f, &size) &&
166  tesseract::Serialize(f, &fi.name[0], size) &&
167  tesseract::Serialize(f, &fi.properties);
168 }

◆ write_set()

bool tesseract::write_set ( FILE *  f,
const FontSet fs 
)

Definition at line 232 of file fontinfo.cpp.

232  {
233  return tesseract::Serialize(f, &fs.size) &&
234  tesseract::Serialize(f, &fs.configs[0], fs.size);
235 }

◆ write_spacing_info()

bool tesseract::write_spacing_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 198 of file fontinfo.cpp.

198  {
199  int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
200  if (!tesseract::Serialize(f, &vec_size)) return false;
201  int16_t x_gap_invalid = -1;
202  for (int i = 0; i < vec_size; ++i) {
203  FontSpacingInfo *fs = fi.spacing_vec->get(i);
204  int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
205  if (fs == nullptr) {
206  // Writing two invalid x-gaps.
207  if (!tesseract::Serialize(f, &x_gap_invalid, 2) ||
208  !tesseract::Serialize(f, &kern_size)) {
209  return false;
210  }
211  } else {
212  if (!tesseract::Serialize(f, &fs->x_gap_before) ||
213  !tesseract::Serialize(f, &fs->x_gap_after) ||
214  !tesseract::Serialize(f, &kern_size)) {
215  return false;
216  }
217  }
218  if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
219  !fs->kerned_x_gaps.Serialize(f))) {
220  return false;
221  }
222  }
223  return true;
224 }

◆ WriteFile()

bool tesseract::WriteFile ( const std::string &  output_dir,
const std::string &  lang,
const std::string &  suffix,
const GenericVector< char > &  data,
FileWriter  writer 
)

Definition at line 36 of file lang_model_helpers.cpp.

38  {
39  if (lang.empty()) return true;
40  std::string dirname = output_dir + "/" + lang;
41  // Attempt to make the directory, but ignore errors, as it may not be a
42  // standard filesystem, and the writer will complain if not successful.
43 #if defined(_WIN32)
44  _mkdir(dirname.c_str());
45 #else
46  mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
47 #endif
48  std::string filename = dirname + "/" + lang + suffix;
49  if (writer == nullptr)
50  return SaveDataToFile(data, filename.c_str());
51  else
52  return (*writer)(data, filename.c_str());
53 }

◆ WriteRecoder()

bool tesseract::WriteRecoder ( const UNICHARSET unicharset,
bool  pass_through,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
STRING *  radical_table_data,
TessdataManager *  traineddata 
)

Definition at line 85 of file lang_model_helpers.cpp.

88  {
89  UnicharCompress recoder;
90  // Where the unicharset is carefully setup already to contain a good
91  // compact encoding, use a pass-through recoder that does nothing.
92  // For scripts that have a large number of unicodes (Han, Hangul) we want
93  // to use the recoder to compress the symbol space by re-encoding each
94  // unicode as multiple codes from a smaller 'alphabet' that are related to the
95  // shapes in the character. Hangul Jamo is a perfect example of this.
96  // See the Hangul Syllables section, sub-section "Equivalence" in:
97  // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf
98  if (pass_through) {
99  recoder.SetupPassThrough(unicharset);
100  } else {
101  int null_char =
102  unicharset.has_special_codes() ? UNICHAR_BROKEN : unicharset.size();
103  tprintf("Null char=%d\n", null_char);
104  if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) {
105  tprintf("Creation of encoded unicharset failed!!\n");
106  return false;
107  }
108  }
109  TFile fp;
110  GenericVector<char> recoder_data;
111  fp.OpenWrite(&recoder_data);
112  if (!recoder.Serialize(&fp)) return false;
113  traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
114  recoder_data.size());
115  STRING encoding = recoder.GetEncodingAsString(unicharset);
116  recoder_data.init_to_size(encoding.length(), 0);
117  memcpy(&recoder_data[0], &encoding[0], encoding.length());
118  STRING suffix;
119  suffix.add_str_int(".charset_size=", recoder.code_range());
120  suffix += ".txt";
121  return WriteFile(output_dir, lang, suffix.string(), recoder_data, writer);
122 }

◆ WriteShapeTable()

void tesseract::WriteShapeTable ( const STRING file_prefix,
const ShapeTable shape_table 
)

Definition at line 179 of file commontraining.cpp.

179  {
180  STRING shape_table_file = file_prefix;
181  shape_table_file += kShapeTableFileSuffix;
182  FILE* fp = fopen(shape_table_file.string(), "wb");
183  if (fp != nullptr) {
184  if (!shape_table.Serialize(fp)) {
185  fprintf(stderr, "Error writing shape table: %s\n",
186  shape_table_file.string());
187  }
188  fclose(fp);
189  } else {
190  fprintf(stderr, "Error creating shape table: %s\n",
191  shape_table_file.string());
192  }
193 }

◆ WriteUnicharset()

bool tesseract::WriteUnicharset ( const UNICHARSET unicharset,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
TessdataManager *  traineddata 
)

Definition at line 71 of file lang_model_helpers.cpp.

73  {
74  GenericVector<char> unicharset_data;
75  TFile fp;
76  fp.OpenWrite(&unicharset_data);
77  if (!unicharset.save_to_file(&fp)) return false;
78  traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0],
79  unicharset_data.size());
80  return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer);
81 }

◆ ZeroVector()

template<typename T >
void tesseract::ZeroVector ( int  n,
T *  vec 
)
inline

Definition at line 202 of file functions.h.

202  {
203  memset(vec, 0, n * sizeof(*vec));
204 }

Variable Documentation

◆ _TFNetworkModel_default_instance_

TFNetworkModelDefaultTypeInternal tesseract::_TFNetworkModel_default_instance_

Definition at line 52 of file tfnetwork.pb.h.

◆ case_state_table

const int tesseract::case_state_table[6][4]
Initial value:
= {
{
0, 1, 5, 4},
{
0, 3, 2, 4},
{
0, -1, 2, -1},
{
0, 3, -1, 4},
{
0, -1, -1, 4},
{
5, -1, 2, -1},
}

Definition at line 29 of file context.cpp.

◆ DotProduct

DotProductFunction tesseract::DotProduct

Definition at line 49 of file simddetect.cpp.

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

Definition at line 35 of file weightmatrix.cpp.

◆ kAdamEpsilon

const double tesseract::kAdamEpsilon = 1e-8

Definition at line 37 of file weightmatrix.cpp.

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

Definition at line 165 of file weightmatrix.cpp.

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

Definition at line 116 of file tablefind.cpp.

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

Definition at line 38 of file alignedblob.cpp.

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

Definition at line 42 of file alignedblob.cpp.

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

Definition at line 57 of file tablefind.cpp.

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

Definition at line 55 of file tablefind.cpp.

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

Definition at line 56 of file tablefind.cpp.

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

Definition at line 50 of file tablefind.cpp.

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

Definition at line 48 of file tablefind.cpp.

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

Definition at line 49 of file tablefind.cpp.

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

Definition at line 69 of file lstmtrainer.cpp.

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

Definition at line 46 of file colpartitiongrid.cpp.

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

Definition at line 31 of file boxword.cpp.

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

Definition at line 67 of file strokewidth.cpp.

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

Definition at line 1504 of file baseapi.cpp.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

Definition at line 1502 of file baseapi.cpp.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

Definition at line 1496 of file baseapi.cpp.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

Definition at line 42 of file tablerecog.cpp.

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

Definition at line 41 of file tablerecog.cpp.

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

Definition at line 36 of file linerec.cpp.

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

Definition at line 47 of file lstmrecognizer.cpp.

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

Definition at line 61 of file strokewidth.cpp.

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

Definition at line 63 of file strokewidth.cpp.

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

Definition at line 57 of file strokewidth.cpp.

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

Definition at line 59 of file strokewidth.cpp.

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

Definition at line 55 of file strokewidth.cpp.

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

Definition at line 42 of file tabfind.h.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

Definition at line 60 of file tabfind.cpp.

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

Definition at line 45 of file linefind.cpp.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

Definition at line 69 of file paragraphs.cpp.

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

Definition at line 71 of file paragraphs.cpp.

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

Definition at line 70 of file pango_font_info.cpp.

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

Definition at line 70 of file strokewidth.cpp.

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

Definition at line 73 of file strokewidth.cpp.

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

Definition at line 45 of file lstmrecognizer.cpp.

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

Definition at line 35 of file trie.cpp.

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

Definition at line 169 of file weightmatrix.cpp.

◆ kErrClip

const double tesseract::kErrClip = 1.0f

Definition at line 72 of file lstm.cpp.

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

Definition at line 57 of file lstmtrainer.cpp.

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

Definition at line 76 of file degradeimage.cpp.

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

Definition at line 37 of file imagedata.h.

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

Definition at line 49 of file mastertrainer.cpp.

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

Definition at line 37 of file trie.cpp.

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

Definition at line 60 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }

Definition at line 56 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmallSize

const int tesseract::kGoodRowNumberOfColumnsSmallSize
Initial value:
=
sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1

Definition at line 57 of file tablerecog.cpp.

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

Definition at line 35 of file tabvector.cpp.

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

Definition at line 37 of file tabvector.cpp.

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

Definition at line 65 of file lstmtrainer.cpp.

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

Definition at line 367 of file weightmatrix.cpp.

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

Definition at line 27 of file otsuthr.h.

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

Definition at line 49 of file colfind.cpp.

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

Definition at line 35 of file tablerecog.cpp.

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

Definition at line 67 of file colpartition.cpp.

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

Definition at line 63 of file colpartition.cpp.

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

Definition at line 65 of file colpartition.cpp.

◆ kImagePadding

const int tesseract::kImagePadding = 4

Definition at line 39 of file imagedata.h.

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

Definition at line 67 of file lstmtrainer.cpp.

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

Definition at line 906 of file mastertrainer.cpp.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

Definition at line 163 of file weightmatrix.cpp.

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

Definition at line 106 of file tablefind.cpp.

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

Definition at line 108 of file tablefind.cpp.

◆ kLatinChs

const int tesseract::kLatinChs[]
Initial value:
= {
0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}

Latin chars corresponding, entry by entry, to the Unicode chars in kUniChs.

Definition at line 1565 of file baseapi.cpp.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = M_SQRT1_2

Definition at line 53 of file lstmtrainer.cpp.

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

Definition at line 85 of file equationdetect.cpp.

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

Definition at line 48 of file tabvector.cpp.

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

Definition at line 45 of file tablerecog.cpp.

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

Definition at line 44 of file tablerecog.cpp.

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

Definition at line 47 of file linefind.cpp.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

Definition at line 54 of file tabfind.cpp.

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

Definition at line 94 of file strokewidth.cpp.

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

Definition at line 96 of file strokewidth.cpp.

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

Definition at line 98 of file strokewidth.cpp.

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

Definition at line 87 of file strokewidth.cpp.

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

Definition at line 89 of file strokewidth.cpp.

◆ kLRM

const char *const tesseract::kLRM = "\u200E"

Left-to-Right Mark.

Definition at line 39 of file unicodes.cpp.

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

Definition at line 50 of file tablerecog.cpp.

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

Definition at line 44 of file colpartitiongrid.cpp.

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

Definition at line 80 of file equationdetect.cpp.

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

Definition at line 81 of file equationdetect.cpp.

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

Definition at line 82 of file equationdetect.cpp.

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

Definition at line 41 of file ambigs.cpp.

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

Definition at line 70 of file colpartition.cpp.

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

Definition at line 76 of file tablefind.cpp.

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

Definition at line 39 of file tablefind.cpp.

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

Definition at line 32 of file recogtraining.cpp.

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

Definition at line 65 of file tablefind.cpp.

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine
Initial value:
= kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
UNICHAR_LEN

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

Definition at line 1511 of file baseapi.cpp.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

Definition at line 38 of file colpartitiongrid.cpp.

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

Definition at line 84 of file fixxht.cpp.

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

Definition at line 68 of file pagesegmain.cpp.

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

Definition at line 65 of file strokewidth.cpp.

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

Definition at line 77 of file colpartition.cpp.

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

Definition at line 84 of file tablefind.cpp.

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

Definition at line 79 of file strokewidth.cpp.

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

Definition at line 82 of file strokewidth.cpp.

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

Definition at line 54 of file colfind.cpp.

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

Definition at line 44 of file tabvector.cpp.

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

Definition at line 68 of file tablefind.cpp.

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

Definition at line 49 of file tabfind.cpp.

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

Definition at line 46 of file colfind.cpp.

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

Definition at line 28 of file input.cpp.

◆ kMaxIntSize

const int tesseract::kMaxIntSize = 22

Max string length of an int.

Definition at line 121 of file baseapi.cpp.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

Definition at line 44 of file ccnontextdetect.cpp.

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

Definition at line 35 of file ccnontextdetect.cpp.

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

Definition at line 53 of file colpartition.cpp.

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

Definition at line 55 of file colpartition.cpp.

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

Definition at line 65 of file ligature_table.cpp.

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

Definition at line 318 of file boxchar.cpp.

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

Definition at line 53 of file linefind.cpp.

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

Definition at line 40 of file ccnontextdetect.cpp.

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

Definition at line 36 of file colpartitiongrid.cpp.

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

Definition at line 58 of file linefind.cpp.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

Definition at line 32 of file intfeaturemap.cpp.

◆ kMaxOutputRegisters

constexpr int tesseract::kMaxOutputRegisters = 8
constexpr

Definition at line 35 of file intsimdmatrixavx2.cpp.

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

Definition at line 33 of file colpartitiongrid.cpp.

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

Definition at line 125 of file tablefind.cpp.

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

Definition at line 61 of file colpartitiongrid.cpp.

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

Definition at line 39 of file tabfind.cpp.

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

Definition at line 39 of file detlinefit.cpp.

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

Definition at line 42 of file imagefind.cpp.

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

Definition at line 45 of file imagefind.cpp.

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

Definition at line 74 of file colpartition.cpp.

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

Definition at line 53 of file tablerecog.cpp.

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

Definition at line 49 of file colpartition.cpp.

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

Definition at line 51 of file colpartition.cpp.

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

Definition at line 64 of file alignedblob.cpp.

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

Definition at line 32 of file ccnontextdetect.cpp.

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

Definition at line 43 of file colpartition.cpp.

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

Definition at line 60 of file linefind.cpp.

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

Definition at line 80 of file tablefind.cpp.

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

Definition at line 46 of file colpartition.cpp.

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

Definition at line 47 of file mastertrainer.cpp.

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

Definition at line 38 of file tabfind.cpp.

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

Definition at line 37 of file tablefind.cpp.

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

Definition at line 50 of file network.cpp.

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

Definition at line 135 of file tablefind.cpp.

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

Definition at line 50 of file tabvector.cpp.

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

Definition at line 54 of file alignedblob.cpp.

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

Definition at line 72 of file colpartition.cpp.

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

Definition at line 62 of file tablefind.cpp.

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

Definition at line 42 of file colpartitiongrid.cpp.

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

Definition at line 40 of file colpartitiongrid.cpp.

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

Definition at line 30 of file networkio.cpp.

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

Definition at line 61 of file colpartition.cpp.

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

Definition at line 45 of file mastertrainer.cpp.

◆ kMinColorDifference

const int tesseract::kMinColorDifference = 16

Definition at line 51 of file imagefind.cpp.

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

Definition at line 32 of file colpartitionset.cpp.

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

Definition at line 76 of file strokewidth.cpp.

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

Definition at line 46 of file lstmtrainer.cpp.

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

Definition at line 56 of file tabfind.cpp.

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

Definition at line 63 of file tablerecog.cpp.

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

Definition at line 45 of file tabfind.cpp.

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

Definition at line 60 of file ccnontextdetect.cpp.

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

Definition at line 46 of file tabvector.cpp.

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

Definition at line 51 of file colfind.cpp.

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

Definition at line 47 of file imagefind.cpp.

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

Definition at line 57 of file colpartition.cpp.

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

Definition at line 64 of file ligature_table.cpp.

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Divisor applied to the resolution to give the minimum length, in pixels, that a line is required to have.

Definition at line 43 of file linefind.cpp.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

Definition at line 41 of file tabfind.cpp.

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

Definition at line 72 of file tablefind.cpp.

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

Definition at line 62 of file linefind.cpp.

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

Definition at line 96 of file tablefind.cpp.

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

Definition at line 131 of file tablefind.cpp.

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

Definition at line 36 of file detlinefit.cpp.

◆ kMinProb

const float tesseract::kMinProb = exp(kMinCertainty)

Definition at line 32 of file networkio.cpp.

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

Definition at line 52 of file tabvector.cpp.

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

Definition at line 52 of file alignedblob.cpp.

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

Definition at line 80 of file degradeimage.cpp.

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

Definition at line 40 of file imagefind.cpp.

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running tesseract.

Definition at line 104 of file baseapi.cpp.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

Definition at line 111 of file tablefind.cpp.

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

Definition at line 48 of file lstmtrainer.cpp.

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

Definition at line 61 of file lstmtrainer.cpp.

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

Definition at line 59 of file colpartition.cpp.

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

Definition at line 60 of file alignedblob.cpp.

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

Definition at line 49 of file linefind.cpp.

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

Definition at line 37 of file tabfind.cpp.

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

Definition at line 49 of file network.cpp.

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

Definition at line 92 of file strokewidth.cpp.

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

Definition at line 102 of file strokewidth.cpp.

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

Definition at line 107 of file strokewidth.cpp.

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

Definition at line 104 of file strokewidth.cpp.

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

Definition at line 51 of file ccnontextdetect.cpp.

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

Definition at line 55 of file lstmtrainer.cpp.

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number.)

Definition at line 1491 of file baseapi.cpp.

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

Definition at line 30 of file detlinefit.cpp.

◆ kNumInputGroups

constexpr int tesseract::kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup
constexpr

Definition at line 41 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerGroup

constexpr int tesseract::kNumInputsPerGroup = 4
constexpr

Definition at line 39 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerRegister

constexpr int tesseract::kNumInputsPerRegister = 32
constexpr

Definition at line 37 of file intsimdmatrixavx2.cpp.

◆ kNumOutputsPerRegister

constexpr int tesseract::kNumOutputsPerRegister = 8
constexpr

Definition at line 33 of file intsimdmatrixavx2.cpp.

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

Definition at line 59 of file lstmtrainer.cpp.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

Definition at line 47 of file ccnontextdetect.cpp.

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

Definition at line 121 of file tablefind.cpp.

◆ kPDF

const char *const tesseract::kPDF = "\u202C"

Pop Directional Formatting.

Definition at line 42 of file unicodes.cpp.

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

Definition at line 54 of file ccnontextdetect.cpp.

◆ kPrime1

const int tesseract::kPrime1 = 17

Definition at line 36 of file trainingsampleset.cpp.

◆ kPrime2

const int tesseract::kPrime2 = 13

Definition at line 37 of file trainingsampleset.cpp.

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

Definition at line 31 of file unicharcompress.cpp.

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

Definition at line 40 of file alignedblob.cpp.

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

Definition at line 44 of file alignedblob.cpp.

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

Definition at line 51 of file tabfind.cpp.

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

Definition at line 36 of file trainingsample.cpp.

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

Definition at line 31 of file errorcounter.cpp.

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

Definition at line 48 of file tablerecog.cpp.

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

Definition at line 36 of file trie.cpp.

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

Definition at line 37 of file colpartition.h.

◆ kRLE

const char *const tesseract::kRLE = "\u202A"

Right-to-Left Embedding.

Definition at line 41 of file unicodes.cpp.

◆ kRLM

const char *const tesseract::kRLM = "\u200F"

Right-to-Left Mark.

Definition at line 40 of file unicodes.cpp.

◆ kRMSFitScaling

const double tesseract::kRMSFitScaling = 8.0

Definition at line 49 of file imagefind.cpp.

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

Definition at line 74 of file degradeimage.cpp.

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

Definition at line 78 of file degradeimage.cpp.

◆ kScaleFactor

constexpr double tesseract::kScaleFactor = 256.0
constexpr

Definition at line 36 of file functions.h.

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

Definition at line 84 of file equationdetect.cpp.

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

Definition at line 101 of file tablefind.cpp.

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

Definition at line 42 of file tabvector.cpp.

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

Definition at line 39 of file tabvector.cpp.

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

Definition at line 19036 of file universalambigs.cpp.

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

Definition at line 100 of file strokewidth.cpp.

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

Definition at line 105 of file tablefind.cpp.

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

Definition at line 64 of file colpartitiongrid.cpp.

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

Definition at line 43 of file tablefind.cpp.

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

Definition at line 34 of file trainingsampleset.cpp.

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

Definition at line 63 of file lstmtrainer.cpp.

◆ kStateClip

const double tesseract::kStateClip = 100.0

Definition at line 70 of file lstm.cpp.

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

Definition at line 52 of file strokewidth.cpp.

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

Definition at line 140 of file tablefind.cpp.

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

Definition at line 139 of file tablefind.cpp.

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

Definition at line 51 of file strokewidth.cpp.

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

Definition at line 44 of file strokewidth.cpp.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

Definition at line 49 of file strokewidth.cpp.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

Definition at line 51 of file lstmtrainer.cpp.

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

Definition at line 88 of file tablefind.cpp.

◆ kTableSize

constexpr int tesseract::kTableSize = 4096
constexpr

Definition at line 34 of file functions.h.

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

Definition at line 35 of file tabfind.cpp.

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

Definition at line 71 of file lstmtrainer.cpp.

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

Definition at line 72 of file lstmtrainer.cpp.

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract couldn't recognize the input as anything.

Definition at line 106 of file baseapi.cpp.

◆ kTestChar

const int tesseract::kTestChar = -1

Definition at line 32 of file trainingsampleset.cpp.

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

Definition at line 56 of file linefind.cpp.

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Divisor applied to the resolution to give the maximum width, in pixels, allowed for a thin line.

Definition at line 41 of file linefind.cpp.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

Definition at line 48 of file colpartitiongrid.cpp.

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

Definition at line 83 of file equationdetect.cpp.

◆ kUniChs

const int tesseract::kUniChs[]
Initial value:
= {
0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}

Conversion table for non-Latin characters. Maps characters outside the Latin set to their counterparts inside the Latin set (see kLatinChs). TODO(rays) incorporate this translation into unicharset.

Definition at line 1561 of file baseapi.cpp.

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile

Definition at line 24 of file universalambigs.cpp.

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

Definition at line 108 of file baseapi.cpp.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

Definition at line 110 of file baseapi.cpp.

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

Definition at line 38 of file tablerecog.cpp.

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

Definition at line 46 of file alignedblob.cpp.

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

Definition at line 48 of file alignedblob.cpp.

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 500

Definition at line 56 of file alignedblob.cpp.

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

Definition at line 50 of file alignedblob.cpp.

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

Definition at line 38 of file linerec.cpp.

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

Definition at line 52 of file network.cpp.

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

Definition at line 53 of file network.cpp.

◆ LogisticTable

const double tesseract::LogisticTable

Definition at line 4102 of file functions.cpp.

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[]
Initial value:
= {
kDoNotReverse,
kReverseIfHasRTL,
kForceReverse
}

Definition at line 39 of file trie.cpp.

◆ TanhTable

const double tesseract::TanhTable

Definition at line 4 of file functions.cpp.

◆ textord_tabvector_vertical_box_ratio

double tesseract::textord_tabvector_vertical_box_ratio = 0.5

"Fraction of box matches required to declare a line vertical"

Definition at line 58 of file tabvector.cpp.

◆ textord_tabvector_vertical_gap_fraction

double tesseract::textord_tabvector_vertical_gap_fraction = 0.5

"Max fraction of mean blob width allowed for vertical gaps in vertical text"

Definition at line 55 of file tabvector.cpp.

tesseract::TESSDATA_FIXED_LENGTH_DAWGS
@ TESSDATA_FIXED_LENGTH_DAWGS
Definition: tessdatamanager.h:67
PSM_AUTO
@ PSM_AUTO
Definition: capi.h:108
tesseract::PTRAIN_FREQ_MED
@ PTRAIN_FREQ_MED
Definition: params_training_featdef.h:58
tesseract::NC_ANYTHING
@ NC_ANYTHING
Definition: recodebeam.h:73
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:744
TBOX
Definition: rect.h:34
tesseract::NT_RECONFIG
@ NT_RECONFIG
Definition: network.h:55
UNICHARSET::set_script
void set_script(UNICHAR_ID unichar_id, const char *value)
Definition: unicharset.h:462
INT_FX_RESULT_STRUCT::NumCN
int16_t NumCN
Definition: intfx.h:39
tesseract::UnicodeNormMode::kNFKD
@ kNFKD
tesseract::NT_SOFTMAX
@ NT_SOFTMAX
Definition: network.h:68
UNICHARSET::get_script_table_size
int get_script_table_size() const
Definition: unicharset.h:849
UNICHARSET::size
int size() const
Definition: unicharset.h:341
tesseract::AMBIG_TYPE_COUNT
@ AMBIG_TYPE_COUNT
Definition: ambigs.h:44
tesseract::TESSDATA_CUBE_SYSTEM_DAWG
@ TESSDATA_CUBE_SYSTEM_DAWG
Definition: tessdatamanager.h:69
tesstrain_utils.type
type
Definition: tesstrain_utils.py:141
tesseract::XH_SUBNORMAL
@ XH_SUBNORMAL
Definition: dict.h:78
tesseract::CT_SIZE
@ CT_SIZE
Definition: errorcounter.h:89
tesseract::TESSDATA_FREQ_DAWG
@ TESSDATA_FREQ_DAWG
Definition: tessdatamanager.h:66
tesseract::FN_INCOLOR
@ FN_INCOLOR
Definition: degradeimage.cpp:78
UNICHARSET::PropertiesIncomplete
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const
Definition: unicharset.h:646
tesseract::CST_NOISE
@ CST_NOISE
Definition: colpartition.h:49
tesseract::CT_UNICHAR_TOP2_ERR
@ CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75
tesseract::LoadDataFromFile
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
Definition: genericvector.h:375
tesseract::PTRAIN_FREQ_SHORT
@ PTRAIN_FREQ_SHORT
Definition: params_training_featdef.h:57
tesseract::NT_SOFTMAX_NO_CTC
@ NT_SOFTMAX_NO_CTC
Definition: network.h:69
ORIENTATION_PAGE_UP
@ ORIENTATION_PAGE_UP
Definition: capi.h:147
ORIENTATION_PAGE_RIGHT
@ ORIENTATION_PAGE_RIGHT
Definition: capi.h:148
tesseract::NT_PAR_RL_LSTM
@ NT_PAR_RL_LSTM
Definition: network.h:51
tesseract::NC_NO_DUP
@ NC_NO_DUP
Definition: recodebeam.h:77
tesseract::TESSDATA_SYSTEM_DAWG
@ TESSDATA_SYSTEM_DAWG
Definition: tessdatamanager.h:64
STRING::string
const char * string() const
Definition: strngs.cpp:194
tesseract::TS_ENABLED
@ TS_ENABLED
Definition: network.h:95
tesseract::TESSDATA_BIGRAM_DAWG
@ TESSDATA_BIGRAM_DAWG
Definition: tessdatamanager.h:71
PSM_RAW_LINE
@ PSM_RAW_LINE
Definition: capi.h:118
tesseract::NT_POSCLIP
@ NT_POSCLIP
Definition: network.h:63
tesseract::NT_COUNT
@ NT_COUNT
Definition: network.h:80
tesseract::STR_NONE
@ STR_NONE
Definition: lstmtrainer.h:64
tesstrain_utils.dest
dest
Definition: tesstrain_utils.py:139
BLOB_CHOICE
Definition: ratngs.h:52
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
tesseract::PTRAIN_DIGITS_SHORT
@ PTRAIN_DIGITS_SHORT
Definition: params_training_featdef.h:41
RIL_PARA
@ RIL_PARA
Definition: capi.h:123
INT_XRADIUS
#define INT_XRADIUS
Definition: intproto.cpp:57
tesseract::CST_FLOWING
@ CST_FLOWING
Definition: colpartition.h:50
tesseract::NT_PAR_2D_LSTM
@ NT_PAR_2D_LSTM
Definition: network.h:53
tesseract::SpanUTF8Whitespace
unsigned int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:251
tesseract::WriteRecoder
bool WriteRecoder(const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
Definition: lang_model_helpers.cpp:85
tesseract::CST_PULLOUT
@ CST_PULLOUT
Definition: colpartition.h:52
tesseract::CST_IMPROPER
@ CST_IMPROPER
Definition: classify.h:99
tesseract::TA_SEPARATOR
@ TA_SEPARATOR
Definition: tabvector.h:50
tesseract::LT_CTC
@ LT_CTC
Definition: static_shape.h:31
INT_MAX_X
#define INT_MAX_X
Definition: intproto.cpp:61
tesseract::TESSDATA_PARAMS_MODEL
@ TESSDATA_PARAMS_MODEL
Definition: tessdatamanager.h:73
NearlyEqual
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:37
UNICHARSET::Direction
Direction
Definition: unicharset.h:156
UNICHARSET::set_islower
void set_islower(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:436
POLY_BLOCK::points
ICOORDELT_LIST * points()
Definition: polyblk.h:54
InitFeatureDefs
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:112
tesseract::NT_XYTRANSPOSE
@ NT_XYTRANSPOSE
Definition: network.h:58
STRING::assign
void assign(const char *cstr, int len)
Definition: strngs.cpp:420
SPECIAL_UNICHAR_CODES_COUNT
@ SPECIAL_UNICHAR_CODES_COUNT
Definition: unicharset.h:38
UNICHARSET::common_sid
int common_sid() const
Definition: unicharset.h:885
tesseract::IsOCREquivalent
bool IsOCREquivalent(char32 ch1, char32 ch2)
Definition: normstrngs.cpp:232
tesseract::PTRAIN_RATING_PER_CHAR
@ PTRAIN_RATING_PER_CHAR
Definition: params_training_featdef.h:68
OEM_DEFAULT
@ OEM_DEFAULT
Definition: capi.h:102
tesseract::FN_Y1
@ FN_Y1
Definition: degradeimage.cpp:80
tesseract::CASE_AMBIG
@ CASE_AMBIG
Definition: ambigs.h:42
tesseract::PTRAIN_NUM_MED
@ PTRAIN_NUM_MED
Definition: params_training_featdef.h:46
INT_YRADIUS
#define INT_YRADIUS
Definition: intproto.cpp:58
STRING::add_str_int
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
tesseract::TN_ALSO_RAN
@ TN_ALSO_RAN
Definition: recodebeam.h:87
tesseract::PTRAIN_NUM_BAD_FONT
@ PTRAIN_NUM_BAD_FONT
Definition: params_training_featdef.h:67
tesseract::DetectParagraphs
void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)
Definition: paragraphs.cpp:2528
tesseract::COL_TEXT
@ COL_TEXT
Definition: tablefind.h:32
tesseract::LR_LEFT
@ LR_LEFT
Definition: strokewidth.h:40
TPOINT::y
int16_t y
Definition: blobs.h:94
tesseract::LoadShapeTable
ShapeTable * LoadShapeTable(const STRING &file_prefix)
Definition: commontraining.cpp:154
tesseract::SetupBasicProperties
void SetupBasicProperties(bool report_errors, UNICHARSET *unicharset)
Definition: unicharset_training_utils.h:38
tesseract::IsWhitespace
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:241
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:230
PSM_COUNT
@ PSM_COUNT
Definition: capi.h:119
tesseract::NM_BASELINE
@ NM_BASELINE
Definition: normalis.h:43
TBOX::right
int16_t right() const
Definition: rect.h:79
PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_BLOCK_VERT_TEXT
Definition: capi.h:110
tesseract::DEFINITE_AMBIG
@ DEFINITE_AMBIG
Definition: ambigs.h:40
tesseract::FN_X1
@ FN_X1
Definition: degradeimage.cpp:84
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:719
BLOCK::blob_list
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:128
tesseract::CS_ROUND_ROBIN
@ CS_ROUND_ROBIN
Definition: imagedata.h:54
tesseract::TA_RIGHT_ALIGNED
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:48
tesseract::LogisticTable
const double LogisticTable[]
Definition: functions.cpp:4102
UNICHARSET::get_other_case
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:683
tesseract::ViramaScript::kGujarati
@ kGujarati
tesseract::CT_REJECT
@ CT_REJECT
Definition: errorcounter.h:81
tesseract::FN_X0
@ FN_X0
Definition: degradeimage.cpp:83
tesseract::CST_NGRAM
@ CST_NGRAM
Definition: classify.h:100
C_OUTLINE::step
ICOORD step(int index) const
Definition: coutln.h:144
tesseract::SetupBasicProperties
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:40
INT_DESCENDER
#define INT_DESCENDER
Definition: intproto.cpp:50
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
tesseract::CT_RANK
@ CT_RANK
Definition: errorcounter.h:85
tesseract::IcuErrorCode
Definition: icuerrorcode.h:79
tesseract::IsValidCodepoint
bool IsValidCodepoint(const char32 ch)
Definition: normstrngs.cpp:236
tesseract::CanonicalizeDetectionResults
void CanonicalizeDetectionResults(GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
Definition: paragraphs.cpp:2253
INT_MAX_Y
#define INT_MAX_Y
Definition: intproto.cpp:62
tesseract::TrainingSample
Definition: trainingsample.h:53
tesseract::DAWG_TYPE_NUMBER
@ DAWG_TYPE_NUMBER
Definition: dawg.h:71
tesseract::FN_Y2
@ FN_Y2
Definition: degradeimage.cpp:81
PSM_SINGLE_CHAR
@ PSM_SINGLE_CHAR
Definition: capi.h:115
C_OUTLINE::pathlength
int32_t pathlength() const
Definition: coutln.h:135
tesseract::CS_SEQUENTIAL
@ CS_SEQUENTIAL
Definition: imagedata.h:49
BLOBNBOX::set_vert_stroke_width
void set_vert_stroke_width(float width)
Definition: blobbox.h:346
tesseract::kSaltnPepper
const int kSaltnPepper
Definition: degradeimage.cpp:78
tesseract::COL_COUNT
@ COL_COUNT
Definition: tablefind.h:35
RIL_TEXTLINE
@ RIL_TEXTLINE
Definition: capi.h:124
tesseract::NOT_AMBIG
@ NOT_AMBIG
Definition: ambigs.h:38
tesseract::PTRAIN_SHAPE_COST_PER_CHAR
@ PTRAIN_SHAPE_COST_PER_CHAR
Definition: params_training_featdef.h:60
PTIsTextType
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
tesseract::NT_LSTM
@ NT_LSTM
Definition: network.h:60
STRING::length
int32_t length() const
Definition: strngs.cpp:189
PSM_SPARSE_TEXT
@ PSM_SPARSE_TEXT
Definition: capi.h:116
tesseract::NT_SYMCLIP
@ NT_SYMCLIP
Definition: network.h:64
ORIENTATION_PAGE_LEFT
@ ORIENTATION_PAGE_LEFT
Definition: capi.h:150
UNICHARSET::set_isdigit
void set_isdigit(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:446
ICOORD::y
int16_t y() const
access function
Definition: points.h:56
ORIENTATION_PAGE_DOWN
@ ORIENTATION_PAGE_DOWN
Definition: capi.h:149
tesseract::CT_UNICHAR_TOPN_ERR
@ CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76
tesseract::CT_ACCEPTED_JUNK
@ CT_ACCEPTED_JUNK
Definition: errorcounter.h:87
tesseract::PTRAIN_FREQ_LONG
@ PTRAIN_FREQ_LONG
Definition: params_training_featdef.h:59
tesseract::NPT_COUNT
@ NPT_COUNT
Definition: colpartitiongrid.cpp:1513
TPOINT
Definition: blobs.h:51
PSM_CIRCLE_WORD
@ PSM_CIRCLE_WORD
Definition: capi.h:114
ASSERT_HOST_MSG
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:92
tesseract::TESSDATA_PUNC_DAWG
@ TESSDATA_PUNC_DAWG
Definition: tessdatamanager.h:63
tesseract::ViramaScript::kTelugu
@ kTelugu
GenericVector
Definition: baseapi.h:37
TBLOB::denorm
const DENORM & denorm() const
Definition: blobs.h:363
tesseract::OCRNormalize
char32 OCRNormalize(char32 ch)
Definition: normstrngs.cpp:222
BLOBNBOX::set_horz_stroke_width
void set_horz_stroke_width(float width)
Definition: blobbox.h:340
tesseract::TESSDATA_SHAPE_TABLE
@ TESSDATA_SHAPE_TABLE
Definition: tessdatamanager.h:70
BLOBNBOX
Definition: blobbox.h:144
tesseract::ShapeTable
Definition: shapetable.h:261
tesseract::XH_GOOD
@ XH_GOOD
Definition: dict.h:78
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
tesseract::CST_HEADING
@ CST_HEADING
Definition: colpartition.h:51
tesseract::ViramaScript::kNonVirama
@ kNonVirama
tesseract::kForceReverse
const char kForceReverse[]
Definition: trie.cpp:37
tesseract::SP_SUBSCRIPT
@ SP_SUBSCRIPT
Definition: ratngs.h:254
tesseract::LT_NONE
@ LT_NONE
Definition: static_shape.h:30
tesseract::ParagraphJustification
ParagraphJustification
Definition: publictypes.h:251
tesseract::TESSDATA_CUBE_UNICHARSET
@ TESSDATA_CUBE_UNICHARSET
Definition: tessdatamanager.h:68
BLOCK::reject_blobs
C_BLOB_LIST * reject_blobs()
Definition: ocrblock.h:131
JUSTIFICATION_UNKNOWN
@ JUSTIFICATION_UNKNOWN
Definition: capi.h:153
tesseract::SetBlobStrokeWidth
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob)
Definition: tordmain.cpp:69
TBOX::left
int16_t left() const
Definition: rect.h:72
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:34
tesseract::RightWordAttributes
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
Definition: paragraphs.cpp:471
sample
Definition: cluster.h:32
tesseract::CT_OK_MULTI_FONT
@ CT_OK_MULTI_FONT
Definition: errorcounter.h:83
tesseract::NT_REPLICATED
@ NT_REPLICATED
Definition: network.h:50
tesseract::FULL
@ FULL
Definition: lstmtrainer.h:59
tesseract::ShapeTable::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
PSM_SPARSE_TEXT_OSD
@ PSM_SPARSE_TEXT_OSD
Definition: capi.h:117
UNICHARSET::get_script_from_script_id
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:854
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
tesseract::TN_TOPN
@ TN_TOPN
Definition: recodebeam.h:86
PSM_AUTO_ONLY
@ PSM_AUTO_ONLY
Definition: capi.h:107
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
tesseract::OtsuStats
int OtsuStats(const int *histogram, int *H_out, int *omega0_out)
Definition: otsuthr.cpp:188
UNICHARSET::set_normed
void set_normed(UNICHAR_ID unichar_id, const char *normed)
Definition: unicharset.h:482
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:88
kBoostXYBuckets
const int kBoostXYBuckets
Definition: intfeaturespace.h:29
UNICHARSET::set_direction
void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value)
Definition: unicharset.h:472
tesseract::PTRAIN_NUM_BAD_CASE
@ PTRAIN_NUM_BAD_CASE
Definition: params_training_featdef.h:63
tesseract::TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_LSTM_SYSTEM_DAWG
Definition: tessdatamanager.h:76
tesseract::DAWG_TYPE_WORD
@ DAWG_TYPE_WORD
Definition: dawg.h:70
tesseract::ViramaScript::kMalayalam
@ kMalayalam
PSM_OSD_ONLY
@ PSM_OSD_ONLY
Definition: capi.h:105
TEXTLINE_ORDER_RIGHT_TO_LEFT
@ TEXTLINE_ORDER_RIGHT_TO_LEFT
Definition: capi.h:165
TBOX::width
int16_t width() const
Definition: rect.h:115
tesseract::ET_WORD_RECERR
@ ET_WORD_RECERR
Definition: lstmtrainer.h:40
tesseract::ViramaScript::kBengali
@ kBengali
tesseract::TESSDATA_VERSION
@ TESSDATA_VERSION
Definition: tessdatamanager.h:80
tesseract::PERFECT
@ PERFECT
Definition: lstmtrainer.h:49
DENORM::DenormTransform
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
tesseract::PTRAIN_DOC_SHORT
@ PTRAIN_DOC_SHORT
Definition: params_training_featdef.h:49
tesseract::ViramaScript::kTamil
@ kTamil
tesseract::TESSDATA_UNICHARSET
@ TESSDATA_UNICHARSET
Definition: tessdatamanager.h:58
tesseract::LR_RIGHT
@ LR_RIGHT
Definition: strokewidth.h:41
TO_BLOCK
Definition: blobbox.h:693
tesseract::PTRAIN_NUM_FEATURE_TYPES
@ PTRAIN_NUM_FEATURE_TYPES
Definition: params_training_featdef.h:70
GenericVector::reserve
void reserve(int size)
Definition: genericvector.h:717
tesseract::ViramaScript::kOriya
@ kOriya
tesseract::TESSDATA_INTTEMP
@ TESSDATA_INTTEMP
Definition: tessdatamanager.h:60
tesseract::PTRAIN_DICT_SHORT
@ PTRAIN_DICT_SHORT
Definition: params_training_featdef.h:53
TEXTLINE_ORDER_TOP_TO_BOTTOM
@ TEXTLINE_ORDER_TOP_TO_BOTTOM
Definition: capi.h:166
ICOORD
integer coordinate
Definition: points.h:32
tesseract::NT_YREVERSED
@ NT_YREVERSED
Definition: network.h:57
tesseract::PrepareDistortedPix
Pix * PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:197
UNICHARSET::post_load_setup
void post_load_setup()
Definition: unicharset.cpp:926
WRITING_DIRECTION_TOP_TO_BOTTOM
@ WRITING_DIRECTION_TOP_TO_BOTTOM
Definition: capi.h:161
tesseract::TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_LSTM_NUMBER_DAWG
Definition: tessdatamanager.h:77
tesseract::PFR_OK
@ PFR_OK
Definition: strokewidth.h:47
tesseract::LM_PPTYPE_PATH
@ LM_PPTYPE_PATH
Definition: lm_pain_points.h:44
TBOX::top
int16_t top() const
Definition: rect.h:58
tesseract::TN_COUNT
@ TN_COUNT
Definition: recodebeam.h:88
tesseract::GraphemeNorm::kNone
@ kNone
tesseract::FN_Y0
@ FN_Y0
Definition: degradeimage.cpp:79
tesseract::GraphemeNormMode
GraphemeNormMode
Definition: validator.h:50
tesseract::COL_TABLE
@ COL_TABLE
Definition: tablefind.h:33
tesseract::NC_ONLY_DUP
@ NC_ONLY_DUP
Definition: recodebeam.h:74
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:77
tesseract::kScaleFactor
constexpr double kScaleFactor
Definition: functions.h:36
tesseract::ViramaScript::kDevanagari
@ kDevanagari
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
ScrollView::GREY
@ GREY
Definition: scrollview.h:134
tesseract::ShapeTable::Serialize
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:241
GenericVector::Serialize
bool Serialize(FILE *fp) const
Definition: genericvector.h:973
tesseract::kDoNotReverse
const char kDoNotReverse[]
Definition: trie.cpp:35
WRITING_DIRECTION_RIGHT_TO_LEFT
@ WRITING_DIRECTION_RIGHT_TO_LEFT
Definition: capi.h:160
tesseract::PTRAIN_DICT_LONG
@ PTRAIN_DICT_LONG
Definition: params_training_featdef.h:55
PSM_SINGLE_WORD
@ PSM_SINGLE_WORD
Definition: capi.h:113
tesseract::NPT_HTEXT
@ NPT_HTEXT
Definition: colpartitiongrid.cpp:1506
tesseract::FullwidthToHalfwidth
char32 FullwidthToHalfwidth(const char32 ch)
Definition: normstrngs.cpp:300
tesseract::PTRAIN_NUM_LONG
@ PTRAIN_NUM_LONG
Definition: params_training_featdef.h:47
tesseract::NT_SERIES
@ NT_SERIES
Definition: network.h:54
PSM_AUTO_OSD
@ PSM_AUTO_OSD
Definition: capi.h:106
tesseract::NF_ADAM
@ NF_ADAM
Definition: network.h:88
tesseract::NPT_IMAGE
@ NPT_IMAGE
Definition: colpartitiongrid.cpp:1512
tesseract::PTRAIN_NUM_BAD_PUNC
@ PTRAIN_NUM_BAD_PUNC
Definition: params_training_featdef.h:62
GetNextFilename
const char * GetNextFilename(int argc, const char *const *argv)
Definition: commontraining.cpp:323
ScrollView::Rectangle
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:600
tesseract::UnicodeNormMode::kNFKC
@ kNFKC
tesseract::NC_COUNT
@ NC_COUNT
Definition: recodebeam.h:80
tesseract::HistogramRect
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
Definition: otsuthr.cpp:167
tesseract::LM_PPTYPE_NUM
@ LM_PPTYPE_NUM
Definition: lm_pain_points.h:47
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
tesseract::NT_PAR_UD_LSTM
@ NT_PAR_UD_LSTM
Definition: network.h:52
tesseract::kHistogramSize
const int kHistogramSize
Definition: otsuthr.h:27
tesseract::PFR_NOISE
@ PFR_NOISE
Definition: strokewidth.h:49
tesseract::SP_NORMAL
@ SP_NORMAL
Definition: ratngs.h:253
PSM_SINGLE_BLOCK
@ PSM_SINGLE_BLOCK
Definition: capi.h:111
C_OUTLINE::start_pos
const ICOORD & start_pos() const
Definition: coutln.h:148
tesseract::LM_PPTYPE_BLAMER
@ LM_PPTYPE_BLAMER
Definition: lm_pain_points.h:42
tesseract::TS_RE_ENABLE
@ TS_RE_ENABLE
Definition: network.h:99
tesseract::FD_WIDTH
@ FD_WIDTH
Definition: stridemap.h:35
tesseract::TESSDATA_AMBIGS
@ TESSDATA_AMBIGS
Definition: tessdatamanager.h:59
tesseract::CT_OK_MULTI_UNICHAR
@ CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78
tesseract::kBytesPer64BitNumber
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1504
tesseract::OEM_COUNT
@ OEM_COUNT
Definition: publictypes.h:281
tesseract::AsciiLikelyListItem
bool AsciiLikelyListItem(const STRING &word)
Definition: paragraphs.cpp:297
tesseract::IsUTF8Whitespace
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:247
tesseract::TA_LEFT_ALIGNED
@ TA_LEFT_ALIGNED
Definition: tabvector.h:45
tesseract::CT_UNICHAR_TOP_OK
@ CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70
tesseract::TF_INT_MODE
@ TF_INT_MODE
Definition: lstmrecognizer.h:48
tesseract::PTRAIN_DOC_LONG
@ PTRAIN_DOC_LONG
Definition: params_training_featdef.h:51
tesseract::TESSDATA_LSTM_RECODER
@ TESSDATA_LSTM_RECODER
Definition: tessdatamanager.h:79
UNICHARSET::has_special_codes
bool has_special_codes() const
Definition: unicharset.h:722
tesseract::ShapeTable::NumShapes
int NumShapes() const
Definition: shapetable.h:274
tesseract::FN_Y3
@ FN_Y3
Definition: degradeimage.cpp:82
OEM_LSTM_ONLY
@ OEM_LSTM_ONLY
Definition: capi.h:100
tesseract::NT_TANH
@ NT_TANH
Definition: network.h:65
tesseract::XH_INCONSISTENT
@ XH_INCONSISTENT
Definition: dict.h:78
tesseract::NT_INPUT
@ NT_INPUT
Definition: network.h:45
tesseract::Tanh
double Tanh(double x)
Definition: functions.h:43
tesseract::STR_UPDATED
@ STR_UPDATED
Definition: lstmtrainer.h:65
char32
signed int char32
Definition: pango_font_info.h:34
tesseract::ReadFile
STRING ReadFile(const std::string &filename, FileReader reader)
Definition: lang_model_helpers.cpp:57
tesseract::NT_TENSORFLOW
@ NT_TENSORFLOW
Definition: network.h:78
tesseract::NPT_WEAK_VTEXT
@ NPT_WEAK_VTEXT
Definition: colpartitiongrid.cpp:1510
tesseract::ET_CHAR_ERROR
@ ET_CHAR_ERROR
Definition: lstmtrainer.h:41
TBOX::height
int16_t height() const
Definition: rect.h:108
tesseract::ViramaScript::kKannada
@ kKannada
tesseract::PTRAIN_XHEIGHT_CONSISTENCY
@ PTRAIN_XHEIGHT_CONSISTENCY
Definition: params_training_featdef.h:64
INT_XHEIGHT
#define INT_XHEIGHT
Definition: intproto.cpp:52
tesseract::LT_UNKNOWN
@ LT_UNKNOWN
Definition: paragraphs_internal.h:53
tesseract::NM_CHAR_ANISOTROPIC
@ NM_CHAR_ANISOTROPIC
Definition: normalis.h:45
tesseract::PTRAIN_DOC_MED
@ PTRAIN_DOC_MED
Definition: params_training_featdef.h:50
tesseract::NPT_VTEXT
@ NPT_VTEXT
Definition: colpartitiongrid.cpp:1507
tesseract::NT_LSTM_SOFTMAX_ENCODED
@ NT_LSTM_SOFTMAX_ENCODED
Definition: network.h:76
TPOINT::x
int16_t x
Definition: blobs.h:93
tesseract::TESSDATA_PFFMTABLE
@ TESSDATA_PFFMTABLE
Definition: tessdatamanager.h:61
UNICHARSET::set_isalpha
void set_isalpha(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:431
tesseract::NT_CONVOLVE
@ NT_CONVOLVE
Definition: network.h:47
UNICHARSET::kCustomLigatures
static TESS_API const char * kCustomLigatures[][2]
Definition: unicharset.h:150
tesseract::CT_REJECTED_JUNK
@ CT_REJECTED_JUNK
Definition: errorcounter.h:86
JUSTIFICATION_LEFT
@ JUSTIFICATION_LEFT
Definition: capi.h:154
tesseract::CST_COUNT
@ CST_COUNT
Definition: colpartition.h:53
tesseract::COL_MIXED
@ COL_MIXED
Definition: tablefind.h:34
tesseract::kCrownLeft
const ParagraphModel * kCrownLeft
Definition: paragraphs.cpp:70
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:30
feature_defs
FEATURE_DEFS_STRUCT feature_defs
Definition: commontraining.cpp:89
tesseract::kTableSize
constexpr int kTableSize
Definition: functions.h:34
tesseract::ProjectiveCoeffs
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
Definition: degradeimage.cpp:284
tesseract::IntFeatureSpace::Init
void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets)
Definition: intfeaturespace.cpp:30
ICOORD::setup_render
void setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const
Definition: points.cpp:83
TEXTLINE_ORDER_LEFT_TO_RIGHT
@ TEXTLINE_ORDER_LEFT_TO_RIGHT
Definition: capi.h:164
tesseract::Logistic
double Logistic(double x)
Definition: functions.h:54
WRITING_DIRECTION_LEFT_TO_RIGHT
@ WRITING_DIRECTION_LEFT_TO_RIGHT
Definition: capi.h:159
tesseract::GeneratePerspectiveDistortion
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:238
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
tesseract::LM_PPTYPE_SHAPE
@ LM_PPTYPE_SHAPE
Definition: lm_pain_points.h:45
GenericVector::size
int size() const
Definition: genericvector.h:72
OEM_TESSERACT_LSTM_COMBINED
@ OEM_TESSERACT_LSTM_COMBINED
Definition: capi.h:101
tesseract::kRotationRange
const float kRotationRange
Definition: degradeimage.cpp:74
tesseract::SP_DROPCAP
@ SP_DROPCAP
Definition: ratngs.h:256
OEM_TESSERACT_ONLY
@ OEM_TESSERACT_ONLY
Definition: capi.h:99
tesseract::SetScriptProperties
void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:143
RIL_WORD
@ RIL_WORD
Definition: capi.h:125
tesseract::NT_XREVERSED
@ NT_XREVERSED
Definition: network.h:56
INT_XCENTER
#define INT_XCENTER
Definition: intproto.cpp:55
tesseract::COL_UNKNOWN
@ COL_UNKNOWN
Definition: tablefind.h:31
tesseract::NT_RELU
@ NT_RELU
Definition: network.h:66
PT_EQUATION
@ PT_EQUATION
Definition: capi.h:133
tesseract::TESSDATA_LANG_CONFIG
@ TESSDATA_LANG_CONFIG
Definition: tessdatamanager.h:57
STRING::size
int32_t size() const
Definition: strngs.h:68
tesseract::PTRAIN_DIGITS_LONG
@ PTRAIN_DIGITS_LONG
Definition: params_training_featdef.h:43
tesseract::TS_DISABLED
@ TS_DISABLED
Definition: network.h:94
baseline
@ baseline
Definition: mfoutline.h:63
tesseract::LT_SOFTMAX
@ LT_SOFTMAX
Definition: static_shape.h:32
RIL_BLOCK
@ RIL_BLOCK
Definition: capi.h:122
tesseract::PTRAIN_DIGITS_MED
@ PTRAIN_DIGITS_MED
Definition: params_training_featdef.h:42
tesseract::DAWG_TYPE_COUNT
@ DAWG_TYPE_COUNT
Definition: dawg.h:74
tesseract::TS_TEMP_DISABLE
@ TS_TEMP_DISABLE
Definition: network.h:97
tesseract::PTRAIN_DICT_MED
@ PTRAIN_DICT_MED
Definition: params_training_featdef.h:54
tesseract::TESSDATA_UNAMBIG_DAWG
@ TESSDATA_UNAMBIG_DAWG
Definition: tessdatamanager.h:72
UNICHARSET::null_sid
int null_sid() const
Definition: unicharset.h:884
tesseract::kGoodRowNumberOfColumnsSmall
const double kGoodRowNumberOfColumnsSmall[]
Definition: tablerecog.cpp:56
tesseract::TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_LSTM_PUNC_DAWG
Definition: tessdatamanager.h:75
tesseract::HI_PRECISION_ERR
@ HI_PRECISION_ERR
Definition: lstmtrainer.h:51
tesseract::CT_OK_JOINED
@ CT_OK_JOINED
Definition: errorcounter.h:79
tesseract::ET_RMS
@ ET_RMS
Definition: lstmtrainer.h:38
GenericVector::resize_no_init
void resize_no_init(int size)
Definition: genericvector.h:66
tesseract::PTRAIN_NUM_BAD_CHAR_TYPE
@ PTRAIN_NUM_BAD_CHAR_TYPE
Definition: params_training_featdef.h:65
tesseract::kNumbersPerBlob
const int kNumbersPerBlob
Definition: baseapi.cpp:1491
tesseract::FD_DIMSIZE
@ FD_DIMSIZE
Definition: stridemap.h:36
UNICHARSET::save_to_file
bool save_to_file(const char *const filename) const
Definition: unicharset.h:350
tesseract::DAWG_TYPE_PATTERN
@ DAWG_TYPE_PATTERN
Definition: dawg.h:72
tesseract::IntFeatureSpace
Definition: intfeaturespace.h:38
tesseract::REPLACE_AMBIG
@ REPLACE_AMBIG
Definition: ambigs.h:39
C_BLOB
Definition: stepblob.h:38
RIL_SYMBOL
@ RIL_SYMBOL
Definition: capi.h:126
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:468
kBoostDirBuckets
const int kBoostDirBuckets
Definition: intfeaturespace.h:30
UNICHARSET::load_from_file
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:388
UNICHARSET::set_isupper
void set_isupper(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:441
tesseract::TESSDATA_NUM_ENTRIES
@ TESSDATA_NUM_ENTRIES
Definition: tessdatamanager.h:82
tesseract::FD_HEIGHT
@ FD_HEIGHT
Definition: stridemap.h:34
tesseract::NT_LINEAR
@ NT_LINEAR
Definition: network.h:67
tesseract::NT_LOGISTIC
@ NT_LOGISTIC
Definition: network.h:62
INT_BASELINE
#define INT_BASELINE
Definition: intproto.cpp:51
tesseract::SP_SUPERSCRIPT
@ SP_SUPERSCRIPT
Definition: ratngs.h:255
tesseract::PTRAIN_NGRAM_COST_PER_CHAR
@ PTRAIN_NGRAM_COST_PER_CHAR
Definition: params_training_featdef.h:61
UNICHARSET::set_other_case
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
Definition: unicharset.h:467
tesseract::UNENCODABLE
@ UNENCODABLE
Definition: lstmtrainer.h:50
tesseract::CT_NUM_RESULTS
@ CT_NUM_RESULTS
Definition: errorcounter.h:84
tesseract::NT_NONE
@ NT_NONE
Definition: network.h:44
tesseract::TA_RIGHT_RAGGED
@ TA_RIGHT_RAGGED
Definition: tabvector.h:49
JUSTIFICATION_RIGHT
@ JUSTIFICATION_RIGHT
Definition: capi.h:156
tesseract::NT_LSTM_SOFTMAX
@ NT_LSTM_SOFTMAX
Definition: network.h:75
tesseract::FN_NUM_FACTORS
@ FN_NUM_FACTORS
Definition: degradeimage.cpp:88
tesseract::LoadDataFromFile
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
Definition: genericvector.h:394
tesseract::LT_MULTIPLE
@ LT_MULTIPLE
Definition: paragraphs_internal.h:54
WERD_CHOICE::length
int length() const
Definition: ratngs.h:293
tesseract::WriteFile
bool WriteFile(const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)
Definition: lang_model_helpers.cpp:36
tesseract::PTRAIN_NUM_BAD_SPACING
@ PTRAIN_NUM_BAD_SPACING
Definition: params_training_featdef.h:66
tesseract::LT_BODY
@ LT_BODY
Definition: paragraphs_internal.h:52
UNICHARSET::get_ispunctuation
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:519
UNICHARSET::set_mirror
void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror)
Definition: unicharset.h:477
tesseract::NT_LSTM_SUMMARY
@ NT_LSTM_SUMMARY
Definition: network.h:61
UNICHAR_BROKEN
@ UNICHAR_BROKEN
Definition: unicharset.h:36
ScrollView::DrawTo
void DrawTo(int x, int y)
Definition: scrollview.cpp:525
STRING::truncate_at
void truncate_at(int32_t index)
Definition: strngs.cpp:265
tesseract::NM_CHAR_ISOTROPIC
@ NM_CHAR_ISOTROPIC
Definition: normalis.h:44
tesseract::ViramaScript::kGurmukhi
@ kGurmukhi
tesseract::ET_SKIP_RATIO
@ ET_SKIP_RATIO
Definition: lstmtrainer.h:42
INT_CAPHEIGHT
#define INT_CAPHEIGHT
Definition: intproto.cpp:53
tesseract::TRAINABLE
@ TRAINABLE
Definition: lstmtrainer.h:48
BLOCK
Definition: ocrblock.h:31
tesseract::LIGHT
@ LIGHT
Definition: lstmtrainer.h:57
tesseract::NF_LAYER_SPECIFIC_LR
@ NF_LAYER_SPECIFIC_LR
Definition: network.h:87
InitIntegerFX
void InitIntegerFX()
Definition: intfx.cpp:49
tesseract::CT_FONT_ATTR_ERR
@ CT_FONT_ATTR_ERR
Definition: errorcounter.h:82
tesseract::kExposureFactor
const int kExposureFactor
Definition: degradeimage.cpp:76
INT_YCENTER
#define INT_YCENTER
Definition: intproto.cpp:56
tesseract::LT_LOGISTIC
@ LT_LOGISTIC
Definition: static_shape.h:33
C_OUTLINE::bounding_box
const TBOX & bounding_box() const
Definition: coutln.h:113
tesseract::ViramaScript::kSinhala
@ kSinhala
tesseract::LM_PPTYPE_AMBIG
@ LM_PPTYPE_AMBIG
Definition: lm_pain_points.h:43
STRING
Definition: strngs.h:45
tesseract::TESSDATA_NUMBER_DAWG
@ TESSDATA_NUMBER_DAWG
Definition: tessdatamanager.h:65
tesseract::TA_CENTER_JUSTIFIED
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:47
tesseract::FirstWordWouldHaveFit
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after)
Definition: paragraphs.cpp:1672
JUSTIFICATION_CENTER
@ JUSTIFICATION_CENTER
Definition: capi.h:155
tesseract::CST_WHOLE
@ CST_WHOLE
Definition: classify.h:98
PSM_SINGLE_LINE
@ PSM_SINGLE_LINE
Definition: capi.h:112
tesseract::WriteUnicharset
bool WriteUnicharset(const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
Definition: lang_model_helpers.cpp:71
tesseract::CT_UNICHAR_TOPTOP_ERR
@ CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77
tesseract::NormalizeUTF8String
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:165
tesseract::TA_COUNT
@ TA_COUNT
Definition: tabvector.h:51
UNICHARSET::set_ispunctuation
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:451
ScrollView::Clear
void Clear()
Definition: scrollview.cpp:589
STATS
Definition: statistc.h:31
tesseract::OCRNorm::kNormalize
@ kNormalize
tesseract::GetXheightString
std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset)
Definition: unicharset_training_utils.cpp:164
tesseract::LT_START
@ LT_START
Definition: paragraphs_internal.h:51
tesseract::TF_COMPRESS_UNICHARSET
@ TF_COMPRESS_UNICHARSET
Definition: lstmrecognizer.h:49
tesseract::FD_BATCH
@ FD_BATCH
Definition: stridemap.h:33
ScrollView::SetCursor
void SetCursor(int x, int y)
Definition: scrollview.cpp:519
UNICHARSET::SetPropertiesFromOther
void SetPropertiesFromOther(const UNICHARSET &src)
Definition: unicharset.h:545
tesseract::SIMILAR_AMBIG
@ SIMILAR_AMBIG
Definition: ambigs.h:41
ICOORD::x
int16_t x() const
access function
Definition: points.h:52
UNICHARSET
Definition: unicharset.h:145
tesseract::SetOfModels
GenericVectorEqEq< const ParagraphModel * > SetOfModels
Definition: paragraphs_internal.h:99
tesseract::TanhTable
const double TanhTable[]
Definition: functions.cpp:4
tesseract::TESSDATA_NORMPROTO
@ TESSDATA_NORMPROTO
Definition: tessdatamanager.h:62
GenericVector::empty
bool empty() const
Definition: genericvector.h:91
tesseract::TESSDATA_LSTM
@ TESSDATA_LSTM
Definition: tessdatamanager.h:74
tesseract::InterwordSpace
int InterwordSpace(const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
Definition: paragraphs.cpp:1624
tesseract::NOT_BOXED
@ NOT_BOXED
Definition: lstmtrainer.h:52
WERD_CHOICE
Definition: ratngs.h:263
tesseract::ET_COUNT
@ ET_COUNT
Definition: lstmtrainer.h:43
TBOX::bottom
int16_t bottom() const
Definition: rect.h:65
tesseract::PTRAIN_NUM_SHORT
@ PTRAIN_NUM_SHORT
Definition: params_training_featdef.h:45
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:205
tesseract::NT_MAXPOOL
@ NT_MAXPOOL
Definition: network.h:48
tesseract::NO_BEST_TRAINER
@ NO_BEST_TRAINER
Definition: lstmtrainer.h:58
tesseract::SaveDataToFile
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
Definition: genericvector.h:401
tesseract::kMinRampSize
const int kMinRampSize
Definition: degradeimage.cpp:80
tesseract::TA_LEFT_RAGGED
@ TA_LEFT_RAGGED
Definition: tabvector.h:46
tesseract::NT_PARALLEL
@ NT_PARALLEL
Definition: network.h:49
tesseract::CST_FRAGMENT
@ CST_FRAGMENT
Definition: classify.h:97
tesseract::NPT_WEAK_HTEXT
@ NPT_WEAK_HTEXT
Definition: colpartitiongrid.cpp:1508
tesseract::STR_REPLACED
@ STR_REPLACED
Definition: lstmtrainer.h:66
tesseract::PFR_SKEW
@ PFR_SKEW
Definition: strokewidth.h:48
tesseract::TESSDATA_LSTM_UNICHARSET
@ TESSDATA_LSTM_UNICHARSET
Definition: tessdatamanager.h:78
tesseract::kReverseIfHasRTL
const char kReverseIfHasRTL[]
Definition: trie.cpp:36
PSM_SINGLE_COLUMN
@ PSM_SINGLE_COLUMN
Definition: capi.h:109
tesseract::DAWG_TYPE_PUNCTUATION
@ DAWG_TYPE_PUNCTUATION
Definition: dawg.h:69
tesseract::CT_UNICHAR_TOP1_ERR
@ CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74
tesseract::ET_DELTA
@ ET_DELTA
Definition: lstmtrainer.h:39
tesseract::CT_OK_BROKEN
@ CT_OK_BROKEN
Definition: errorcounter.h:80
tesseract::TN_TOP2
@ TN_TOP2
Definition: recodebeam.h:85
tesseract::kCrownRight
const ParagraphModel * kCrownRight
Definition: paragraphs.cpp:72
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:837