tesseract  4.1.1
ratngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: ratngs.h (Formerly ratings.h)
3  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 11:40:38 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef RATNGS_H
21 #define RATNGS_H
22 
23 #include <cassert>
24 #include <cfloat> // for FLT_MAX
25 
26 #include "clst.h"
27 #include "elst.h"
28 #ifndef DISABLED_LEGACY_ENGINE
29 #include "fontinfo.h"
30 #endif // ndef DISABLED_LEGACY_ENGINE
31 #include "genericvector.h"
32 #include "matrix.h"
33 #include "unichar.h"
34 #include "unicharset.h"
35 #include "werd.h"
36 
37 class MATRIX;
38 struct TBLOB;
39 struct TWERD;
40 
// Identifies what produced a BLOB_CHOICE, making it possible to determine
// whether a blob has been classified by inspecting its BLOB_CHOICEs.
enum BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER,   // From the char_norm classifier.
  BCC_ADAPTED_CLASSIFIER,  // From the adaptive classifier.
  BCC_SPECKLE_CLASSIFIER,  // Backup for failed classification.
  BCC_AMBIG,               // Generated by ambiguity detection.
  BCC_FAKE,                // From some other process.
};
50 
51 class BLOB_CHOICE: public ELIST_LINK
52 {
53  public:
55  unichar_id_ = UNICHAR_SPACE;
56  fontinfo_id_ = -1;
57  fontinfo_id2_ = -1;
58  rating_ = 10.0;
59  certainty_ = -1.0;
60  script_id_ = -1;
61  min_xheight_ = 0.0f;
62  max_xheight_ = 0.0f;
63  yshift_ = 0.0f;
64  classifier_ = BCC_FAKE;
65  }
66  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
67  float src_rating, // rating
68  float src_cert, // certainty
69  int script_id, // script
70  float min_xheight, // min xheight in image pixel units
71  float max_xheight, // max xheight allowed by this char
72  float yshift, // the larger of y shift (top or bottom)
73  BlobChoiceClassifier c); // adapted match or other
74  BLOB_CHOICE(const BLOB_CHOICE &other);
75  ~BLOB_CHOICE() = default;
76 
78  return unichar_id_;
79  }
80  float rating() const {
81  return rating_;
82  }
83  float certainty() const {
84  return certainty_;
85  }
86  int16_t fontinfo_id() const {
87  return fontinfo_id_;
88  }
89  int16_t fontinfo_id2() const {
90  return fontinfo_id2_;
91  }
92  #ifndef DISABLED_LEGACY_ENGINE
94  return fonts_;
95  }
97  fonts_ = fonts;
98  int score1 = 0, score2 = 0;
99  fontinfo_id_ = -1;
100  fontinfo_id2_ = -1;
101  for (int f = 0; f < fonts_.size(); ++f) {
102  if (fonts_[f].score > score1) {
103  score2 = score1;
104  fontinfo_id2_ = fontinfo_id_;
105  score1 = fonts_[f].score;
106  fontinfo_id_ = fonts_[f].fontinfo_id;
107  } else if (fonts_[f].score > score2) {
108  score2 = fonts_[f].score;
109  fontinfo_id2_ = fonts_[f].fontinfo_id;
110  }
111  }
112  }
113  #endif // ndef DISABLED_LEGACY_ENGINE
114  int script_id() const {
115  return script_id_;
116  }
118  return matrix_cell_;
119  }
120  float min_xheight() const {
121  return min_xheight_;
122  }
123  float max_xheight() const {
124  return max_xheight_;
125  }
126  float yshift() const {
127  return yshift_;
128  }
130  return classifier_;
131  }
132  bool IsAdapted() const {
133  return classifier_ == BCC_ADAPTED_CLASSIFIER;
134  }
135  bool IsClassified() const {
136  return classifier_ == BCC_STATIC_CLASSIFIER ||
137  classifier_ == BCC_ADAPTED_CLASSIFIER ||
138  classifier_ == BCC_SPECKLE_CLASSIFIER;
139  }
140 
141  void set_unichar_id(UNICHAR_ID newunichar_id) {
142  unichar_id_ = newunichar_id;
143  }
144  void set_rating(float newrat) {
145  rating_ = newrat;
146  }
147  void set_certainty(float newrat) {
148  certainty_ = newrat;
149  }
150  void set_script(int newscript_id) {
151  script_id_ = newscript_id;
152  }
153  void set_matrix_cell(int col, int row) {
154  matrix_cell_.col = col;
155  matrix_cell_.row = row;
156  }
158  classifier_ = classifier;
159  }
160  static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
161  auto* choice = new BLOB_CHOICE;
162  *choice = *src;
163  return choice;
164  }
165  // Returns true if *this and other agree on the baseline and x-height
166  // to within some tolerance based on a given estimate of the x-height.
167  bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height,
168  bool debug) const;
169 
170  void print(const UNICHARSET *unicharset) const {
171  tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
172  rating_, certainty_,
173  min_xheight_, max_xheight_, unichar_id_,
174  (unicharset == nullptr) ? "" :
175  unicharset->debug_str(unichar_id_).string());
176  }
177  void print_full() const {
178  print(nullptr);
179  tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
180  script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
181  }
182  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
183  static int SortByRating(const void *p1, const void *p2) {
184  const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
185  const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
186  return (bc1->rating_ < bc2->rating_) ? -1 : 1;
187  }
188 
189  private:
190  // Copy assignment operator.
191  BLOB_CHOICE& operator=(const BLOB_CHOICE& other);
192 
193  UNICHAR_ID unichar_id_; // unichar id
194 #ifndef DISABLED_LEGACY_ENGINE
195  // Fonts and scores. Allowed to be empty.
197 #endif // ndef DISABLED_LEGACY_ENGINE
198  int16_t fontinfo_id_; // char font information
199  int16_t fontinfo_id2_; // 2nd choice font information
200  // Rating is the classifier distance weighted by the length of the outline
201  // in the blob. In terms of probability, classifier distance is -klog p such
202  // that the resulting distance is in the range [0, 1] and then
203  // rating = w (-k log p) where w is the weight for the length of the outline.
204  // Sums of ratings may be compared meaningfully for words of different
205  // segmentation.
206  float rating_; // size related
207  // Certainty is a number in [-20, 0] indicating the classifier certainty
208  // of the choice. In terms of probability, certainty = 20 (k log p) where
209  // k is defined as above to normalize -klog p to the range [0, 1].
210  float certainty_; // absolute
211  int script_id_;
212  // Holds the position of this choice in the ratings matrix.
213  // Used to location position in the matrix during path backtracking.
214  MATRIX_COORD matrix_cell_;
215  // X-height range (in image pixels) that this classification supports.
216  float min_xheight_;
217  float max_xheight_;
218  // yshift_ - The vertical distance (in image pixels) the character is
219  // shifted (up or down) from an acceptable y position.
220  float yshift_;
221  BlobChoiceClassifier classifier_; // What generated *this.
222 };
223 
224 // Make BLOB_CHOICE listable.
226 
227 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
228 // or nullptr if there is no match.
229 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
230 
// Permuter codes used in WERD_CHOICEs. The numeric values are noted beside
// each enumerator; NUM_PERMUTER_TYPES is the count of real codes.
enum PermuterType {
  NO_PERM,            // 0
  PUNC_PERM,          // 1
  TOP_CHOICE_PERM,    // 2
  LOWER_CASE_PERM,    // 3
  UPPER_CASE_PERM,    // 4
  NGRAM_PERM,         // 5
  NUMBER_PERM,        // 6
  USER_PATTERN_PERM,  // 7
  SYSTEM_DAWG_PERM,   // 8
  DOC_DAWG_PERM,      // 9
  USER_DAWG_PERM,     // 10
  FREQ_DAWG_PERM,     // 11
  COMPOUND_PERM,      // 12

  NUM_PERMUTER_TYPES
};
249 
namespace tesseract {
// ScriptPos tells whether a character is subscript, superscript or normal.
enum ScriptPos {
  SP_NORMAL,
  SP_SUBSCRIPT,
  SP_SUPERSCRIPT,
  SP_DROPCAP
};

// Returns a printable name for the given script position.
const char *ScriptPosToString(tesseract::ScriptPos script_pos);

}  // namespace tesseract.
262 
263 class WERD_CHOICE : public ELIST_LINK {
264  public:
265  static const float kBadRating;
266  static const char *permuter_name(uint8_t permuter);
267 
269  : unicharset_(unicharset) { this->init(8); }
270  WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
271  : unicharset_(unicharset) { this->init(reserved); }
272  WERD_CHOICE(const char *src_string,
273  const char *src_lengths,
274  float src_rating,
275  float src_certainty,
276  uint8_t src_permuter,
277  const UNICHARSET &unicharset)
278  : unicharset_(&unicharset) {
279  this->init(src_string, src_lengths, src_rating,
280  src_certainty, src_permuter);
281  }
282  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
284  : ELIST_LINK(word), unicharset_(word.unicharset_) {
285  this->init(word.length());
286  this->operator=(word);
287  }
288  ~WERD_CHOICE();
289 
290  const UNICHARSET *unicharset() const {
291  return unicharset_;
292  }
293  inline int length() const {
294  return length_;
295  }
296  float adjust_factor() const {
297  return adjust_factor_;
298  }
299  void set_adjust_factor(float factor) {
300  adjust_factor_ = factor;
301  }
302  inline const UNICHAR_ID *unichar_ids() const {
303  return unichar_ids_;
304  }
305  inline UNICHAR_ID unichar_id(int index) const {
306  assert(index < length_);
307  return unichar_ids_[index];
308  }
309  inline int state(int index) const {
310  return state_[index];
311  }
313  if (index < 0 || index >= length_)
314  return tesseract::SP_NORMAL;
315  return script_pos_[index];
316  }
317  inline float rating() const {
318  return rating_;
319  }
320  inline float certainty() const {
321  return certainty_;
322  }
323  inline float certainty(int index) const {
324  return certainties_[index];
325  }
326  inline float min_x_height() const {
327  return min_x_height_;
328  }
329  inline float max_x_height() const {
330  return max_x_height_;
331  }
332  inline void set_x_heights(float min_height, float max_height) {
333  min_x_height_ = min_height;
334  max_x_height_ = max_height;
335  }
336  inline uint8_t permuter() const {
337  return permuter_;
338  }
339  const char *permuter_name() const;
340  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
341  // taken from the appropriate cell in the ratings MATRIX.
342  // Borrowed pointer, so do not delete.
343  BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const;
344 
345  // Returns the MATRIX_COORD corresponding to the location in the ratings
346  // MATRIX for the given index into the word.
347  MATRIX_COORD MatrixCoord(int index) const;
348 
349  inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
350  assert(index < length_);
351  unichar_ids_[index] = unichar_id;
352  }
353  bool dangerous_ambig_found() const {
354  return dangerous_ambig_found_;
355  }
356  void set_dangerous_ambig_found_(bool value) {
357  dangerous_ambig_found_ = value;
358  }
359  inline void set_rating(float new_val) {
360  rating_ = new_val;
361  }
362  inline void set_certainty(float new_val) {
363  certainty_ = new_val;
364  }
365  inline void set_permuter(uint8_t perm) {
366  permuter_ = perm;
367  }
368  // Note: this function should only be used if all the fields
369  // are populated manually with set_* functions (rather than
370  // (copy)constructors and append_* functions).
371  inline void set_length(int len) {
372  ASSERT_HOST(reserved_ >= len);
373  length_ = len;
374  }
375 
377  inline void double_the_size() {
378  if (reserved_ > 0) {
380  reserved_, unichar_ids_);
382  reserved_, script_pos_);
384  reserved_, state_);
386  reserved_, certainties_);
387  reserved_ *= 2;
388  } else {
389  unichar_ids_ = new UNICHAR_ID[1];
390  script_pos_ = new tesseract::ScriptPos[1];
391  state_ = new int[1];
392  certainties_ = new float[1];
393  reserved_ = 1;
394  }
395  }
396 
399  inline void init(int reserved) {
400  reserved_ = reserved;
401  if (reserved > 0) {
402  unichar_ids_ = new UNICHAR_ID[reserved];
403  script_pos_ = new tesseract::ScriptPos[reserved];
404  state_ = new int[reserved];
405  certainties_ = new float[reserved];
406  } else {
407  unichar_ids_ = nullptr;
408  script_pos_ = nullptr;
409  state_ = nullptr;
410  certainties_ = nullptr;
411  }
412  length_ = 0;
413  adjust_factor_ = 1.0f;
414  rating_ = 0.0;
415  certainty_ = FLT_MAX;
416  min_x_height_ = 0.0f;
417  max_x_height_ = FLT_MAX;
418  permuter_ = NO_PERM;
419  unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
420  dangerous_ambig_found_ = false;
421  }
422 
428  void init(const char *src_string, const char *src_lengths,
429  float src_rating, float src_certainty,
430  uint8_t src_permuter);
431 
433  inline void make_bad() {
434  length_ = 0;
435  rating_ = kBadRating;
436  certainty_ = -FLT_MAX;
437  }
438 
443  UNICHAR_ID unichar_id, int blob_count,
444  float rating, float certainty) {
445  assert(reserved_ > length_);
446  length_++;
447  this->set_unichar_id(unichar_id, blob_count,
448  rating, certainty, length_-1);
449  }
450 
451  void append_unichar_id(UNICHAR_ID unichar_id, int blob_count,
452  float rating, float certainty);
453 
454  inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count,
455  float rating, float certainty, int index) {
456  assert(index < length_);
457  unichar_ids_[index] = unichar_id;
458  state_[index] = blob_count;
459  certainties_[index] = certainty;
460  script_pos_[index] = tesseract::SP_NORMAL;
461  rating_ += rating;
462  if (certainty < certainty_) {
463  certainty_ = certainty;
464  }
465  }
466  // Sets the entries for the given index from the BLOB_CHOICE, assuming
467  // unit fragment lengths, but setting the state for this index to blob_count.
468  void set_blob_choice(int index, int blob_count,
469  const BLOB_CHOICE* blob_choice);
470 
472  void remove_unichar_ids(int index, int num);
473  inline void remove_last_unichar_id() { --length_; }
474  inline void remove_unichar_id(int index) {
475  this->remove_unichar_ids(index, 1);
476  }
477  bool has_rtl_unichar_id() const;
479 
480  // Returns the half-open interval of unichar_id indices [start, end) which
481  // enclose the core portion of this word -- the part after stripping
482  // punctuation from the left and right.
483  void punct_stripped(int *start_core, int *end_core) const;
484 
485  // Returns the indices [start, end) containing the core of the word, stripped
486  // of any superscript digits on either side. (i.e., the non-footnote part
487  // of the word). There is no guarantee that the output range is non-empty.
488  void GetNonSuperscriptSpan(int *start, int *end) const;
489 
490  // Return a copy of this WERD_CHOICE with the choices [start, end).
491  // The result is useful only for checking against a dictionary.
492  WERD_CHOICE shallow_copy(int start, int end) const;
493 
494  void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
495  const STRING debug_string() const {
496  STRING word_str;
497  for (int i = 0; i < length_; ++i) {
498  word_str += unicharset_->debug_str(unichar_ids_[i]);
499  word_str += " ";
500  }
501  return word_str;
502  }
503  // Returns true if any unichar_id in the word is a non-space-delimited char.
505  for (int i = 0; i < length_; ++i) {
506  if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
507  }
508  return false;
509  }
510  // Returns true if the word is all spaces.
511  bool IsAllSpaces() const {
512  for (int i = 0; i < length_; ++i) {
513  if (unichar_ids_[i] != UNICHAR_SPACE) return false;
514  }
515  return true;
516  }
517 
518  // Call this to override the default (strict left to right graphemes)
519  // with the fact that some engine produces a "reading order" set of
520  // Graphemes for each word.
521  bool set_unichars_in_script_order(bool in_script_order) {
522  return unichars_in_script_order_ = in_script_order;
523  }
524 
526  return unichars_in_script_order_;
527  }
528 
529  // Returns a UTF-8 string equivalent to the current choice
530  // of UNICHAR IDs.
531  const STRING &unichar_string() const {
532  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
533  return unichar_string_;
534  }
535 
536  // Returns the lengths, one byte each, representing the number of bytes
537  // required in the unichar_string for each UNICHAR_ID.
538  const STRING &unichar_lengths() const {
539  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
540  return unichar_lengths_;
541  }
542 
543  // Sets up the script_pos_ member using the blobs_list to get the bln
544  // bounding boxes, *this to get the unichars, and this->unicharset
545  // to get the target positions. If small_caps is true, sub/super are not
546  // considered, but dropcaps are.
547  // NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.)
548  void SetScriptPositions(bool small_caps, TWERD* word, int debug = 0);
549  // Sets the script_pos_ member from some source positions with a given length.
550  void SetScriptPositions(const tesseract::ScriptPos* positions, int length);
551  // Sets all the script_pos_ positions to the given position.
553 
554  static tesseract::ScriptPos ScriptPositionOf(bool print_debug,
555  const UNICHARSET& unicharset,
556  const TBOX& blob_box,
558 
559  // Returns the "dominant" script ID for the word. By "dominant", the script
560  // must account for at least half the characters. Otherwise, it returns 0.
561  // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
562  int GetTopScriptID() const;
563 
564  // Fixes the state_ for a chop at the given blob_posiiton.
565  void UpdateStateForSplit(int blob_position);
566 
567  // Returns the sum of all the state elements, being the total number of blobs.
568  int TotalOfStates() const;
569 
570  void print() const { this->print(""); }
571  void print(const char *msg) const;
572  // Prints the segmentation state with an introductory message.
573  void print_state(const char *msg) const;
574 
575  // Displays the segmentation state of *this (if not the same as the last
576  // one displayed) and waits for a click in the window.
577  void DisplaySegmentation(TWERD* word);
578 
579  WERD_CHOICE& operator+= ( // concatanate
580  const WERD_CHOICE & second);// second on first
581 
582  WERD_CHOICE& operator= (const WERD_CHOICE& source);
583 
584  private:
585  const UNICHARSET *unicharset_;
586  // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
587  // unichar_ids_ is an array of classifier "results" that make up a word.
588  // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
589  // of each unichar_id.
590  // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
591  // were put together to make the classification results in the ith position
592  // in unichar_ids_, and certainties_[i] is the certainty of the choice that
593  // was used in this word.
594  // == Change from before ==
595  // Previously there was fragment_lengths_ that allowed a word to be
596  // artificially composed of multiple fragment results. Since the new
597  // segmentation search doesn't do fragments, treatment of fragments has
598  // been moved to a lower level, augmenting the ratings matrix with the
599  // combined fragments, and allowing the language-model/segmentation-search
600  // to deal with only the combined unichar_ids.
601  UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word
602  tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar.
603  int* state_; // Number of blobs in each unichar.
604  float* certainties_; // Certainty of each unichar.
605  int reserved_; // size of the above arrays
606  int length_; // word length
607  // Factor that was used to adjust the rating.
608  float adjust_factor_;
609  // Rating is the sum of the ratings of the individual blobs in the word.
610  float rating_; // size related
611  // certainty is the min (worst) certainty of the individual blobs in the word.
612  float certainty_; // absolute
613  // xheight computed from the result, or 0 if inconsistent.
614  float min_x_height_;
615  float max_x_height_;
616  uint8_t permuter_; // permuter code
617 
618  // Normally, the ratings_ matrix represents the recognition results in order
619  // from left-to-right. However, some engines (say Cube) may return
620  // recognition results in the order of the script's major reading direction
621  // (for Arabic, that is right-to-left).
622  bool unichars_in_script_order_;
623  // True if NoDangerousAmbig found an ambiguity.
624  bool dangerous_ambig_found_;
625 
626  // The following variables are populated and passed by reference any
627  // time unichar_string() or unichar_lengths() are called.
628  mutable STRING unichar_string_;
629  mutable STRING unichar_lengths_;
630 };
631 
632 // Make WERD_CHOICE listable.
634 using BLOB_CHOICE_LIST_VECTOR = GenericVector<BLOB_CHOICE_LIST *>;
635 
636 // Utilities for comparing WERD_CHOICEs
637 
639  const WERD_CHOICE &word2);
640 
641 // Utilities for debug printing.
642 void print_ratings_list(
643  const char *msg, // intro message
644  BLOB_CHOICE_LIST *ratings, // list of results
645  const UNICHARSET &current_unicharset // unicharset that can be used
646  // for id-to-unichar conversion
647  );
648 
649 #endif
WERD_CHOICE::dangerous_ambig_found
bool dangerous_ambig_found() const
Definition: ratngs.h:353
TBOX
Definition: rect.h:34
PUNC_PERM
@ PUNC_PERM
Definition: ratngs.h:234
WERD_CHOICE::permuter_name
const char * permuter_name() const
Definition: ratngs.cpp:287
WERD_CHOICE::set_unichar_id
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:349
WERD_CHOICE::DisplaySegmentation
void DisplaySegmentation(TWERD *word)
Definition: ratngs.cpp:765
WERD_CHOICE::state
int state(int index) const
Definition: ratngs.h:309
STRING::string
const char * string() const
Definition: strngs.cpp:194
MATRIX_COORD::col
int col
Definition: matrix.h:636
BLOB_CHOICE::~BLOB_CHOICE
~BLOB_CHOICE()=default
WERD_CHOICE::remove_last_unichar_id
void remove_last_unichar_id()
Definition: ratngs.h:473
WERD_CHOICE::has_rtl_unichar_id
bool has_rtl_unichar_id() const
Definition: ratngs.cpp:435
WERD_CHOICE::BlobPosition
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:312
BLOB_CHOICE::unichar_id
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:495
BLOB_CHOICE
Definition: ratngs.h:52
MATRIX_COORD
Definition: matrix.h:608
TWERD
Definition: blobs.h:418
BLOB_CHOICE::fontinfo_id2
int16_t fontinfo_id2() const
Definition: ratngs.h:89
WERD_CHOICE::IsAllSpaces
bool IsAllSpaces() const
Definition: ratngs.h:511
UPPER_CASE_PERM
@ UPPER_CASE_PERM
Definition: ratngs.h:237
WERD_CHOICE::set_unichar_id
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
Definition: ratngs.h:454
WERD_CHOICE::SetAllScriptPositions
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:627
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:531
WERD_CHOICE::set_dangerous_ambig_found_
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:356
clst.h
WERD_CHOICE::MatrixCoord
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:306
WERD_CHOICE::set_x_heights
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:332
NO_PERM
@ NO_PERM
Definition: ratngs.h:233
WERD_CHOICE::punct_stripped
void punct_stripped(int *start_core, int *end_core) const
Definition: ratngs.cpp:387
werd.h
SYSTEM_DAWG_PERM
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:241
UNICHARSET::IsSpaceDelimited
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652
FindMatchingChoice
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:184
WERD_CHOICE::WERD_CHOICE
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:270
ELISTIZEH
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:918
WERD_CHOICE::certainty
float certainty(int index) const
Definition: ratngs.h:323
tesseract
Definition: altorenderer.cpp:25
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:80
DOC_DAWG_PERM
@ DOC_DAWG_PERM
Definition: ratngs.h:242
WERD_CHOICE::WERD_CHOICE
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:268
MATRIX_COORD::row
int row
Definition: matrix.h:637
WERD_CHOICE::remove_unichar_ids
void remove_unichar_ids(int index, int num)
Definition: ratngs.cpp:346
WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:317
WERD_CHOICE::remove_unichar_id
void remove_unichar_id(int index)
Definition: ratngs.h:474
ELIST_LINK
Definition: elst.h:78
GenericVector< tesseract::ScoredFont >
BCC_AMBIG
@ BCC_AMBIG
Definition: ratngs.h:47
WERD_CHOICE::set_certainty
void set_certainty(float new_val)
Definition: ratngs.h:362
UNICHARSET::debug_str
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:343
genericvector.h
tesseract::SP_SUBSCRIPT
@ SP_SUBSCRIPT
Definition: ratngs.h:254
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:34
BLOB_CHOICE::print
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:170
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
print_ratings_list
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:837
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
WERD_CHOICE::blob_choices
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:294
COMPOUND_PERM
@ COMPOUND_PERM
Definition: ratngs.h:245
WERD_CHOICE::operator=
WERD_CHOICE & operator=(const WERD_CHOICE &source)
Definition: ratngs.cpp:525
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:88
BLOB_CHOICE::max_xheight
float max_xheight() const
Definition: ratngs.h:123
PermuterType
PermuterType
Definition: ratngs.h:232
BLOB_CHOICE::set_classifier
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:157
matrix.h
unichar.h
BLOB_CHOICE::fonts
const GenericVector< tesseract::ScoredFont > & fonts() const
Definition: ratngs.h:93
tesseract::ScriptPos
ScriptPos
Definition: ratngs.h:252
WERD_CHOICE::ScriptPositionOf
static tesseract::ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)
Definition: ratngs.cpp:633
WERD_CHOICE::unichars_in_script_order
bool unichars_in_script_order() const
Definition: ratngs.h:525
BLOB_CHOICE::set_rating
void set_rating(float newrat)
Definition: ratngs.h:144
WERD_CHOICE::unichar_lengths
const STRING & unichar_lengths() const
Definition: ratngs.h:538
WERD_CHOICE::append_unichar_id
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.cpp:472
elst.h
BCC_ADAPTED_CLASSIFIER
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:45
tesseract::SP_NORMAL
@ SP_NORMAL
Definition: ratngs.h:253
WERD_CHOICE::set_blob_choice
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:316
WERD_CHOICE::unichar_ids
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:302
BLOB_CHOICE::PosAndSizeAgree
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:156
BLOB_CHOICE::IsClassified
bool IsClassified() const
Definition: ratngs.h:135
WERD_CHOICE::max_x_height
float max_x_height() const
Definition: ratngs.h:329
unicharset.h
NUMBER_PERM
@ NUMBER_PERM
Definition: ratngs.h:239
WERD_CHOICE::set_permuter
void set_permuter(uint8_t perm)
Definition: ratngs.h:365
WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:320
WERD_CHOICE::set_unichars_in_script_order
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:521
BLOB_CHOICE::min_xheight
float min_xheight() const
Definition: ratngs.h:120
TOP_CHOICE_PERM
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
BLOB_CHOICE::deep_copy
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:160
WERD_CHOICE::min_x_height
float min_x_height() const
Definition: ratngs.h:326
BLOB_CHOICE::set_fonts
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:96
BlobChoiceClassifier
BlobChoiceClassifier
Definition: ratngs.h:43
MATRIX
Definition: matrix.h:578
WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:290
GenericVector::size
int size() const
Definition: genericvector.h:72
TBLOB
Definition: blobs.h:284
tesseract::SP_DROPCAP
@ SP_DROPCAP
Definition: ratngs.h:256
BLOB_CHOICE::set_certainty
void set_certainty(float newrat)
Definition: ratngs.h:147
WERD_CHOICE::make_bad
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:433
WERD_CHOICE::double_the_size
void double_the_size()
Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays.
Definition: ratngs.h:377
BLOB_CHOICE::classifier
BlobChoiceClassifier classifier() const
Definition: ratngs.h:129
BCC_STATIC_CLASSIFIER
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:44
USER_DAWG_PERM
@ USER_DAWG_PERM
Definition: ratngs.h:243
WERD_CHOICE::~WERD_CHOICE
~WERD_CHOICE()
Definition: ratngs.cpp:280
WERD_CHOICE::shallow_copy
WERD_CHOICE shallow_copy(int start, int end) const
Definition: ratngs.cpp:418
WERD_CHOICE::adjust_factor
float adjust_factor() const
Definition: ratngs.h:296
fontinfo.h
WERD_CHOICE::GetTopScriptID
int GetTopScriptID() const
Definition: ratngs.cpp:671
WERD_CHOICE::set_adjust_factor
void set_adjust_factor(float factor)
Definition: ratngs.h:299
WERD_CHOICE::append_unichar_id_space_allocated
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:442
WERD_CHOICE::reverse_and_mirror_unichar_ids
void reverse_and_mirror_unichar_ids()
Definition: ratngs.cpp:369
BLOB_CHOICE::yshift
float yshift() const
Definition: ratngs.h:126
WERD_CHOICE::WERD_CHOICE
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:272
BLOB_CHOICE::script_id
int script_id() const
Definition: ratngs.h:114
WERD_CHOICE::kBadRating
static const float kBadRating
Definition: ratngs.h:265
GenericVector::double_the_size_memcpy
static T * double_the_size_memcpy(int current_size, T *data)
Definition: genericvector.h:213
WERD_CHOICE::print_state
void print_state(const char *msg) const
Definition: ratngs.cpp:756
FREQ_DAWG_PERM
@ FREQ_DAWG_PERM
Definition: ratngs.h:244
BLOB_CHOICE::fontinfo_id
int16_t fontinfo_id() const
Definition: ratngs.h:86
tesseract::SP_SUPERSCRIPT
@ SP_SUPERSCRIPT
Definition: ratngs.h:255
WERD_CHOICE::permuter
uint8_t permuter() const
Definition: ratngs.h:336
BLOB_CHOICE::set_script
void set_script(int newscript_id)
Definition: ratngs.h:150
WERD_CHOICE::operator+=
WERD_CHOICE & operator+=(const WERD_CHOICE &second)
Definition: ratngs.cpp:489
BLOB_CHOICE::BLOB_CHOICE
BLOB_CHOICE()
Definition: ratngs.h:54
WERD_CHOICE::length
int length() const
Definition: ratngs.h:293
BLOB_CHOICE::print_full
void print_full() const
Definition: ratngs.h:177
BCC_FAKE
@ BCC_FAKE
Definition: ratngs.h:48
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:83
WERD_CHOICE::TotalOfStates
int TotalOfStates() const
Definition: ratngs.cpp:715
BLOB_CHOICE::matrix_cell
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:117
WERD_CHOICE::contains_unichar_id
bool contains_unichar_id(UNICHAR_ID unichar_id) const
Definition: ratngs.cpp:330
WERD_CHOICE::string_and_lengths
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:453
BLOB_CHOICE::IsAdapted
bool IsAdapted() const
Definition: ratngs.h:132
WERD_CHOICE::set_length
void set_length(int len)
Definition: ratngs.h:371
STRING
Definition: strngs.h:45
USER_PATTERN_PERM
@ USER_PATTERN_PERM
Definition: ratngs.h:240
EqualIgnoringCaseAndTerminalPunct
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
Definition: ratngs.cpp:809
NGRAM_PERM
@ NGRAM_PERM
Definition: ratngs.h:238
WERD_CHOICE::print
void print() const
Definition: ratngs.h:570
WERD_CHOICE::UpdateStateForSplit
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:703
WERD_CHOICE::ContainsAnyNonSpaceDelimited
bool ContainsAnyNonSpaceDelimited() const
Definition: ratngs.h:504
UNICHARSET
Definition: unicharset.h:145
BCC_SPECKLE_CLASSIFIER
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:46
WERD_CHOICE
Definition: ratngs.h:263
UNICHAR_SPACE
@ UNICHAR_SPACE
Definition: unicharset.h:34
tesseract::ScriptPosToString
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:204
NUM_PERMUTER_TYPES
@ NUM_PERMUTER_TYPES
Definition: ratngs.h:247
LOWER_CASE_PERM
@ LOWER_CASE_PERM
Definition: ratngs.h:236
WERD_CHOICE::WERD_CHOICE
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:283
WERD_CHOICE::init
void init(int reserved)
Definition: ratngs.h:399
BLOB_CHOICE::set_unichar_id
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:141
WERD_CHOICE::GetNonSuperscriptSpan
void GetNonSuperscriptSpan(int *start, int *end) const
Definition: ratngs.cpp:401
WERD_CHOICE::set_rating
void set_rating(float new_val)
Definition: ratngs.h:359
BLOB_CHOICE::set_matrix_cell
void set_matrix_cell(int col, int row)
Definition: ratngs.h:153
WERD_CHOICE::SetScriptPositions
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
Definition: ratngs.cpp:554
BLOB_CHOICE::SortByRating
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:183