tesseract  4.1.1
chopper.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: chopper.cpp (Formerly chopper.c)
5  * Author: Mark Seaman, OCR Technology
6  *
7  * (c) Copyright 1987, Hewlett-Packard Company.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **************************************************************************/
19 
20 /*----------------------------------------------------------------------
21  I n c l u d e s
22 ----------------------------------------------------------------------*/
23 
24 #include "blamer.h" // for BlamerBundle, IRR_CORRECT
25 #include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob
26 #include "callcpp.h" // for Red
27 #include "dict.h" // for Dict
28 #include "lm_pain_points.h" // for LMPainPoints
29 #include "lm_state.h" // for BestChoiceBundle
30 #include "matrix.h" // for MATRIX
31 #include "normalis.h" // for DENORM
32 #include "pageres.h" // for WERD_RES
33 #include "params.h" // for IntParam, BoolParam
34 #include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ...
35 #include "rect.h" // for TBOX
36 #include "render.h" // for display_blob
37 #include "seam.h" // for SEAM
38 #include "split.h" // for remove_edgept
39 #include "stopper.h" // for DANGERR
40 #include "tprintf.h" // for tprintf
41 #include "wordrec.h" // for Wordrec, SegSearchPending (ptr only)
42 
43 template <typename T> class GenericVector;
44 
45 // Include automatically generated configuration file if running autoconf.
46 #ifdef HAVE_CONFIG_H
47 #include "config_auto.h"
48 #endif
49 
// Upper bound on the dimension of the ratings matrix while chopping. The
// limit is no longer strictly required, but it is kept so results stay
// repeatable, and it may also help speed.
static constexpr int kMaxNumChunks = 64;
53 
54 /*----------------------------------------------------------------------
55  F u n c t i o n s
56 ----------------------------------------------------------------------*/
57 
63 static int check_blob(TBLOB *blob) {
64  TESSLINE *outline;
65  EDGEPT *edgept;
66 
67  for (outline = blob->outlines; outline != nullptr; outline = outline->next) {
68  edgept = outline->loop;
69  do {
70  if (edgept == nullptr)
71  break;
72  edgept = edgept->next;
73  }
74  while (edgept != outline->loop);
75  if (edgept == nullptr)
76  return 1;
77  }
78  return 0;
79 }
80 
86 static int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
87  int length;
88  int index;
89 
90  length = seams.size();
91  for (index = 0; index < length; index++)
92  if (seam->SharesPosition(*seams[index])) return true;
93  return false;
94 }
95 
101 static void preserve_outline(EDGEPT *start) {
102  EDGEPT *srcpt;
103 
104  if (start == nullptr)
105  return;
106  srcpt = start;
107  do {
108  srcpt->flags[1] = 1;
109  srcpt = srcpt->next;
110  }
111  while (srcpt != start);
112  srcpt->flags[1] = 2;
113 }
114 
115 static void preserve_outline_tree(TESSLINE *srcline) {
116  TESSLINE *outline;
117 
118  for (outline = srcline; outline != nullptr; outline = outline->next) {
119  preserve_outline (outline->loop);
120  }
121 }
122 
128 static EDGEPT *restore_outline(EDGEPT *start) {
129  EDGEPT *srcpt;
130  EDGEPT *real_start;
131 
132  if (start == nullptr)
133  return nullptr;
134  srcpt = start;
135  do {
136  if (srcpt->flags[1] == 2)
137  break;
138  srcpt = srcpt->next;
139  }
140  while (srcpt != start);
141  real_start = srcpt;
142  do {
143  srcpt = srcpt->next;
144  if (srcpt->prev->flags[1] == 0) {
145  remove_edgept(srcpt->prev);
146  }
147  }
148  while (srcpt != real_start);
149  return real_start;
150 }
151 
152 static void restore_outline_tree(TESSLINE *srcline) {
153  TESSLINE *outline;
154 
155  for (outline = srcline; outline != nullptr; outline = outline->next) {
156  outline->loop = restore_outline (outline->loop);
157  outline->start = outline->loop->pos;
158  }
159 }
160 
161 /**********************************************************************
162  * total_containment
163  *
164  * Check to see if one of these outlines is totally contained within
165  * the bounding box of the other.
166  **********************************************************************/
167 static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
168  TBOX box1 = blob1->bounding_box();
169  TBOX box2 = blob2->bounding_box();
170  return box1.contains(box2) || box2.contains(box1);
171 }
172 
173 // Helper runs all the checks on a seam to make sure it is valid.
174 // Returns the seam if OK, otherwise deletes the seam and returns nullptr.
175 static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word,
176  TBLOB* blob, TBLOB* other_blob,
177  const GenericVector<SEAM*>& seams, SEAM* seam) {
178  if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr ||
179  total_containment(blob, other_blob) || check_blob(other_blob) ||
180  !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
181  any_shared_split_points(seams, seam) ||
182  !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
183  word->blobs.remove(blob_number + 1);
184  if (seam) {
185  seam->UndoSeam(blob, other_blob);
186  delete seam;
187  seam = nullptr;
188 #ifndef GRAPHICS_DISABLED
189  if (debug_level) {
190  if (debug_level >2)
191  display_blob(blob, Red);
192  tprintf("\n** seam being removed ** \n");
193  }
194 #endif
195  } else {
196  delete other_blob;
197  }
198  return nullptr;
199  }
200  return seam;
201 }
202 
203 namespace tesseract {
204 
211 SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number,
212  bool italic_blob,
213  const GenericVector<SEAM*>& seams) {
215  preserve_outline_tree (blob->outlines);
216  TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
217  // Insert it into the word.
218  word->blobs.insert(other_blob, blob_number + 1);
219 
220  SEAM *seam = nullptr;
221  if (prioritize_division) {
222  TPOINT location;
223  if (divisible_blob(blob, italic_blob, &location)) {
224  seam = new SEAM(0.0f, location);
225  }
226  }
227  if (seam == nullptr)
228  seam = pick_good_seam(blob);
229  if (chop_debug) {
230  if (seam != nullptr)
231  seam->Print("Good seam picked=");
232  else
233  tprintf("\n** no seam picked *** \n");
234  }
235  if (seam) {
236  seam->ApplySeam(italic_blob, blob, other_blob);
237  }
238 
239  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
240  seams, seam);
241  if (seam == nullptr) {
243  restore_outline_tree(blob->outlines);
245  // If the blob can simply be divided into outlines, then do that.
246  TPOINT location;
247  if (divisible_blob(blob, italic_blob, &location)) {
248  other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
249  word->blobs.insert(other_blob, blob_number + 1);
250  seam = new SEAM(0.0f, location);
251  seam->ApplySeam(italic_blob, blob, other_blob);
252  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
253  seams, seam);
254  }
255  }
256  }
257  if (seam != nullptr) {
258  // Make sure this seam doesn't get chopped again.
259  seam->Finalize();
260  }
261  return seam;
262 }
263 
264 
265 SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
266  bool italic_blob,
267  const GenericVector<SEAM*>& seams) {
268  return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
269  italic_blob, seams);
270 }
271 
272 
274  bool italic_blob, WERD_RES *word_res,
275  int *blob_number) {
276  TWERD *word = word_res->chopped_word;
277  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
278  TBLOB *blob = word->blobs[*blob_number];
279  TPOINT topleft, botright;
280  topleft.x = blob->bounding_box().left();
281  topleft.y = blob->bounding_box().top();
282  botright.x = blob->bounding_box().right();
283  botright.y = blob->bounding_box().bottom();
284 
285  TPOINT original_topleft, original_botright;
286  word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
287  word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
288 
289  TBOX original_box = TBOX(original_topleft.x, original_botright.y,
290  original_botright.x, original_topleft.y);
291 
292  bool almost_equal_box = false;
293  int num_overlap = 0;
294  for (int i = 0; i < boxes.size(); i++) {
295  if (original_box.overlap_fraction(boxes[i]) > 0.125)
296  num_overlap++;
297  if (original_box.almost_equal(boxes[i], 3))
298  almost_equal_box = true;
299  }
300 
301  TPOINT location;
302  if (divisible_blob(blob, italic_blob, &location) ||
303  (!almost_equal_box && num_overlap > 1)) {
304  SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
305  italic_blob, word_res->seam_array);
306  if (seam != nullptr)
307  return seam;
308  }
309  }
310 
311  *blob_number = -1;
312  return nullptr;
313 }
314 
328  DANGERR *fixpt,
329  bool split_next_to_fragment,
330  bool italic_blob,
331  WERD_RES* word,
332  int* blob_number) {
333  float rating_ceiling = FLT_MAX;
334  SEAM *seam = nullptr;
335  do {
336  *blob_number = select_blob_to_split_from_fixpt(fixpt);
337  if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
338  bool split_point_from_dict = (*blob_number != -1);
339  if (split_point_from_dict) {
340  fixpt->clear();
341  } else {
342  *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
343  split_next_to_fragment);
344  }
345  if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
346  if (*blob_number == -1)
347  return nullptr;
348 
349  // TODO(rays) it may eventually help to allow italic_blob to be true,
350  seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
351  word->seam_array);
352  if (seam != nullptr)
353  return seam; // Success!
354  if (blob_choices[*blob_number] == nullptr)
355  return nullptr;
356  if (!split_point_from_dict) {
357  // We chopped the worst rated blob, try something else next time.
358  rating_ceiling = blob_choices[*blob_number]->rating();
359  }
360  } while (true);
361  return seam;
362 }
363 
372  const GenericVector<BLOB_CHOICE*>& blob_choices,
373  WERD_RES* word_res,
374  int* blob_number) {
375  if (prioritize_division) {
376  return chop_overlapping_blob(boxes, true, word_res, blob_number);
377  } else {
378  return improve_one_blob(blob_choices, nullptr, false, true, word_res,
379  blob_number);
380  }
381 }
382 
392  int num_blobs = word->chopped_word->NumBlobs();
393  if (word->ratings == nullptr) {
394  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
395  }
396  if (word->ratings->get(0, 0) == nullptr) {
397  // Run initial classification.
398  for (int b = 0; b < num_blobs; ++b) {
399  BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
400  "Initial:", word->chopped_word,
401  word->blamer_bundle);
402  word->ratings->put(b, b, choices);
403  }
404  } else {
405  // Blobs have been pre-classified. Set matrix cell for all blob choices
406  for (int col = 0; col < word->ratings->dimension(); ++col) {
407  for (int row = col; row < word->ratings->dimension() &&
408  row < col + word->ratings->bandwidth(); ++row) {
409  BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
410  if (choices != nullptr) {
411  BLOB_CHOICE_IT bc_it(choices);
412  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
413  bc_it.data()->set_matrix_cell(col, row);
414  }
415  }
416  }
417  }
418  }
419 
420  // Run Segmentation Search.
421  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
422  SegSearch(word, &best_choice_bundle, word->blamer_bundle);
423 
424  if (word->best_choice == nullptr) {
425  // SegSearch found no valid paths, so just use the leading diagonal.
427  }
428  word->RebuildBestState();
429  // If we finished without a hyphen at the end of the word, let the next word
430  // be found in the dictionary.
431  if (word->word->flag(W_EOL) &&
432  !getDict().has_hyphen_end(*word->best_choice)) {
433  getDict().reset_hyphen_vars(true);
434  }
435 
436  if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
437  CallFillLattice(*word->ratings, word->best_choices,
438  *word->uch_set, word->blamer_bundle);
439  }
440  if (wordrec_debug_level > 0) {
441  tprintf("Final Ratings Matrix:\n");
442  word->ratings->print(getDict().getUnicharset());
443  }
444  word->FilterWordChoices(getDict().stopper_debug_level);
445 }
446 
454 void Wordrec::improve_by_chopping(float rating_cert_scale,
455  WERD_RES* word,
456  BestChoiceBundle* best_choice_bundle,
457  BlamerBundle* blamer_bundle,
458  LMPainPoints* pain_points,
460  int blob_number;
461  do { // improvement loop.
462  // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
463  // one to chop.
464  GenericVector<BLOB_CHOICE*> blob_choices;
465  int num_blobs = word->ratings->dimension();
466  for (int i = 0; i < num_blobs; ++i) {
467  BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
468  if (choices == nullptr || choices->empty()) {
469  blob_choices.push_back(nullptr);
470  } else {
471  BLOB_CHOICE_IT bc_it(choices);
472  blob_choices.push_back(bc_it.data());
473  }
474  }
475  SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
476  false, false, word, &blob_number);
477  if (seam == nullptr) break;
478  // A chop has been made. We have to correct all the data structures to
479  // take into account the extra bottom-level blob.
480  // Put the seam into the seam_array and correct everything else on the
481  // word: ratings matrix (including matrix location in the BLOB_CHOICES),
482  // states in WERD_CHOICEs, and blob widths.
483  word->InsertSeam(blob_number, seam);
484  // Insert a new entry in the beam array.
485  best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
486  // Fixpts are outdated, but will get recalculated.
487  best_choice_bundle->fixpt.clear();
488  // Remap existing pain points.
489  pain_points->RemapForSplit(blob_number);
490  // Insert a new pending at the chop point.
491  pending->insert(SegSearchPending(), blob_number);
492 
493  // Classify the two newly created blobs using ProcessSegSearchPainPoint,
494  // as that updates the pending correctly and adds new pain points.
495  MATRIX_COORD pain_point(blob_number, blob_number);
496  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
497  pain_points, blamer_bundle);
498  pain_point.col = blob_number + 1;
499  pain_point.row = blob_number + 1;
500  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
501  pain_points, blamer_bundle);
502  if (language_model_->language_model_ngram_on) {
503  // N-gram evaluation depends on the number of blobs in a chunk, so we
504  // have to re-evaluate everything in the word.
505  ResetNGramSearch(word, best_choice_bundle, pending);
506  blob_number = 0;
507  }
508  // Run language model incrementally. (Except with the n-gram model on.)
509  UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
510  word, pain_points, best_choice_bundle, blamer_bundle);
511  } while (!language_model_->AcceptableChoiceFound() &&
512  word->ratings->dimension() < kMaxNumChunks);
513 
514  // If after running only the chopper best_choice is incorrect and no blame
515  // has been yet set, blame the classifier if best_choice is classifier's
516  // top choice and is a dictionary word (i.e. language model could not have
517  // helped). Otherwise blame the tradeoff between the classifier and
518  // the old language model (permuters).
519  if (word->blamer_bundle != nullptr &&
521  !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
522  bool valid_permuter = word->best_choice != nullptr &&
525  getDict().getUnicharset(),
526  valid_permuter,
528  }
529 }
530 
531 
532 /**********************************************************************
533  * select_blob_to_split
534  *
535  * These are the results of the last classification. Find a likely
536  * place to apply splits. If none, return -1.
537  **********************************************************************/
539  const GenericVector<BLOB_CHOICE*>& blob_choices,
540  float rating_ceiling, bool split_next_to_fragment) {
541  BLOB_CHOICE *blob_choice;
542  int x;
543  float worst = -FLT_MAX;
544  int worst_index = -1;
545  float worst_near_fragment = -FLT_MAX;
546  int worst_index_near_fragment = -1;
547  const CHAR_FRAGMENT **fragments = nullptr;
548 
549  if (chop_debug) {
550  if (rating_ceiling < FLT_MAX)
551  tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
552  else
553  tprintf("rating_ceiling = No Limit\n");
554  }
555 
556  if (split_next_to_fragment && blob_choices.size() > 0) {
557  fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
558  if (blob_choices[0] != nullptr) {
559  fragments[0] = getDict().getUnicharset().get_fragment(
560  blob_choices[0]->unichar_id());
561  } else {
562  fragments[0] = nullptr;
563  }
564  }
565 
566  for (x = 0; x < blob_choices.size(); ++x) {
567  if (blob_choices[x] == nullptr) {
568  delete[] fragments;
569  return x;
570  } else {
571  blob_choice = blob_choices[x];
572  // Populate fragments for the following position.
573  if (split_next_to_fragment && x+1 < blob_choices.size()) {
574  if (blob_choices[x + 1] != nullptr) {
575  fragments[x + 1] = getDict().getUnicharset().get_fragment(
576  blob_choices[x + 1]->unichar_id());
577  } else {
578  fragments[x + 1] = nullptr;
579  }
580  }
581  if (blob_choice->rating() < rating_ceiling &&
582  blob_choice->certainty() < tessedit_certainty_threshold) {
583  // Update worst and worst_index.
584  if (blob_choice->rating() > worst) {
585  worst_index = x;
586  worst = blob_choice->rating();
587  }
588  if (split_next_to_fragment) {
589  // Update worst_near_fragment and worst_index_near_fragment.
590  bool expand_following_fragment =
591  (x + 1 < blob_choices.size() &&
592  fragments[x+1] != nullptr && !fragments[x+1]->is_beginning());
593  bool expand_preceding_fragment =
594  (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending());
595  if ((expand_following_fragment || expand_preceding_fragment) &&
596  blob_choice->rating() > worst_near_fragment) {
597  worst_index_near_fragment = x;
598  worst_near_fragment = blob_choice->rating();
599  if (chop_debug) {
600  tprintf("worst_index_near_fragment=%d"
601  " expand_following_fragment=%d"
602  " expand_preceding_fragment=%d\n",
603  worst_index_near_fragment,
604  expand_following_fragment,
605  expand_preceding_fragment);
606  }
607  }
608  }
609  }
610  }
611  }
612  delete[] fragments;
613  // TODO(daria): maybe a threshold of badness for
614  // worst_near_fragment would be useful.
615  return worst_index_near_fragment != -1 ?
616  worst_index_near_fragment : worst_index;
617 }
618 
619 /**********************************************************************
620  * select_blob_to_split_from_fixpt
621  *
622  * Given the fix point from a dictionary search, if there is a single
623  * dangerous blob that maps to multiple characters, return that blob
624  * index as a place we need to split. If none, return -1.
625  **********************************************************************/
627  if (!fixpt)
628  return -1;
629  for (int i = 0; i < fixpt->size(); i++) {
630  if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
631  (*fixpt)[i].dangerous &&
632  (*fixpt)[i].correct_is_ngram) {
633  return (*fixpt)[i].begin;
634  }
635  }
636  return -1;
637 }
638 
639 } // namespace tesseract
TBOX
Definition: rect.h:34
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:448
rect.h
tesseract::Classify::getDict
virtual Dict & getDict()
Definition: classify.h:107
Red
@ Red
Definition: callcpp.h:30
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:193
MATRIX_COORD::col
int col
Definition: matrix.h:636
tesseract::Wordrec::chop_debug
int chop_debug
Definition: wordrec.h:204
callcpp.h
BLOB_CHOICE
Definition: ratngs.h:52
MATRIX_COORD
Definition: matrix.h:608
tesseract::Wordrec::UpdateSegSearchNodes
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:180
TWERD
Definition: blobs.h:418
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
EDGEPT
Definition: blobs.h:99
WERD_RES::InsertSeam
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:418
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:252
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
tesseract::Wordrec::language_model_
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:471
SEAM::ApplySeam
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:118
tesseract::Wordrec::classify_piece
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:56
tesseract::BestChoiceBundle
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:222
MATRIX::print
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:112
display_blob
void display_blob(TBLOB *blob, C_COL color)
Definition: render.cpp:52
WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:203
tesseract::Wordrec::chop_one_blob
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:371
TBOX::overlap_fraction
double overlap_fraction(const TBOX &box) const
Definition: rect.h:388
TPOINT::y
int16_t y
Definition: blobs.h:94
wordrec.h
ratngs.h
TBOX::right
int16_t right() const
Definition: rect.h:79
render.h
WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:280
SEAM::ContainedByBlob
bool ContainedByBlob(const TBLOB &blob) const
Definition: seam.h:73
tesseract::Wordrec::wordrec_debug_level
int wordrec_debug_level
Definition: wordrec.h:226
tesseract::Classify::allow_blob_division
bool allow_blob_division
Definition: classify.h:423
tesseract::Wordrec::pick_good_seam
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:217
tesseract
Definition: altorenderer.cpp:25
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:80
GenericVector::remove
void remove(int index)
Definition: genericvector.h:803
tesseract::Wordrec::repair_unchopped_blobs
int repair_unchopped_blobs
Definition: wordrec.h:202
MATRIX_COORD::row
int row
Definition: matrix.h:637
TBOX::almost_equal
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:258
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
TPOINT
Definition: blobs.h:51
GenericVector
Definition: baseapi.h:37
tesseract::Wordrec::wordrec_debug_blamer
bool wordrec_debug_blamer
Definition: wordrec.h:231
tesseract::Wordrec::chop_overlapping_blob
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:273
TBOX::left
int16_t left() const
Definition: rect.h:72
SEAM::PrepareToInsertSeam
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:76
EDGEPT::flags
char flags[EDGEPTFLAGS]
Definition: blobs.h:191
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
tesseract::LMPainPoints::RemapForSplit
void RemapForSplit(int index)
Definition: lm_pain_points.cpp:211
GenericVector::clear
void clear()
Definition: genericvector.h:895
TESSLINE::next
TESSLINE * next
Definition: blobs.h:281
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:788
matrix.h
WERD_RES::best_choices
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
DENORM::DenormTransform
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
TESSLINE::start
TPOINT start
Definition: blobs.h:278
BlamerBundle
Definition: blamer.h:102
TBOX::top
int16_t top() const
Definition: rect.h:58
CHAR_FRAGMENT::is_ending
bool is_ending() const
Definition: unicharset.h:108
tesseract::SegSearchPending
Definition: wordrec.h:112
split.h
SEAM::UndoSeam
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:134
tesseract::Wordrec::SegSearch
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:42
tesseract::Wordrec::chop_word_main
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:391
GenericVector::length
int length() const
Definition: genericvector.h:86
tesseract::LMPainPoints
Definition: lm_pain_points.h:57
GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
tesseract::BestChoiceBundle::beam
PointerVector< LanguageModelState > beam
Definition: lm_state.h:238
tprintf.h
blobs.h
tesseract::Dict::valid_word_permuter
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:474
SEAM::Print
void Print(const char *label) const
Definition: seam.cpp:154
tesseract::Wordrec::chop_numbered_blob
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:265
tesseract::BestChoiceBundle::fixpt
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
Definition: lm_state.h:234
WERD_RES
Definition: pageres.h:166
TPOINT::x
int16_t x
Definition: blobs.h:93
SEAM::SharesPosition
bool SharesPosition(const SEAM &other) const
Definition: seam.h:89
BandTriMatrix::dimension
int dimension() const
Definition: matrix.h:536
tesseract::Dict::reset_hyphen_vars
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:28
stopper.h
divisible_blob
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:913
EDGEPT::pos
TPOINT pos
Definition: blobs.h:186
TOP_CHOICE_PERM
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
EDGEPT::next
EDGEPT * next
Definition: blobs.h:192
tesseract::Wordrec::improve_one_blob
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:327
WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:237
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:734
SEAM::Finalize
void Finalize()
Definition: seam.h:110
MATRIX
Definition: matrix.h:578
GenericVector::size
int size() const
Definition: genericvector.h:72
TBLOB
Definition: blobs.h:284
WERD_RES::FilterWordChoices
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:513
tesseract::Wordrec::attempt_blob_chop
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:211
tesseract::Wordrec::CallFillLattice
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:259
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:212
tesseract::Wordrec::tessedit_certainty_threshold
double tessedit_certainty_threshold
Definition: wordrec.h:203
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:333
tesseract::LanguageModelState
Struct to store information maintained by various language model components.
Definition: lm_state.h:200
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:200
lm_state.h
BlamerBundle::incorrect_result_reason
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:120
tesseract::Wordrec::ProcessSegSearchPainPoint
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:248
BlamerBundle::ChoiceIsCorrect
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:119
tesseract::Classify::prioritize_division
bool prioritize_division
Definition: classify.h:428
tesseract::Wordrec::select_blob_to_split_from_fixpt
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:626
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:468
blamer.h
dict.h
WERD_CHOICE::permuter
uint8_t permuter() const
Definition: ratngs.h:336
WERD_RES::word
WERD * word
Definition: pageres.h:186
tesseract::Wordrec::improve_by_chopping
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:454
tesseract::Wordrec::select_blob_to_split
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:538
W_EOL
@ W_EOL
end of line
Definition: werd.h:49
WERD_RES::denorm
DENORM denorm
Definition: pageres.h:201
IRR_CORRECT
@ IRR_CORRECT
Definition: blamer.h:53
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:83
GENERIC_2D_ARRAY::get
T get(ICOORD pos) const
Definition: matrix.h:231
tesseract::Wordrec::ResetNGramSearch
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:311
CHAR_FRAGMENT
Definition: unicharset.h:48
tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
lm_pain_points.h
params.h
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:400
BlamerBundle::BlameClassifierOrLangModel
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:377
normalis.h
WERD_RES::RebuildBestState
void RebuildBestState()
Definition: pageres.cpp:808
pageres.h
tesseract::Wordrec::wordrec_max_join_chunks
int wordrec_max_join_chunks
Definition: wordrec.h:228
TBOX::bottom
int16_t bottom() const
Definition: rect.h:65
TBLOB::ShallowCopy
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:335
TESSLINE
Definition: blobs.h:203
SEAM
Definition: seam.h:38
CHAR_FRAGMENT::is_beginning
bool is_beginning() const
Definition: unicharset.h:105
seam.h
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:837