47 #include "config_auto.h"
52 static const int kMaxNumChunks = 64;
63 static int check_blob(
TBLOB *blob) {
67 for (outline = blob->
outlines; outline !=
nullptr; outline = outline->
next) {
68 edgept = outline->
loop;
70 if (edgept ==
nullptr)
72 edgept = edgept->
next;
74 while (edgept != outline->
loop);
75 if (edgept ==
nullptr)
91 for (index = 0; index <
length; index++)
101 static void preserve_outline(
EDGEPT *start) {
104 if (start ==
nullptr)
111 while (srcpt != start);
115 static void preserve_outline_tree(
TESSLINE *srcline) {
118 for (outline = srcline; outline !=
nullptr; outline = outline->
next) {
119 preserve_outline (outline->
loop);
132 if (start ==
nullptr)
136 if (srcpt->
flags[1] == 2)
140 while (srcpt != start);
148 while (srcpt != real_start);
152 static void restore_outline_tree(
TESSLINE *srcline) {
155 for (outline = srcline; outline !=
nullptr; outline = outline->
next) {
156 outline->
loop = restore_outline (outline->
loop);
167 static int16_t total_containment(
TBLOB *blob1,
TBLOB *blob2) {
175 static SEAM* CheckSeam(
int debug_level, int32_t blob_number,
TWERD* word,
178 if (seam ==
nullptr || blob->
outlines ==
nullptr || other_blob->
outlines ==
nullptr ||
179 total_containment(blob, other_blob) || check_blob(other_blob) ||
181 any_shared_split_points(seams, seam) ||
188 #ifndef GRAPHICS_DISABLED
192 tprintf(
"\n** seam being removed ** \n");
215 preserve_outline_tree (blob->
outlines);
220 SEAM *seam =
nullptr;
224 seam =
new SEAM(0.0f, location);
231 seam->
Print(
"Good seam picked=");
233 tprintf(
"\n** no seam picked *** \n");
236 seam->
ApplySeam(italic_blob, blob, other_blob);
239 seam = CheckSeam(
chop_debug, blob_number, word, blob, other_blob,
241 if (seam ==
nullptr) {
243 restore_outline_tree(blob->
outlines);
250 seam =
new SEAM(0.0f, location);
251 seam->
ApplySeam(italic_blob, blob, other_blob);
252 seam = CheckSeam(
chop_debug, blob_number, word, blob, other_blob,
257 if (seam !=
nullptr) {
274 bool italic_blob,
WERD_RES *word_res,
277 for (*blob_number = 0; *blob_number < word->
NumBlobs(); ++*blob_number) {
285 TPOINT original_topleft, original_botright;
289 TBOX original_box =
TBOX(original_topleft.
x, original_botright.
y,
290 original_botright.
x, original_topleft.
y);
292 bool almost_equal_box =
false;
294 for (
int i = 0; i < boxes.
size(); i++) {
298 almost_equal_box =
true;
303 (!almost_equal_box && num_overlap > 1)) {
329 bool split_next_to_fragment,
333 float rating_ceiling = FLT_MAX;
334 SEAM *seam =
nullptr;
338 bool split_point_from_dict = (*blob_number != -1);
339 if (split_point_from_dict) {
343 split_next_to_fragment);
346 if (*blob_number == -1)
354 if (blob_choices[*blob_number] ==
nullptr)
356 if (!split_point_from_dict) {
358 rating_ceiling = blob_choices[*blob_number]->rating();
393 if (word->
ratings ==
nullptr) {
398 for (
int b = 0; b < num_blobs; ++b) {
408 row < col + word->ratings->bandwidth(); ++row) {
409 BLOB_CHOICE_LIST* choices = word->
ratings->
get(col, row);
410 if (choices !=
nullptr) {
411 BLOB_CHOICE_IT bc_it(choices);
412 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
413 bc_it.data()->set_matrix_cell(col, row);
436 if (word->
blamer_bundle !=
nullptr && this->fill_lattice_ !=
nullptr) {
441 tprintf(
"Final Ratings Matrix:\n");
466 for (
int i = 0; i < num_blobs; ++i) {
467 BLOB_CHOICE_LIST* choices = word->
ratings->
get(i, i);
468 if (choices ==
nullptr || choices->empty()) {
471 BLOB_CHOICE_IT bc_it(choices);
476 false,
false, word, &blob_number);
477 if (seam ==
nullptr)
break;
497 pain_points, blamer_bundle);
498 pain_point.
col = blob_number + 1;
499 pain_point.
row = blob_number + 1;
501 pain_points, blamer_bundle);
510 word, pain_points, best_choice_bundle, blamer_bundle);
522 bool valid_permuter = word->
best_choice !=
nullptr &&
540 float rating_ceiling,
bool split_next_to_fragment) {
543 float worst = -FLT_MAX;
544 int worst_index = -1;
545 float worst_near_fragment = -FLT_MAX;
546 int worst_index_near_fragment = -1;
550 if (rating_ceiling < FLT_MAX)
551 tprintf(
"rating_ceiling = %8.4f\n", rating_ceiling);
553 tprintf(
"rating_ceiling = No Limit\n");
556 if (split_next_to_fragment && blob_choices.
size() > 0) {
558 if (blob_choices[0] !=
nullptr) {
560 blob_choices[0]->unichar_id());
562 fragments[0] =
nullptr;
566 for (x = 0; x < blob_choices.
size(); ++x) {
567 if (blob_choices[x] ==
nullptr) {
571 blob_choice = blob_choices[x];
573 if (split_next_to_fragment && x+1 < blob_choices.
size()) {
574 if (blob_choices[x + 1] !=
nullptr) {
576 blob_choices[x + 1]->unichar_id());
578 fragments[x + 1] =
nullptr;
581 if (blob_choice->
rating() < rating_ceiling &&
584 if (blob_choice->
rating() > worst) {
586 worst = blob_choice->
rating();
588 if (split_next_to_fragment) {
590 bool expand_following_fragment =
591 (x + 1 < blob_choices.
size() &&
592 fragments[x+1] !=
nullptr && !fragments[x+1]->
is_beginning());
593 bool expand_preceding_fragment =
594 (x > 0 && fragments[x-1] !=
nullptr && !fragments[x-1]->
is_ending());
595 if ((expand_following_fragment || expand_preceding_fragment) &&
596 blob_choice->
rating() > worst_near_fragment) {
597 worst_index_near_fragment = x;
598 worst_near_fragment = blob_choice->
rating();
600 tprintf(
"worst_index_near_fragment=%d"
601 " expand_following_fragment=%d"
602 " expand_preceding_fragment=%d\n",
603 worst_index_near_fragment,
604 expand_following_fragment,
605 expand_preceding_fragment);
615 return worst_index_near_fragment != -1 ?
616 worst_index_near_fragment : worst_index;
629 for (
int i = 0; i < fixpt->
size(); i++) {
630 if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
631 (*fixpt)[i].dangerous &&
632 (*fixpt)[i].correct_is_ngram) {
633 return (*fixpt)[i].begin;
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
void remove_edgept(EDGEPT *point)
DLLSYM void tprintf(const char *format,...)
void display_blob(TBLOB *blob, C_COL color)
void insert(const T &t, int index)
void put(ICOORD pos, const T &thing)
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
IncorrectResultReason incorrect_result_reason() const
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
TBOX bounding_box() const
static TBLOB * ShallowCopy(const TBLOB &src)
GenericVector< TBLOB * > blobs
void print(const UNICHARSET &unicharset) const
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
const UNICHARSET * uch_set
WERD_CHOICE_LIST best_choices
BlamerBundle * blamer_bundle
GenericVector< SEAM * > seam_array
WERD_CHOICE * best_choice
void FakeWordFromRatings(PermuterType permuter)
void FilterWordChoices(int debug_level)
void InsertSeam(int blob_number, SEAM *seam)
bool almost_equal(const TBOX &box, int tolerance) const
double overlap_fraction(const TBOX &box) const
bool contains(const FCOORD pt) const
bool SharesPosition(const SEAM &other) const
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
bool ContainedByBlob(const TBLOB &blob) const
void Print(const char *label) const
bool flag(WERD_FLAGS mask) const
bool is_beginning() const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
const UNICHARSET & getUnicharset() const
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
void reset_hyphen_vars(bool last_word_on_line)
void RemapForSplit(int index)
Struct to store information maintained by various language model components.
Bundle together all the things pertaining to the best choice/state.
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
PointerVector< LanguageModelState > beam
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
int repair_unchopped_blobs
void chop_word_main(WERD_RES *word)
double tessedit_certainty_threshold
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
bool wordrec_debug_blamer
int wordrec_max_join_chunks
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
SEAM * pick_good_seam(TBLOB *blob)
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
std::unique_ptr< LanguageModel > language_model_
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)