Go to the documentation of this file.
39 SegSearch(word_res, &best_choice_bundle,
nullptr);
59 blamer_bundle, &pain_points, &pending);
63 if (blamer_bundle !=
nullptr &&
71 float pain_point_priority;
72 int num_futile_classifications = 0;
76 (blamer_bundle !=
nullptr &&
79 bool found_nothing =
true;
81 while ((pp_type = pain_points.
Deque(&pain_point, &pain_point_priority)) !=
85 pain_point.
row - pain_point.
col + 1);
90 found_nothing =
false;
100 &pending, word_res, &pain_points, blamer_bundle);
103 word_res, &pain_points, best_choice_bundle,
105 if (!best_choice_bundle->
updated) ++num_futile_classifications;
108 tprintf(
"num_futile_classifications %d\n", num_futile_classifications);
111 best_choice_bundle->
updated =
false;
116 blamer_bundle !=
nullptr &&
122 if (blamer_bundle !=
nullptr) {
128 tprintf(
"Done with SegSearch (AcceptableChoiceFound: %d)\n",
141 tprintf(
"Starting SegSearch on ratings matrix%s:\n",
160 if (blamer_bundle !=
nullptr) {
175 (*pending)[0].SetColumnClassified();
177 pain_points, best_choice_bundle, blamer_bundle);
181 float rating_cert_scale,
191 for (
int col = starting_col; col < ratings->
dimension(); ++col) {
192 if (!(*pending)[col].WorkToDo())
continue;
194 int last_row = std::min(ratings->
dimension() - 1,
196 if ((*pending)[col].SingleRow() >= 0) {
197 first_row = last_row = (*pending)[col].SingleRow();
200 tprintf(
"\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
201 col, first_row, last_row,
202 (*pending)[col].IsRowJustClassified(INT32_MAX));
205 for (
int row = first_row; row <= last_row; ++row) {
207 BLOB_CHOICE_LIST *current_node = ratings->
get(col, row);
209 col == 0 ? nullptr : best_choice_bundle->
beam[col - 1];
210 if (current_node !=
nullptr &&
212 col, row, current_node, parent_node,
213 pain_points, word_res,
214 best_choice_bundle, blamer_bundle) &&
218 (*pending)[row + 1].RevisitWholeColumn();
220 tprintf(
"Added child col=%d to pending\n", row + 1);
225 if (best_choice_bundle->
best_vse !=
nullptr) {
229 best_choice_bundle->
best_vse, word_res);
232 best_choice_bundle->
best_vse, word_res);
238 for (
int col = 0; col < pending->
size(); ++col) {
239 (*pending)[col].Clear();
241 vse_it(&best_choice_bundle->
beam[col]->viterbi_state_entries);
242 for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
243 vse_it.data()->updated =
false;
249 float pain_point_priority,
250 const MATRIX_COORD &pain_point,
const char* pain_point_type,
254 tprintf(
"Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
255 pain_point_type, pain_point_priority,
256 pain_point.
col, pain_point.
row);
261 if (!pain_point.
Valid(*ratings)) {
266 pain_point.
col, pain_point.
row,
270 BLOB_CHOICE_LIST *lst = ratings->
get(pain_point.
col, pain_point.
row);
271 if (lst ==
nullptr) {
272 ratings->
put(pain_point.
col, pain_point.
row, classified);
278 BLOB_CHOICE_IT it(lst);
279 it.add_list_before(classified);
281 classified =
nullptr;
286 ratings->
get(pain_point.
col, pain_point.
row),
293 if (classified !=
nullptr && !classified->empty()) {
294 if (pain_point.
col > 0) {
305 (*pending)[pain_point.
col].SetBlobClassified(pain_point.
row);
316 for (
int col = 0; col < best_choice_bundle->
beam.size(); ++col) {
317 best_choice_bundle->
beam[col]->Clear();
321 best_choice_bundle->
best_vse =
nullptr;
323 (*pending)[0].SetColumnClassified();
324 for (
int i = 1; i < pending->
size(); ++i)
325 (*pending)[i].Clear();
332 pain_points->
Clear();
338 blamer_debug, pp_cb);
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
void init_to_size(int size, const T &t)
WERD_CHOICE * prev_word_best_choice_
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
bool updated
set to true if the entry has just been created/updated
bool SegSearchDone(int num_futile_classifications)
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry *vse, WERD_RES *word_res)
WERD_CHOICE * best_choice
void SetupCorrectSegmentation(const TWERD *word, bool debug)
GenericVector< SEAM * > seam_array
std::unique_ptr< LanguageModel > language_model_
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Bundle together all the things pertaining to the best choice/state.
void print(const UNICHARSET &unicharset) const
bool Valid(const MATRIX &m) const
static void PrintSeams(const char *label, const GenericVector< SEAM * > &seams)
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
ViterbiStateEntry * best_vse
Best ViterbiStateEntry and BLOB_CHOICE.
bool GuidedSegsearchStillGoing() const
bool GenerateForBlamer(double max_char_wh_ratio, WERD_RES *word_res, int col, int row)
bool assume_fixed_pitch_char_segment
static const char * PainPointDescription(LMPainPointsType type)
bool wordrec_debug_blamer
void IncreaseBandSize(int bandwidth)
int segsearch_debug_level
DLLSYM void tprintf(const char *format,...)
bool Classified(int col, int row, int wildcard_id) const
void GenerateFromAmbigs(const DANGERR &fixpt, ViterbiStateEntry *vse, WERD_RES *word_res)
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
bool wordrec_enable_assoc
void put(ICOORD pos, const T &thing)
PointerVector< LanguageModelState > beam
void SetChopperBlame(const WERD_RES *word, bool debug)
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
int segsearch_max_pain_points
void DoSegSearch(WERD_RES *word_res)
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb)
Struct to store information maintained by various language model components.
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
LMPainPointsType Deque(MATRIX_COORD *pp, float *priority)
double segsearch_max_char_wh_ratio
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
void GenerateInitial(WERD_RES *word_res)
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
bool updated
Flag to indicate whether anything was changed.
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)