tesseract  4.1.1
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void ExtractBestPathAsLabels (GenericVector< int > *labels, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 

Static Public Attributes

static constexpr float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 179 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress &  recoder,
int  null_char,
bool  simple_text,
Dict *  dict 
)

Definition at line 62 of file recodebeam.cpp.

64  : recoder_(recoder),
65  beam_size_(0),
66  top_code_(-1),
67  second_code_(-1),
68  dict_(dict),
69  space_delimited_(true),
70  is_simple_text_(simple_text),
71  null_char_(null_char) {
72  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
73 }

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inline static

Definition at line 237 of file recodebeam.h.

237  {
238  return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
239  }

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inline static

Definition at line 230 of file recodebeam.h.

230  {
231  return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
232  }

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET &  unicharset) const

Definition at line 303 of file recodebeam.cpp.

303  {
304  for (int p = 0; p < beam_size_; ++p) {
305  for (int d = 0; d < 2; ++d) {
306  for (int c = 0; c < NC_COUNT; ++c) {
307  auto cont = static_cast<NodeContinuation>(c);
308  int index = BeamIndex(d, cont, 0);
309  if (beam_[p]->beams_[index].empty()) continue;
310  // Print all the best scoring nodes for each unichar found.
311  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
312  kNodeContNames[c]);
313  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
314  }
315  }
316  }
317 }

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset 
)

Definition at line 92 of file recodebeam.cpp.

95  {
96  beam_size_ = 0;
97  int width = output.dim1();
98  for (int t = 0; t < width; ++t) {
99  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
100  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
101  }
102 }

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode ( const NetworkIO &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset,
int  lstm_choice_mode = 0 
)

Definition at line 76 of file recodebeam.cpp.

78  {
79  beam_size_ = 0;
80  int width = output.Width();
81  if (lstm_choice_mode)
82  timesteps.clear();
83  for (int t = 0; t < width; ++t) {
84  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
85  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
86  charset);
87  if (lstm_choice_mode) {
88  SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
89  }
90  }
91 }

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( GenericVector< int > *  labels,
GenericVector< int > *  xcoords 
) const

Definition at line 133 of file recodebeam.cpp.

134  {
135  labels->truncate(0);
136  xcoords->truncate(0);
138  ExtractBestPaths(&best_nodes, nullptr);
139  // Now just run CTC on the best nodes.
140  int t = 0;
141  int width = best_nodes.size();
142  while (t < width) {
143  int label = best_nodes[t]->code;
144  if (label != null_char_) {
145  labels->push_back(label);
146  xcoords->push_back(t);
147  }
148  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
149  }
150  }
151  xcoords->push_back(width);
152 }

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET *  unicharset,
GenericVector< int > *  unichar_ids,
GenericVector< float > *  certs,
GenericVector< float > *  ratings,
GenericVector< int > *  xcoords 
) const

Definition at line 156 of file recodebeam.cpp.

159  {
161  ExtractBestPaths(&best_nodes, nullptr);
162  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
163  if (debug) {
164  DebugPath(unicharset, best_nodes);
165  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
166  *xcoords);
167  }
168 }

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX &  line_box,
float  scale_factor,
bool  debug,
const UNICHARSET *  unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 171 of file recodebeam.cpp.

175  {
176  words->truncate(0);
177  GenericVector<int> unichar_ids;
178  GenericVector<float> certs;
179  GenericVector<float> ratings;
180  GenericVector<int> xcoords;
183  std::deque<std::tuple<int, int>> best_choices;
184  ExtractBestPaths(&best_nodes, &second_nodes);
185  if (debug) {
186  DebugPath(unicharset, best_nodes);
187  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
188  &xcoords);
189  tprintf("\nSecond choice path:\n");
190  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
191  xcoords);
192  }
193  int timestepEnd= 0;
194  //if lstm choice mode is required in granularity level 2 it stores the x
195  //Coordinates of every chosen character to match the alternative choices to it
196  if (lstm_choice_mode == 2) {
197  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198  &xcoords, &best_choices);
199  if (best_choices.size() > 0) {
200  timestepEnd = std::get<1>(best_choices.front());
201  best_choices.pop_front();
202  }
203  } else {
204  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
205  &xcoords);
206  }
207  int num_ids = unichar_ids.size();
208  if (debug) {
209  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
210  xcoords);
211  }
212  // Convert labels to unichar-ids.
213  int word_end = 0;
214  float prev_space_cert = 0.0f;
215  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
216  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
217  // A word is terminated when a space character or start_of_word flag is
218  // hit. We also want to force a separate word for every non
219  // space-delimited character when not in a dictionary context.
220  if (unichar_ids[word_end] == UNICHAR_SPACE) break;
221  int index = xcoords[word_end];
222  if (best_nodes[index]->start_of_word) break;
223  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
224  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
225  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
226  break;
227  }
228  float space_cert = 0.0f;
229  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
230  space_cert = certs[word_end];
231  bool leading_space =
232  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
233  // Create a WERD_RES for the output word.
234  WERD_RES* word_res = InitializeWord(
235  leading_space, line_box, word_start, word_end,
236  std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237  if (lstm_choice_mode == 1) {
238  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
239  word_res->timesteps.push_back(timesteps[i]);
240  }
241  timestepEnd = xcoords[word_end];
242  } else if (lstm_choice_mode == 2){
243  // Accumulated Timesteps (choice mode 2 processing)
244  float sum = 0;
245  std::vector<std::pair<const char*, float>> choice_pairs;
246  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247  for (std::pair<const char*, float> choice : timesteps[i]) {
248  if (std::strcmp(choice.first, "")) {
249  sum += choice.second;
250  choice_pairs.push_back(choice);
251  }
252  }
253  if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254  || i == xcoords[word_end]-1) {
255  std::map<const char*, float> summed_propabilities;
256  for (auto & choice_pair : choice_pairs) {
257  summed_propabilities[choice_pair.first] += choice_pair.second;
258  }
259  std::vector<std::pair<const char*, float>> accumulated_timestep;
260  for (auto& summed_propability : summed_propabilities) {
261  if(sum == 0) break;
262  summed_propability.second/=sum;
263  size_t pos = 0;
264  while (accumulated_timestep.size() > pos
265  && accumulated_timestep[pos].second > summed_propability.second) {
266  pos++;
267  }
268  accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269  std::pair<const char*,float>(summed_propability.first,
270  summed_propability.second));
271  }
272  if (best_choices.size() > 0) {
273  best_choices.pop_front();
274  }
275  choice_pairs.clear();
276  word_res->timesteps.push_back(accumulated_timestep);
277  sum = 0;
278  }
279  }
280  timestepEnd = xcoords[word_end];
281  }
282  for (int i = word_start; i < word_end; ++i) {
283  auto* choices = new BLOB_CHOICE_LIST;
284  BLOB_CHOICE_IT bc_it(choices);
285  auto* choice = new BLOB_CHOICE(
286  unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
287  static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
288  int col = i - word_start;
289  choice->set_matrix_cell(col, col);
290  bc_it.add_after_then_move(choice);
291  word_res->ratings->put(col, col, choices);
292  }
293  int index = xcoords[word_end - 1];
294  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
295  words->push_back(word_res);
296  prev_space_cert = space_cert;
297  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
298  ++word_end;
299  }
300 }

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inline static

Definition at line 233 of file recodebeam.h.

233  {
234  return index / (kNumLengths * NC_COUNT) > 0;
235  }

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inline static

Definition at line 229 of file recodebeam.h.

229 { return index % kNumLengths; }

Member Data Documentation

◆ kMinCertainty

constexpr float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
static constexpr

Definition at line 222 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 227 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 224 of file recodebeam.h.

◆ timesteps

std::vector< std::vector<std::pair<const char*, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 216 of file recodebeam.h.


The documentation for this class was generated from the following files:
recodebeam.h
recodebeam.cpp

Referenced symbols:
BLOB_CHOICE
Definition: ratngs.h:52
tesseract::RecodeBeamSearch::timesteps
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:137
tesseract::RecodeBeamSearch::kNumLengths
static const int kNumLengths
Definition: recodebeam.h:224
WERD_RES::timesteps
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:221
WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
UNICHARSET::IsSpaceDelimited
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652
GENERIC_2D_ARRAY::dim2
int dim2() const
Definition: matrix.h:210
tesseract::RecodeBeamSearch::BeamIndex
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237
GenericVector
Definition: baseapi.h:37
tesseract::NodeContinuation
NodeContinuation
Definition: recodebeam.h:72
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
tesseract::PointerVector::truncate
void truncate(int size)
Definition: genericvector.h:496
GENERIC_2D_ARRAY::dim1
int dim1() const
Definition: matrix.h:209
tesseract::NC_COUNT
@ NC_COUNT
Definition: recodebeam.h:80
GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
WERD_RES
Definition: pageres.h:166
TOP_CHOICE_PERM
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:237
GenericVector::size
int size() const
Definition: genericvector.h:72
BCC_STATIC_CLASSIFIER
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:44
tesseract::Dict::IsSpaceDelimitedLang
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (i.e. not Chinese, Japanese, or Thai).
Definition: dict.cpp:883
UNICHAR_SPACE
@ UNICHAR_SPACE
Definition: unicharset.h:34
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:837