|
| ~ResultIterator () override=default |
|
void | Begin () override |
|
bool | Next (PageIteratorLevel level) override |
|
bool | IsAtBeginningOf (PageIteratorLevel level) const override |
|
bool | IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const override |
|
int | BlanksBeforeWord () const |
|
virtual char * | GetUTF8Text (PageIteratorLevel level) const |
|
virtual std::vector< std::vector< std::pair< const char *, float > > > * | GetBestLSTMSymbolChoices () const |
|
bool | ParagraphIsLtr () const |
|
| LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) |
|
| ~LTRResultIterator () override |
|
char * | GetUTF8Text (PageIteratorLevel level) const |
|
void | SetLineSeparator (const char *new_line) |
|
void | SetParagraphSeparator (const char *new_para) |
|
float | Confidence (PageIteratorLevel level) const |
|
void | RowAttributes (float *row_height, float *descenders, float *ascenders) const |
|
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
|
const char * | WordRecognitionLanguage () const |
|
StrongScriptDirection | WordDirection () const |
|
bool | WordIsFromDictionary () const |
|
int | BlanksBeforeWord () const |
|
bool | WordIsNumeric () const |
|
bool | HasBlamerInfo () const |
|
const void * | GetParamsTrainingBundle () const |
|
const char * | GetBlamerDebug () const |
|
const char * | GetBlamerMisadaptionDebug () const |
|
bool | HasTruthString () const |
|
bool | EquivalentToTruth (const char *str) const |
|
char * | WordTruthUTF8Text () const |
|
char * | WordNormedUTF8Text () const |
|
const char * | WordLattice (int *lattice_size) const |
|
bool | SymbolIsSuperscript () const |
|
bool | SymbolIsSubscript () const |
|
bool | SymbolIsDropcap () const |
|
| PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) |
|
virtual | ~PageIterator () |
|
| PageIterator (const PageIterator &src) |
|
const PageIterator & | operator= (const PageIterator &src) |
|
bool | PositionedAtSameWord (const PAGE_RES_IT *other) const |
|
virtual void | RestartParagraph () |
|
bool | IsWithinFirstTextlineOfParagraph () const |
|
virtual void | RestartRow () |
|
int | Cmp (const PageIterator &other) const |
|
void | SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots) |
|
bool | BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
|
bool | BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const |
|
bool | BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
|
bool | Empty (PageIteratorLevel level) const |
|
PolyBlockType | BlockType () const |
|
Pta * | BlockPolygon () const |
|
Pix * | GetBinaryImage (PageIteratorLevel level) const |
|
Pix * | GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const |
|
bool | Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const |
|
void | Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const |
|
void | ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const |
|
bool | SetWordBlamerBundle (BlamerBundle *blamer_bundle) |
|
Definition at line 41 of file resultiterator.h.
Yields the reading order as a sequence of indices and (optional) meta-marks for a set of words (given left-to-right). The meta-marks are passed as negative values: kMinorRunStart — start of minor-direction text; kMinorRunEnd — end of minor-direction text; kComplexWord — the next indexed word contains both left-to-right and right-to-left characters and was treated as neutral.
For example, suppose we have five words in a text line, indexed [0,1,2,3,4] from the leftmost side of the text line. The following are all believable reading_orders:
Left-to-Right (in ltr paragraph): { 0, 1, 2, 3, 4 }; Left-to-Right (in rtl paragraph): { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }; Right-to-Left (in rtl paragraph): { 4, 3, 2, 1, 0 }; Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }.
Definition at line 257 of file resultiterator.cpp.
261 reading_order->truncate(0);
262 if (word_dirs.
size() == 0)
return;
266 int minor_direction, major_direction, major_step, start, end;
267 if (paragraph_is_ltr) {
269 end = word_dirs.
size();
274 start = word_dirs.
size() - 1;
283 int neutral_end = start;
284 while (neutral_end > 0 && word_dirs[neutral_end] ==
DIR_NEUTRAL) {
290 int left = neutral_end;
295 for (
int i = left; i < word_dirs.
size(); i++) {
296 reading_order->push_back(i);
304 for (
int i = start; i != end;) {
305 if (word_dirs[i] == minor_direction) {
307 while (j != end && word_dirs[j] != major_direction)
309 if (j == end) j -= major_step;
310 while (j != i && word_dirs[j] != minor_direction)
314 for (
int k = j; k != i; k -= major_step) {
315 reading_order->push_back(k);
317 reading_order->push_back(i);
321 reading_order->push_back(i);
Moves to the start of the next object at the given level in the page hierarchy in the appropriate reading order and returns false if the end of the page was reached. Note that RIL_SYMBOL will skip non-text blocks, but all other PageIteratorLevel level values will visit each non-text block once. Think of non-text blocks as containing a single paragraph, with a single line, with a single imaginary word. Calls to Next with different levels may be freely intermixed. This function iterates words in right-to-left scripts correctly, if the appropriate language has been loaded into Tesseract.
Reimplemented from tesseract::PageIterator.
Definition at line 423 of file resultiterator.cpp.
424 if (
it_->
block() ==
nullptr)
return false;
433 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
435 in_minor_direction_ =
false;
436 MoveToLogicalStartOfTextline();
441 CalculateBlobOrder(&blob_order);
443 while (next_blob < blob_order.
size() &&
447 if (next_blob < blob_order.
size()) {
450 at_beginning_of_minor_run_ =
false;
460 int this_word_index = LTRWordIndex();
464 int final_real_index = word_indices.size() - 1;
465 while (final_real_index > 0 && word_indices[final_real_index] < 0)
467 for (
int i = 0; i < final_real_index; i++) {
468 if (word_indices[i] == this_word_index) {
470 for (; j < final_real_index && word_indices[j] < 0; j++) {
471 if (word_indices[j] ==
kMinorRunStart) in_minor_direction_ =
true;
472 if (word_indices[j] ==
kMinorRunEnd) in_minor_direction_ =
false;
474 at_beginning_of_minor_run_ = (word_indices[j - 1] ==
kMinorRunStart);
477 tprintf(
"Next(RIL_WORD): %d -> %d\n",
478 this_word_index, word_indices[j]);
481 for (
int k = 0; k < word_indices[j]; k++) {
484 MoveToLogicalStartOfWord();
489 tprintf(
"Next(RIL_WORD): %d -> EOL\n", this_word_index);