tesseract  4.1.1
PAGE_RES_IT Class Reference

#include <pageres.h>

Public Member Functions

 PAGE_RES_IT ()=default
 
 PAGE_RES_IT (PAGE_RES *the_page_res)
 
bool operator== (const PAGE_RES_IT &other) const
 
bool operator!= (const PAGE_RES_IT &other) const
 
int cmp (const PAGE_RES_IT &other) const
 
WERD_RES * restart_page ()
 
WERD_RES * restart_page_with_empties ()
 
WERD_RES * start_page (bool empty_ok)
 
WERD_RES * restart_row ()
 
WERD_RES * InsertSimpleCloneWord (const WERD_RES &clone_res, WERD *new_word)
 
void ReplaceCurrentWord (tesseract::PointerVector< WERD_RES > *words)
 
void DeleteCurrentWord ()
 
void MakeCurrentWordFuzzy ()
 
WERD_RES * forward ()
 
WERD_RES * forward_with_empties ()
 
WERD_RES * forward_paragraph ()
 
WERD_RES * forward_block ()
 
WERD_RES * prev_word () const
 
ROW_RES * prev_row () const
 
BLOCK_RES * prev_block () const
 
WERD_RES * word () const
 
ROW_RES * row () const
 
BLOCK_RES * block () const
 
WERD_RES * next_word () const
 
ROW_RES * next_row () const
 
BLOCK_RES * next_block () const
 
void rej_stat_word ()
 
void ResetWordIterator ()
 

Public Attributes

PAGE_RES * page_res
 

Detailed Description

Definition at line 675 of file pageres.h.

Constructor & Destructor Documentation

◆ PAGE_RES_IT() [1/2]

PAGE_RES_IT::PAGE_RES_IT ( )
default

◆ PAGE_RES_IT() [2/2]

PAGE_RES_IT::PAGE_RES_IT ( PAGE_RES * the_page_res)
inline

Definition at line 683 of file pageres.h.

688  {

Member Function Documentation

◆ block()

BLOCK_RES* PAGE_RES_IT::block ( ) const
inline

Definition at line 762 of file pageres.h.

763  { // next word
764  return next_word_res;

◆ cmp()

int PAGE_RES_IT::cmp ( const PAGE_RES_IT & other) const

Definition at line 1145 of file pageres.cpp.

1145  {
1146  ASSERT_HOST(page_res == other.page_res);
1147  if (other.block_res == nullptr) {
1148  // other points to the end of the page.
1149  if (block_res == nullptr)
1150  return 0;
1151  return -1;
1152  }
1153  if (block_res == nullptr) {
1154  return 1; // we point to the end of the page.
1155  }
1156  if (block_res == other.block_res) {
1157  if (other.row_res == nullptr || row_res == nullptr) {
1158  // this should only happen if we hit an image block.
1159  return 0;
1160  }
1161  if (row_res == other.row_res) {
1162  // we point to the same block and row.
1163  ASSERT_HOST(other.word_res != nullptr && word_res != nullptr);
1164  if (word_res == other.word_res) {
1165  // we point to the same word!
1166  return 0;
1167  }
1168 
1169  WERD_RES_IT word_res_it(&row_res->word_res_list);
1170  for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1171  word_res_it.forward()) {
1172  if (word_res_it.data() == word_res) {
1173  return -1;
1174  } else if (word_res_it.data() == other.word_res) {
1175  return 1;
1176  }
1177  }
1178  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1179  }
1180 
1181  // we both point to the same block, but different rows.
1182  ROW_RES_IT row_res_it(&block_res->row_res_list);
1183  for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1184  row_res_it.forward()) {
1185  if (row_res_it.data() == row_res) {
1186  return -1;
1187  } else if (row_res_it.data() == other.row_res) {
1188  return 1;
1189  }
1190  }
1191  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1192  }
1193 
1194  // We point to different blocks.
1195  BLOCK_RES_IT block_res_it(&page_res->block_res_list);
1196  for (block_res_it.mark_cycle_pt();
1197  !block_res_it.cycled_list(); block_res_it.forward()) {
1198  if (block_res_it.data() == block_res) {
1199  return -1;
1200  } else if (block_res_it.data() == other.block_res) {
1201  return 1;
1202  }
1203  }
1204  // Shouldn't happen...
1205  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1206  return 0;
1207 }

◆ DeleteCurrentWord()

void PAGE_RES_IT::DeleteCurrentWord ( )

Definition at line 1440 of file pageres.cpp.

1440  {
1441  // Check that this word is as we expect. part_of_combos are NEVER iterated
1442  // by the normal iterator, so we should never be trying to delete them.
1443  ASSERT_HOST(!word_res->part_of_combo);
1444  if (!word_res->combination) {
1445  // Combinations own their own word, so we won't find the word on the
1446  // row's word_list, but it is legitimate to try to delete them.
1447  // Delete word from the ROW when not a combination.
1448  WERD_IT w_it(row()->row->word_list());
1449  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1450  if (w_it.data() == word_res->word) {
1451  break;
1452  }
1453  }
1454  ASSERT_HOST(!w_it.cycled_list());
1455  delete w_it.extract();
1456  }
1457  // Remove the WERD_RES for the new_word.
1458  // Remove the WORD_RES from the ROW_RES.
1459  WERD_RES_IT wr_it(&row()->word_res_list);
1460  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1461  if (wr_it.data() == word_res) {
1462  word_res = nullptr;
1463  break;
1464  }
1465  }
1466  ASSERT_HOST(!wr_it.cycled_list());
1467  delete wr_it.extract();
1468  ResetWordIterator();
1469 }

◆ forward()

WERD_RES* PAGE_RES_IT::forward ( )
inline

Definition at line 736 of file pageres.h.

738  {

◆ forward_block()

WERD_RES * PAGE_RES_IT::forward_block ( )

Definition at line 1660 of file pageres.cpp.

1660  {
1661  while (block_res == next_block_res) {
1662  internal_forward(false, true);
1663  }
1664  return internal_forward(false, true);
1665 }

◆ forward_paragraph()

WERD_RES * PAGE_RES_IT::forward_paragraph ( )

Definition at line 1645 of file pageres.cpp.

1645  {
1646  while (block_res == next_block_res &&
1647  (next_row_res != nullptr && next_row_res->row != nullptr &&
1648  row_res->row->para() == next_row_res->row->para())) {
1649  internal_forward(false, true);
1650  }
1651  return internal_forward(false, true);
1652 }

◆ forward_with_empties()

WERD_RES* PAGE_RES_IT::forward_with_empties ( )
inline

Definition at line 740 of file pageres.h.

745  { // previous word

◆ InsertSimpleCloneWord()

WERD_RES * PAGE_RES_IT::InsertSimpleCloneWord ( const WERD_RES & clone_res,
WERD * new_word 
)

Definition at line 1213 of file pageres.cpp.

1214  {
1215  // Make a WERD_RES for the new_word.
1216  auto* new_res = new WERD_RES(new_word);
1217  new_res->CopySimpleFields(clone_res);
1218  new_res->combination = true;
1219  // Insert into the appropriate place in the ROW_RES.
1220  WERD_RES_IT wr_it(&row()->word_res_list);
1221  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1222  WERD_RES* word = wr_it.data();
1223  if (word == word_res)
1224  break;
1225  }
1226  ASSERT_HOST(!wr_it.cycled_list());
1227  wr_it.add_before_then_move(new_res);
1228  if (wr_it.at_first()) {
1229  // This is the new first word, so reset the member iterator so it
1230  // detects the cycled_list state correctly.
1231  ResetWordIterator();
1232  }
1233  return new_res;
1234 }

◆ MakeCurrentWordFuzzy()

void PAGE_RES_IT::MakeCurrentWordFuzzy ( )

Definition at line 1473 of file pageres.cpp.

1473  {
1474  WERD* real_word = word_res->word;
1475  if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) {
1476  real_word->set_flag(W_FUZZY_SP, true);
1477  if (word_res->combination) {
1478  // The next word should be the corresponding part of combo, but we have
1479  // already stepped past it, so find it by search.
1480  WERD_RES_IT wr_it(&row()->word_res_list);
1481  for (wr_it.mark_cycle_pt();
1482  !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1483  }
1484  wr_it.forward();
1485  ASSERT_HOST(wr_it.data()->part_of_combo);
1486  real_word = wr_it.data()->word;
1487  ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
1488  !real_word->flag(W_FUZZY_NON));
1489  real_word->set_flag(W_FUZZY_SP, true);
1490  }
1491  }
1492 }

◆ next_block()

BLOCK_RES* PAGE_RES_IT::next_block ( ) const
inline

Definition at line 771 of file pageres.h.

775  :
776  WERD_RES *internal_forward(bool new_block, bool empty_ok);

◆ next_row()

ROW_RES* PAGE_RES_IT::next_row ( ) const
inline

Definition at line 768 of file pageres.h.

769  { // block of next word
770  return next_block_res;

◆ next_word()

WERD_RES* PAGE_RES_IT::next_word ( ) const
inline

Definition at line 765 of file pageres.h.

766  { // row of next word
767  return next_row_res;

◆ operator!=()

bool PAGE_RES_IT::operator!= ( const PAGE_RES_IT & other) const
inline

Definition at line 695 of file pageres.h.

701 {

◆ operator==()

bool PAGE_RES_IT::operator== ( const PAGE_RES_IT & other) const
inline

Definition at line 690 of file pageres.h.

693  {return !(*this == other); }

◆ prev_block()

BLOCK_RES* PAGE_RES_IT::prev_block ( ) const
inline

Definition at line 753 of file pageres.h.

754  { // current word
755  return word_res;

◆ prev_row()

ROW_RES* PAGE_RES_IT::prev_row ( ) const
inline

Definition at line 750 of file pageres.h.

751  { // block of prev word
752  return prev_block_res;

◆ prev_word()

WERD_RES* PAGE_RES_IT::prev_word ( ) const
inline

Definition at line 747 of file pageres.h.

748  { // row of prev word
749  return prev_row_res;

◆ rej_stat_word()

void PAGE_RES_IT::rej_stat_word ( )

Definition at line 1667 of file pageres.cpp.

1667  {
1668  int16_t chars_in_word;
1669  int16_t rejects_in_word = 0;
1670 
1671  chars_in_word = word_res->reject_map.length ();
1672  page_res->char_count += chars_in_word;
1673  block_res->char_count += chars_in_word;
1674  row_res->char_count += chars_in_word;
1675 
1676  rejects_in_word = word_res->reject_map.reject_count ();
1677 
1678  page_res->rej_count += rejects_in_word;
1679  block_res->rej_count += rejects_in_word;
1680  row_res->rej_count += rejects_in_word;
1681  if (chars_in_word == rejects_in_word)
1682  row_res->whole_word_rej_count += rejects_in_word;
1683 }

◆ ReplaceCurrentWord()

void PAGE_RES_IT::ReplaceCurrentWord ( tesseract::PointerVector< WERD_RES > *  words)

Definition at line 1333 of file pageres.cpp.

1334  {
1335  if (words->empty()) {
1336  DeleteCurrentWord();
1337  return;
1338  }
1339  WERD_RES* input_word = word();
1340  // Set the BOL/EOL flags on the words from the input word.
1341  if (input_word->word->flag(W_BOL)) {
1342  (*words)[0]->word->set_flag(W_BOL, true);
1343  } else {
1344  (*words)[0]->word->set_blanks(input_word->word->space());
1345  }
1346  words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));
1347 
1348  // Move the blobs from the input word to the new set of words.
1349  // If the input word_res is a combination, then the replacements will also be
1350  // combinations, and will own their own words. If the input word_res is not a
1351  // combination, then the final replacements will not be either, (although it
1352  // is allowed for the input words to be combinations) and their words
1353  // will get put on the row list. This maintains the ownership rules.
1354  WERD_IT w_it(row()->row->word_list());
1355  if (!input_word->combination) {
1356  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1357  WERD* word = w_it.data();
1358  if (word == input_word->word)
1359  break;
1360  }
1361  // w_it is now set to the input_word's word.
1362  ASSERT_HOST(!w_it.cycled_list());
1363  }
1364  // Insert into the appropriate place in the ROW_RES.
1365  WERD_RES_IT wr_it(&row()->word_res_list);
1366  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1367  WERD_RES* word = wr_it.data();
1368  if (word == input_word)
1369  break;
1370  }
1371  ASSERT_HOST(!wr_it.cycled_list());
1372  // Since we only have an estimate of the bounds between blobs, use the blob
1373  // x-middle as the determiner of where to put the blobs
1374  C_BLOB_IT src_b_it(input_word->word->cblob_list());
1375  src_b_it.sort(&C_BLOB::SortByXMiddle);
1376  C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
1377  rej_b_it.sort(&C_BLOB::SortByXMiddle);
1378  TBOX clip_box;
1379  for (int w = 0; w < words->size(); ++w) {
1380  WERD_RES* word_w = (*words)[w];
1381  clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
1382  // Compute blob boundaries.
1383  GenericVector<int> blob_ends;
1384  C_BLOB_LIST* next_word_blobs =
1385  w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : nullptr;
1386  ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
1387  // Remove the fake blobs on the current word, but keep safe for back-up if
1388  // no blob can be found.
1389  C_BLOB_LIST fake_blobs;
1390  C_BLOB_IT fake_b_it(&fake_blobs);
1391  fake_b_it.add_list_after(word_w->word->cblob_list());
1392  fake_b_it.move_to_first();
1393  word_w->word->cblob_list()->clear();
1394  C_BLOB_IT dest_it(word_w->word->cblob_list());
1395  // Build the box word as we move the blobs.
1396  auto* box_word = new tesseract::BoxWord;
1397  for (int i = 0; i < blob_ends.size(); ++i, fake_b_it.forward()) {
1398  int end_x = blob_ends[i];
1399  TBOX blob_box;
1400  // Add the blobs up to end_x.
1401  while (!src_b_it.empty() &&
1402  src_b_it.data()->bounding_box().x_middle() < end_x) {
1403  blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
1404  src_b_it.forward();
1405  }
1406  while (!rej_b_it.empty() &&
1407  rej_b_it.data()->bounding_box().x_middle() < end_x) {
1408  blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
1409  rej_b_it.forward();
1410  }
1411  if (blob_box.null_box()) {
1412  // Use the original box as a back-up.
1413  blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
1414  }
1415  box_word->InsertBox(i, blob_box);
1416  }
1417  delete word_w->box_word;
1418  word_w->box_word = box_word;
1419  if (!input_word->combination) {
1420  // Insert word_w->word into the ROW. It doesn't own its word, so the
1421  // ROW needs to own it.
1422  w_it.add_before_stay_put(word_w->word);
1423  word_w->combination = false;
1424  }
1425  (*words)[w] = nullptr; // We are taking ownership.
1426  wr_it.add_before_stay_put(word_w);
1427  }
1428  // We have taken ownership of the words.
1429  words->clear();
1430  // Delete the current word, which has been replaced. We could just call
1431  // DeleteCurrentWord, but that would iterate both lists again, and we know
1432  // we are already in the right place.
1433  if (!input_word->combination)
1434  delete w_it.extract();
1435  delete wr_it.extract();
1436  ResetWordIterator();
1437 }

◆ ResetWordIterator()

void PAGE_RES_IT::ResetWordIterator ( )

Definition at line 1523 of file pageres.cpp.

1523  {
1524  if (row_res == next_row_res) {
1525  // Reset the member iterator so it can move forward and detect the
1526  // cycled_list state correctly.
1527  word_res_it.move_to_first();
1528  for (word_res_it.mark_cycle_pt();
1529  !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1530  word_res_it.forward()) {
1531  if (!word_res_it.data()->part_of_combo) {
1532  if (prev_row_res == row_res) prev_word_res = word_res;
1533  word_res = word_res_it.data();
1534  }
1535  }
1536  ASSERT_HOST(!word_res_it.cycled_list());
1537  wr_it_of_next_word = word_res_it;
1538  word_res_it.forward();
1539  } else {
1540  // word_res_it is OK, but reset word_res and prev_word_res if needed.
1541  WERD_RES_IT wr_it(&row_res->word_res_list);
1542  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1543  if (!wr_it.data()->part_of_combo) {
1544  if (prev_row_res == row_res) prev_word_res = word_res;
1545  word_res = wr_it.data();
1546  }
1547  }
1548  }
1549 }

◆ restart_page()

WERD_RES* PAGE_RES_IT::restart_page ( )
inline

Definition at line 703 of file pageres.h.

704  {
705  return start_page(true); // Allow empty blocks.

◆ restart_page_with_empties()

WERD_RES* PAGE_RES_IT::restart_page_with_empties ( )
inline

Definition at line 706 of file pageres.h.

◆ restart_row()

WERD_RES * PAGE_RES_IT::restart_row ( )

Definition at line 1630 of file pageres.cpp.

1630  {
1631  ROW_RES *row = this->row();
1632  if (!row) return nullptr;
1633  for (restart_page(); this->row() != row; forward()) {
1634  // pass
1635  }
1636  return word();
1637 }

◆ row()

ROW_RES* PAGE_RES_IT::row ( ) const
inline

Definition at line 759 of file pageres.h.

760  { // block of cur. word
761  return block_res;

◆ start_page()

WERD_RES * PAGE_RES_IT::start_page ( bool  empty_ok)

Definition at line 1500 of file pageres.cpp.

1500  {
1501  block_res_it.set_to_list(&page_res->block_res_list);
1502  block_res_it.mark_cycle_pt();
1503  prev_block_res = nullptr;
1504  prev_row_res = nullptr;
1505  prev_word_res = nullptr;
1506  block_res = nullptr;
1507  row_res = nullptr;
1508  word_res = nullptr;
1509  next_block_res = nullptr;
1510  next_row_res = nullptr;
1511  next_word_res = nullptr;
1512  internal_forward(true, empty_ok);
1513  return internal_forward(false, empty_ok);
1514 }

◆ word()

WERD_RES* PAGE_RES_IT::word ( ) const
inline

Definition at line 756 of file pageres.h.

757  { // row of current word
758  return row_res;

Member Data Documentation

◆ page_res

PAGE_RES* PAGE_RES_IT::page_res

Definition at line 679 of file pageres.h.


The documentation for this class was generated from the following files:
pageres.h
pageres.cpp
TBOX
Definition: rect.h:34
tesseract::BoxWord
Definition: boxword.h:37
ROW_RES::rej_count
int32_t rej_count
Definition: pageres.h:144
TBOX::null_box
bool null_box() const
Definition: rect.h:50
W_FUZZY_SP
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:55
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
REJMAP::reject_count
int16_t reject_count()
Definition: rejctmap.h:229
BLOCK_RES::rej_count
int32_t rej_count
Definition: pageres.h:120
WERD_RES::part_of_combo
bool part_of_combo
Definition: pageres.h:340
REJMAP::length
int32_t length() const
Definition: rejctmap.h:223
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:756
ROW_RES
Definition: pageres.h:138
WERD
Definition: werd.h:56
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
WERD_RES::combination
bool combination
Definition: pageres.h:339
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:294
GenericVector< int >
ROW::para
PARA * para() const
Definition: ocrrow.h:118
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:88
C_BLOB::SortByXMiddle
static int SortByXMiddle(const void *v1, const void *v2)
Definition: stepblob.h:125
tesseract::PointerVector::clear
void clear()
Definition: genericvector.h:530
ROW_RES::word_res_list
WERD_RES_LIST word_res_list
Definition: pageres.h:146
PAGE_RES_IT::ResetWordIterator
void ResetWordIterator()
Definition: pageres.cpp:1523
PAGE_RES_IT::row
ROW_RES * row() const
Definition: pageres.h:759
W_FUZZY_NON
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:56
ROW_RES::char_count
int32_t char_count
Definition: pageres.h:143
PAGE_RES_IT::restart_page
WERD_RES * restart_page()
Definition: pageres.h:703
WERD_RES
Definition: pageres.h:166
PAGE_RES_IT::forward
WERD_RES * forward()
Definition: pageres.h:736
GenericVector::size
int size() const
Definition: genericvector.h:72
WERD::rej_cblob_list
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
PAGE_RES_IT::page_res
PAGE_RES * page_res
Definition: pageres.h:679
GenericVector::back
T & back() const
Definition: genericvector.h:766
BLOCK_RES::row_res_list
ROW_RES_LIST row_res_list
Definition: pageres.h:127
WERD_RES::box_word
tesseract::BoxWord * box_word
Definition: pageres.h:272
WERD::space
uint8_t space()
Definition: werd.h:99
WERD_RES::word
WERD * word
Definition: pageres.h:186
W_EOL
@ W_EOL
end of line
Definition: werd.h:49
PAGE_RES::char_count
int32_t char_count
Definition: pageres.h:80
ROW_RES::row
ROW * row
Definition: pageres.h:142
PAGE_RES_IT::start_page
WERD_RES * start_page(bool empty_ok)
Definition: pageres.cpp:1500
PAGE_RES::block_res_list
BLOCK_RES_LIST block_res_list
Definition: pageres.h:82
GenericVector::empty
bool empty() const
Definition: genericvector.h:91
W_BOL
@ W_BOL
start of line
Definition: werd.h:48
PAGE_RES_IT::DeleteCurrentWord
void DeleteCurrentWord()
Definition: pageres.cpp:1440
ROW_RES::whole_word_rej_count
int32_t whole_word_rej_count
Definition: pageres.h:145
BLOCK_RES::char_count
int32_t char_count
Definition: pageres.h:119
PAGE_RES::rej_count
int32_t rej_count
Definition: pageres.h:81