21 #include "config_auto.h"
82 : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83 median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
84 median_left_(INT32_MAX), median_right_(-INT32_MAX),
85 blob_type_(blob_type),
87 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
117 ColPartition_LIST* big_part_list) {
126 if (big_part_list !=
nullptr) {
127 ColPartition_IT part_it(big_part_list);
128 part_it.add_to_end(single);
136 ColPartition_C_IT it(&upper_partners_);
137 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
138 it.data()->RemovePartner(
false,
this);
140 it.set_to_list(&lower_partners_);
141 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142 it.data()->RemovePartner(
true,
this);
150 int left,
int bottom,
151 int right,
int top) {
153 part->bounding_box_ =
TBOX(left, bottom, right, top);
154 part->median_bottom_ = bottom;
155 part->median_top_ = top;
156 part->median_height_ = top - bottom;
157 part->median_left_ = left;
158 part->median_right_ = right;
159 part->median_width_ = right - left;
160 part->left_key_ = part->BoxLeftKey();
161 part->right_key_ = part->BoxRightKey();
172 if (boxes_.length() == 0) {
175 bounding_box_ += box;
179 if (!last_add_was_vertical_) {
180 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
181 last_add_was_vertical_ =
true;
183 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
185 if (last_add_was_vertical_) {
186 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
187 last_add_was_vertical_ =
false;
189 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
196 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
198 bounding_box_.
left(), bounding_box_.
right());
203 BLOBNBOX_C_IT bb_it(&boxes_);
204 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
205 if (box == bb_it.data()) {
217 BLOBNBOX_C_IT bb_it(&boxes_);
218 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
221 if (biggest ==
nullptr ||
225 if (biggest ==
nullptr ||
236 BLOBNBOX_C_IT bb_it(&boxes_);
237 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
238 if (box != bb_it.data()) {
239 result += bb_it.data()->bounding_box();
248 BLOBNBOX_C_IT bb_it(&boxes_);
249 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
252 if (other ==
nullptr) {
264 BLOBNBOX_C_IT bb_it(&boxes_);
265 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
277 BLOBNBOX_C_IT bb_it(&boxes_);
278 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
280 if (bblob->
owner() ==
this)
290 BLOBNBOX_C_IT bb_it(&boxes_);
291 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
298 if (bb_it.empty())
return false;
309 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
311 delete bblob->
cblob();
321 BLOBNBOX_CLIST reversed_boxes;
322 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
324 BLOBNBOX_C_IT bb_it(&boxes_);
325 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
326 reversed_it.add_before_then_move(bb_it.extract());
328 bb_it.add_list_after(&reversed_boxes);
330 int tmp = left_margin_;
331 left_margin_ = -right_margin_;
332 right_margin_ = -tmp;
343 if (bounding_box_.
left() > bounding_box_.
right()) {
345 tprintf(
"Bounding box invalid\n");
350 if (left_margin_ > bounding_box_.
left() ||
351 right_margin_ < bounding_box_.
right()) {
360 tprintf(
"Key inside box: %d v %d or %d v %d\n",
371 int y = (
MidY() + other.
MidY()) / 2;
414 if (bounding_box_.
right() < other.bounding_box_.
left() &&
417 if (other.bounding_box_.
right() < bounding_box_.
left() &&
420 if (bounding_box_.
left() > other.bounding_box_.
right() &&
423 if (other.bounding_box_.
left() > bounding_box_.
right() &&
431 double fractional_tolerance,
432 double constant_tolerance)
const {
434 int nonmatch_count = 0;
435 BLOBNBOX_C_IT box_it(
const_cast<BLOBNBOX_CLIST*
>(&boxes_));
436 BLOBNBOX_C_IT other_it(
const_cast<BLOBNBOX_CLIST*
>(&other.boxes_));
437 box_it.mark_cycle_pt();
438 other_it.mark_cycle_pt();
439 while (!box_it.cycled_list() && !other_it.cycled_list()) {
440 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
441 fractional_tolerance,
449 return match_count > nonmatch_count;
460 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST*
>(&boxes_));
461 int min_top = INT32_MAX;
462 int max_bottom = -INT32_MAX;
463 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
467 tprintf(
"Blob is not a diacritic:");
479 bool result = min_top > candidate.median_bottom_ &&
480 max_bottom < candidate.median_top_;
485 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
486 max_bottom, min_top, median_bottom_, median_top_);
495 if (tab_vector !=
nullptr) {
499 left_key_tab_ =
false;
507 if (tab_vector !=
nullptr) {
508 right_key_ = tab_vector->
sort_key();
511 right_key_tab_ =
false;
520 left_key_tab_ = take_box ? false : src.left_key_tab_;
522 left_key_ = src.left_key_;
527 if (left_margin_ > bounding_box_.
left())
528 left_margin_ = src.left_margin_;
533 right_key_tab_ = take_box ? false : src.right_key_tab_;
534 if (right_key_tab_) {
535 right_key_ = src.right_key_;
540 if (right_margin_ < bounding_box_.
right())
541 right_margin_ = src.right_margin_;
546 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST*
>(&boxes_));
547 return it.data()->left_rule();
551 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST*
>(&boxes_));
553 return it.data()->right_rule();
558 return special_blobs_densities_[
type];
563 BLOBNBOX_C_IT blob_it(&boxes_);
565 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
579 special_blobs_densities_[
type] = density;
583 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
584 if (boxes_.empty()) {
588 BLOBNBOX_C_IT blob_it(&boxes_);
589 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
592 special_blobs_densities_[
type]++;
595 for (
float& special_blobs_density : special_blobs_densities_) {
596 special_blobs_density /= boxes_.length();
605 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
607 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
609 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
611 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
619 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
620 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
621 if (it.data() == partner) {
630 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
631 if (!partners->singleton())
633 ColPartition_C_IT it(partners);
645 bounding_box_.
bottom()) ||
647 other->bounding_box_.
bottom())) {
654 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
656 unsigned w1 = boxes_.length();
657 unsigned w2 = other->boxes_.length();
658 float new_val = special_blobs_densities_[
type] * w1 +
659 other->special_blobs_densities_[
type] * w2;
662 special_blobs_densities_[
type] = new_val / (w1 + w2);
667 BLOBNBOX_C_IT it(&boxes_);
668 BLOBNBOX_C_IT it2(&other->boxes_);
669 for (; !it2.empty(); it2.forward()) {
672 if (prev_owner != other && prev_owner !=
nullptr) {
676 ASSERT_HOST(prev_owner == other || prev_owner ==
nullptr);
677 if (prev_owner == other)
679 it.add_to_end(bbox2);
681 left_margin_ = std::min(left_margin_, other->left_margin_);
682 right_margin_ = std::max(right_margin_, other->right_margin_);
683 if (other->left_key_ < left_key_) {
684 left_key_ = other->left_key_;
685 left_key_tab_ = other->left_key_tab_;
687 if (other->right_key_ > right_key_) {
688 right_key_ = other->right_key_;
689 right_key_tab_ = other->right_key_tab_;
694 flow_ = other->flow_;
695 blob_type_ = other->blob_type_;
699 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
700 last_add_was_vertical_ =
true;
702 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
703 last_add_was_vertical_ =
false;
708 for (
int upper = 0; upper < 2; ++upper) {
709 ColPartition_CLIST partners;
710 ColPartition_C_IT part_it(&partners);
711 part_it.add_list_after(upper ? &other->upper_partners_
712 : &other->lower_partners_);
713 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
738 int ok_box_overlap,
bool debug) {
742 tprintf(
"Vertical partition\n");
756 if (merged_box.
bottom() < median_top_ && merged_box.
top() > median_bottom_ &&
757 merged_box.
bottom() < bounding_box_.
top() - ok_box_overlap &&
758 merged_box.
top() > bounding_box_.
bottom() + ok_box_overlap) {
760 tprintf(
"Excessive box overlap\n");
770 if (boxes_.empty() || boxes_.singleton())
772 BLOBNBOX_C_IT it(&boxes_);
773 TBOX left_box(it.data()->bounding_box());
774 for (it.forward(); !it.at_first(); it.forward()) {
790 BLOBNBOX_C_IT it(&boxes_);
791 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
795 if (bbox == split_blob || !split_part->boxes_.empty()) {
796 split_part->
AddBox(it.extract());
808 right_key_tab_ =
false;
809 split_part->left_key_tab_ =
false;
824 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
828 BLOBNBOX_C_IT it(&boxes_);
829 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
834 if (box.
left() >= split_x) {
835 split_part->
AddBox(it.extract());
842 it.add_list_after(&split_part->boxes_);
851 right_key_tab_ =
false;
852 split_part->left_key_tab_ =
false;
853 right_margin_ = split_x;
854 split_part->left_margin_ = split_x;
862 bounding_box_ =
TBOX();
863 BLOBNBOX_C_IT it(&boxes_);
865 int non_leader_count = 0;
867 bounding_box_.
set_left(left_margin_);
872 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
884 tprintf(
"Computed left-illegal partition\n");
890 tprintf(
"Computed right-illegal partition\n");
897 median_top_ = bounding_box_.
top();
898 median_bottom_ = bounding_box_.
bottom();
899 median_height_ = bounding_box_.
height();
900 median_left_ = bounding_box_.
left();
901 median_right_ = bounding_box_.
right();
902 median_width_ = bounding_box_.
width();
905 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
907 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
908 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
909 STATS width_stats(0, bounding_box_.
width() + 1);
910 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
914 int area = box.
area();
915 top_stats.
add(box.
top(), area);
918 left_stats.
add(box.
left(), area);
923 median_top_ =
static_cast<int>(top_stats.
median() + 0.5);
924 median_bottom_ =
static_cast<int>(bottom_stats.
median() + 0.5);
925 median_height_ =
static_cast<int>(height_stats.
median() + 0.5);
926 median_left_ =
static_cast<int>(left_stats.
median() + 0.5);
927 median_right_ =
static_cast<int>(right_stats.
median() + 0.5);
928 median_width_ =
static_cast<int>(width_stats.
median() + 0.5);
932 tprintf(
"Made partition with bad right coords");
936 tprintf(
"Made partition with bad left coords");
942 for (
int upper = 0; upper < 2; ++upper) {
943 ColPartition_CLIST partners;
944 ColPartition_C_IT part_it(&partners);
945 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
946 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
953 bounding_box_.
bottom())) {
954 tprintf(
"Recomputed box for partition %p\n",
this);
961 BLOBNBOX_C_IT it(&boxes_);
962 int overlap_count = 0;
963 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
968 return overlap_count;
974 int first_spanned_col = -1;
977 bounding_box_.
left(), bounding_box_.
right(),
978 std::min(bounding_box_.
height(), bounding_box_.
width()),
979 MidY(), left_margin_, right_margin_,
980 &first_column_, &last_column_,
982 column_set_ = columns;
983 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
987 if (first_spanned_col >= 0) {
988 first_column_ = first_spanned_col;
989 last_column_ = first_spanned_col;
991 if ((first_column_ & 1) == 0)
992 last_column_ = first_column_;
993 else if ((last_column_ & 1) == 0)
994 first_column_ = last_column_;
996 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1014 switch (blob_type_) {
1057 int* first_col,
int* last_col) {
1058 int first_spanned_col = -1;
1061 bounding_box_.
left(), bounding_box_.
right(),
1062 std::min(bounding_box_.
height(), bounding_box_.
width()),
1063 MidY(), left_margin_, right_margin_,
1064 first_col, last_col,
1065 &first_spanned_col);
1073 good_width_ = cb->
Run(width);
1074 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1084 bool result =
false;
1086 int part_width = bounding_box_.
width();
1087 STATS gap_stats(0, part_width);
1088 STATS width_stats(0, part_width);
1089 BLOBNBOX_C_IT it(&boxes_);
1094 for (it.forward(); !it.at_first(); it.forward()) {
1099 width_stats.
add(right - left, 1);
1104 double median_gap = gap_stats.
median();
1108 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1110 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1120 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1121 int min_step =
static_cast<int>(median_gap +
median_width + 0.5);
1122 int max_step = min_step + offset;
1125 int part_left = bounding_box_.
left() - min_step / 2;
1126 part_width += min_step;
1127 auto* projection =
new DPPoint[part_width];
1128 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1133 for (
int x = left; x < right; ++x) {
1134 projection[left - part_left].AddLocalCost(height);
1139 part_width, projection);
1140 if (best_end !=
nullptr && best_end->
total_cost() < blob_count) {
1143 bool modified_blob_list =
false;
1144 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1147 if (it.at_first()) {
1152 modified_blob_list =
true;
1158 it.data_relative(-1)->bounding_box().right();
1161 modified_blob_list =
true;
1172 if (best_end ==
nullptr) {
1179 delete [] projection;
1193 int good_blob_score_ = 0;
1194 int noisy_count = 0;
1195 int hline_count = 0;
1196 int vline_count = 0;
1197 BLOBNBOX_C_IT it(&boxes_);
1198 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1208 if (hline_count > vline_count) {
1211 }
else if (vline_count > hline_count) {
1214 }
else if (value < -1 || 1 < value) {
1218 long_side = bounding_box_.
width();
1219 short_side = bounding_box_.
height();
1222 long_side = bounding_box_.
height();
1223 short_side = bounding_box_.
width();
1239 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1247 if (noisy_count >= blob_count) {
1253 bounding_box_.
bottom())) {
1254 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1255 blob_count, noisy_count, good_blob_score_);
1256 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1257 value, flow_, blob_type_);
1268 BLOBNBOX_C_IT it(&boxes_);
1269 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1286 int total_height = 0;
1288 int height_count = 0;
1290 BLOBNBOX_C_IT it(&boxes_);
1291 TBOX box(it.data()->bounding_box());
1300 linepoints.
Add(first_pt);
1301 for (it.forward(); !it.at_last(); it.forward()) {
1305 linepoints.
Add(box_pt);
1306 total_height += box.
width();
1307 coverage += box.
height();
1310 box = it.data()->bounding_box();
1312 linepoints.
Add(last_pt);
1313 width = last_pt.
y() - first_pt.
y();
1317 TBOX box(it.data()->bounding_box());
1321 linepoints.
Add(first_pt);
1322 for (it.forward(); !it.at_last(); it.forward()) {
1326 linepoints.
Add(box_pt);
1327 total_height += box.
height();
1328 coverage += box.
width();
1331 box = it.data()->bounding_box();
1333 linepoints.
Add(last_pt);
1334 width = last_pt.
x() - first_pt.
x();
1337 if (height_count == 0)
1341 double error = linepoints.
Fit(&start_pt, &end_pt);
1349 ColPartition_LIST* used_parts,
1350 WorkingPartSet_LIST* working_sets) {
1353 block_owned_ =
true;
1354 WorkingPartSet_IT it(working_sets);
1357 if (partner !=
nullptr && partner->working_set_ !=
nullptr) {
1358 working_set_ = partner->working_set_;
1363 tprintf(
"Partition with partner has no working set!:");
1371 for (it.mark_cycle_pt(); !it.cycled_list() &&
1372 col_index != first_column_;
1373 it.forward(), ++col_index);
1375 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1379 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1382 work_set = it.data();
1385 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1387 BLOCK_LIST completed_blocks;
1388 TO_BLOCK_LIST to_blocks;
1389 for (; !it.cycled_list() && col_index <= last_column_;
1390 it.forward(), ++col_index) {
1393 &completed_blocks, &to_blocks);
1397 working_set_ = work_set;
1409 ColPartition_LIST* block_parts,
1410 ColPartition_LIST* used_parts,
1411 BLOCK_LIST* completed_blocks,
1412 TO_BLOCK_LIST* to_blocks) {
1413 int page_height = tright.
y() - bleft.
y();
1415 ColPartition_IT it(block_parts);
1417 int max_line_height = 0;
1423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1429 BLOBNBOX_C_IT blob_it(part->
boxes());
1430 int prev_bottom = blob_it.data()->bounding_box().bottom();
1431 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1434 int step = bottom - prev_bottom;
1437 side_steps.
add(step, 1);
1438 prev_bottom = bottom;
1441 if (!it.at_last()) {
1452 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1457 if (part_count == 0)
1460 SmoothSpacings(resolution, page_height, block_parts);
1463 BLOCK_IT block_it(completed_blocks);
1464 TO_BLOCK_IT to_block_it(to_blocks);
1465 ColPartition_LIST spacing_parts;
1466 ColPartition_IT sp_block_it(&spacing_parts);
1468 for (it.mark_cycle_pt(); !it.empty();) {
1470 sp_block_it.add_to_end(part);
1472 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1473 !part->SpacingsEqual(*it.data(), resolution)) {
1476 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1480 ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1482 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d,"
1483 " sizes %d %d %d\n",
1491 if (part->SizesSimilar(*next_part) &&
1498 if (third_part ==
nullptr ||
1499 !next_part->SizesSimilar(*third_part) ||
1506 sp_block_it.add_to_end(it.extract());
1509 tprintf(
"Added line to current block.\n");
1515 if (to_block !=
nullptr) {
1516 to_block_it.add_to_end(to_block);
1517 block_it.add_to_end(to_block->
block);
1519 sp_block_it.set_to_list(&spacing_parts);
1523 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1534 if (pos->
x() < bleft.
x())
1536 if (pos->
x() > tright.
x())
1538 if (pos->
y() < bleft.
y())
1540 if (pos->
y() > tright.
y())
1548 static TO_BLOCK* MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1550 ColPartition_LIST* block_parts,
1551 ColPartition_LIST* used_parts) {
1557 STATS sizes(0, std::max(block_box.width(), block_box.height()));
1559 ColPartition_IT it(block_parts);
1560 auto* to_block =
new TO_BLOCK(block);
1561 BLOBNBOX_IT blob_it(&to_block->blobs);
1562 ColPartition_IT used_it(used_parts);
1563 for (it.move_to_first(); !it.empty(); it.forward()) {
1564 ColPartition* part = it.extract();
1568 for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty();
1571 if (bblob->
owner() != part) {
1572 tprintf(
"Ownership incorrect for blob:");
1576 if (bblob->
owner() ==
nullptr) {
1589 C_OUTLINE_IT ol_it(outlines);
1590 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1595 blob_it.add_after_then_move(bblob);
1597 used_it.add_to_end(part);
1599 if (text_type && blob_it.empty()) {
1604 to_block->line_size = sizes.median();
1605 if (vertical_text) {
1607 if (block_width < line_spacing)
1608 line_spacing = block_width;
1609 to_block->line_spacing =
static_cast<float>(line_spacing);
1610 to_block->max_blob_size =
static_cast<float>(block_width + 1);
1613 if (block_height < line_spacing)
1614 line_spacing = block_height;
1615 to_block->line_spacing =
static_cast<float>(line_spacing);
1616 to_block->max_blob_size =
static_cast<float>(block_height + 1);
1624 ColPartition_LIST* block_parts,
1625 ColPartition_LIST* used_parts) {
1626 if (block_parts->empty())
1632 ColPartition_IT it(block_parts);
1643 ICOORDELT_LIST vertices;
1644 ICOORDELT_IT vert_it(&vertices);
1646 int min_x = INT32_MAX;
1647 int max_x = -INT32_MAX;
1648 int min_y = INT32_MAX;
1649 int max_y = -INT32_MAX;
1653 ColPartition::LeftEdgeRun(&it, &start, &end);
1655 ColPartition::RightEdgeRun(&it, &start, &end);
1656 ClipCoord(bleft, tright, &start);
1657 ClipCoord(bleft, tright, &end);
1658 vert_it.add_after_then_move(
new ICOORDELT(start));
1659 vert_it.add_after_then_move(
new ICOORDELT(end));
1664 if ((iteration == 0 && it.at_first()) ||
1665 (iteration == 1 && it.at_last())) {
1669 }
while (iteration < 2);
1671 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1672 min_x, min_y, max_x, max_y);
1673 auto* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1675 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1682 ColPartition_LIST* block_parts,
1683 ColPartition_LIST* used_parts) {
1684 if (block_parts->empty())
1686 ColPartition_IT it(block_parts);
1689 int line_spacing = block_box.
width();
1691 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1692 block_box += it.data()->bounding_box();
1698 auto* block =
new BLOCK(
"",
true, 0, 0, block_box.
left(), block_box.
bottom(),
1699 block_box.
right(), block_box.
top());
1701 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1707 BLOBNBOX_C_IT blob_it(&boxes_);
1709 int line_size =
IsVerticalType() ? median_width_ : median_height_;
1711 for (; !blob_it.empty(); blob_it.forward()) {
1712 BLOBNBOX* blob = blob_it.extract();
1716 if (row ==
nullptr) {
1717 row =
new TO_ROW(blob,
static_cast<float>(top),
1718 static_cast<float>(bottom),
1719 static_cast<float>(line_size));
1721 row->
add_blob(blob,
static_cast<float>(top),
1722 static_cast<float>(bottom),
1723 static_cast<float>(line_size));
1733 part->left_margin_ = left_margin_;
1734 part->right_margin_ = right_margin_;
1735 part->bounding_box_ = bounding_box_;
1736 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1737 sizeof(special_blobs_densities_));
1738 part->median_bottom_ = median_bottom_;
1739 part->median_top_ = median_top_;
1740 part->median_height_ = median_height_;
1741 part->median_left_ = median_left_;
1742 part->median_right_ = median_right_;
1743 part->median_width_ = median_width_;
1744 part->good_width_ = good_width_;
1745 part->good_column_ = good_column_;
1746 part->left_key_tab_ = left_key_tab_;
1747 part->right_key_tab_ = right_key_tab_;
1748 part->type_ = type_;
1749 part->flow_ = flow_;
1750 part->left_key_ = left_key_;
1751 part->right_key_ = right_key_;
1752 part->first_column_ = first_column_;
1753 part->last_column_ = last_column_;
1754 part->owns_blobs_ =
false;
1761 BLOBNBOX_C_IT inserter(copy->
boxes());
1762 BLOBNBOX_C_IT traverser(
boxes());
1763 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1764 inserter.add_after_then_move(traverser.data());
1768 #ifndef GRAPHICS_DISABLED
1779 static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1784 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1785 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1786 " ts=%d bs=%d ls=%d rs=%d\n",
1787 boxes_.empty() ?
'E' :
' ',
1788 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1789 bounding_box_.
left(), median_left_,
1790 bounding_box_.
bottom(), median_bottom_,
1791 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1792 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1793 good_width_, good_column_, type_,
1794 kBlobTypes[blob_type_], flow_,
1795 first_column_, last_column_, boxes_.length(),
1796 space_above_, space_below_, space_to_left_, space_to_right_);
1801 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1802 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1803 color1_[L_ALPHA_CHANNEL],
1804 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1809 STATS left_stats(0, working_set_count);
1810 STATS right_stats(0, working_set_count);
1815 if (partner->type_ > max_type)
1816 max_type = partner->type_;
1817 if (column_set_ == partner->column_set_) {
1818 left_stats.
add(partner->first_column_, 1);
1819 right_stats.
add(partner->last_column_, 1);
1827 first_column_ = left_stats.
mode();
1828 last_column_ = right_stats.
mode();
1829 if (last_column_ < first_column_)
1830 last_column_ = first_column_;
1835 partner->type_ = max_type;
1837 if (column_set_ == partner->column_set_) {
1838 partner->first_column_ = first_column_;
1839 partner->last_column_ = last_column_;
1880 RefinePartnersInternal(
true, get_desperate, grid);
1881 RefinePartnersInternal(
false, get_desperate, grid);
1885 RefinePartnersByType(
true, &upper_partners_);
1886 RefinePartnersByType(
false, &lower_partners_);
1890 if (!upper_partners_.empty() && !upper_partners_.singleton())
1891 RefinePartnersByOverlap(
true, &upper_partners_);
1892 if (!lower_partners_.empty() && !lower_partners_.singleton())
1893 RefinePartnersByOverlap(
false, &lower_partners_);
1902 void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1904 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1905 if (!partners->empty() && !partners->singleton()) {
1906 RefinePartnersByType(upper, partners);
1907 if (!partners->empty() && !partners->singleton()) {
1909 RefinePartnerShortcuts(upper, partners);
1910 if (!partners->empty() && !partners->singleton()) {
1914 RefineTextPartnersByMerge(upper,
false, partners, grid);
1915 if (!partners->empty() && !partners->singleton())
1916 RefineTextPartnersByMerge(upper,
true, partners, grid);
1919 if (!partners->empty() && !partners->singleton())
1920 RefinePartnersByOverlap(upper, partners);
1929 void ColPartition::RefinePartnersByType(
bool upper,
1930 ColPartition_CLIST* partners) {
1934 tprintf(
"Refining %d %s partners by type for:\n",
1935 partners->length(), upper ?
"Upper" :
"Lower");
1938 ColPartition_C_IT it(partners);
1944 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1951 partner->RemovePartner(!upper,
this);
1960 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1968 partner->RemovePartner(!upper,
this);
1983 void ColPartition::RefinePartnerShortcuts(
bool upper,
1984 ColPartition_CLIST* partners) {
1985 bool done_any =
false;
1988 ColPartition_C_IT it(partners);
1989 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1993 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
1994 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
1999 a->RemovePartner(!upper,
this);
2002 ColPartition_C_IT it2(partners);
2003 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2008 b2->RemovePartner(!upper,
this);
2021 }
while (done_any && !partners->empty() && !partners->singleton());
2032 void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2033 ColPartition_CLIST* partners,
2034 ColPartitionGrid* grid) {
2038 tprintf(
"Refining %d %s partners by merge for:\n",
2039 partners->length(), upper ?
"Upper" :
"Lower");
2042 while (!partners->empty() && !partners->singleton()) {
2045 ColPartition_C_IT it(partners);
2049 ColPartition_CLIST candidates;
2050 ColPartition_C_IT cand_it(&candidates);
2051 for (it.forward(); !it.at_first(); it.forward()) {
2053 if (part->first_column_ == candidate->last_column_ &&
2054 part->last_column_ == candidate->first_column_)
2055 cand_it.add_after_then_move(it.data());
2057 int overlap_increase;
2058 ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
2059 nullptr, &overlap_increase);
2060 if (candidate !=
nullptr && (overlap_increase <= 0 || desperate)) {
2062 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2063 part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
2067 grid->RemoveBBox(candidate);
2068 grid->RemoveBBox(part);
2069 part->Absorb(candidate,
nullptr);
2071 grid->InsertBBox(
true,
true, part);
2072 if (overlap_increase > 0)
2073 part->desperately_merged_ =
true;
2082 void ColPartition::RefinePartnersByOverlap(
bool upper,
2083 ColPartition_CLIST* partners) {
2087 tprintf(
"Refining %d %s partners by overlap for:\n",
2088 partners->length(), upper ?
"Upper" :
"Lower");
2091 ColPartition_C_IT it(partners);
2094 int best_overlap = 0;
2095 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2097 int overlap = std::min(bounding_box_.
right(), partner->bounding_box_.right())
2098 - std::max(bounding_box_.
left(), partner->bounding_box_.left());
2099 if (overlap > best_overlap) {
2100 best_overlap = overlap;
2101 best_partner = partner;
2105 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2107 if (partner != best_partner) {
2112 partner->RemovePartner(!upper,
this);
2119 bool ColPartition::ThisPartitionBetter(
BLOBNBOX* bbox,
2120 const ColPartition& other) {
2123 int left = box.
left();
2124 int right = box.
right();
2125 if (left < left_margin_ || right > right_margin_)
2127 if (left < other.left_margin_ || right > other.right_margin_)
2129 int top = box.
top();
2130 int bottom = box.
bottom();
2131 int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
2132 int other_overlap = std::min(top, other.median_top_) -
2133 std::max(bottom, other.median_bottom_);
2134 int this_miss = median_top_ - median_bottom_ - this_overlap;
2135 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2137 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2139 this_overlap, other_overlap, this_miss, other_miss,
2140 median_top_, other.median_top_);
2142 if (this_miss < other_miss)
2144 if (this_miss > other_miss)
2146 if (this_overlap > other_overlap)
2148 if (this_overlap < other_overlap)
2150 return median_top_ >= other.median_top_;
2157 static int MedianSpacing(
int page_height, ColPartition_IT it) {
2158 STATS stats(0, page_height);
2159 while (!it.cycled_list()) {
2160 ColPartition* part = it.data();
2162 stats.add(part->bottom_spacing(), 1);
2163 stats.add(part->top_spacing(), 1);
2165 return static_cast<int>(stats.median() + 0.5);
2179 return (last_column_ >= part.first_column_) &&
2180 (first_column_ <= part.last_column_);
2186 void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2187 ColPartition_LIST* parts) {
2195 ColPartition_IT it(parts);
2202 int median_space = MedianSpacing(page_height, it);
2203 ColPartition_IT start_it(it);
2204 ColPartition_IT end_it(it);
2205 for (
int i = 0; i < PN_COUNT; ++i) {
2206 if (i < PN_UPPER || it.cycled_list()) {
2207 neighbourhood[i] =
nullptr;
2211 neighbourhood[i] = it.data();
2215 while (neighbourhood[PN_UPPER] !=
nullptr) {
2237 if (neighbourhood[PN_LOWER] ==
nullptr ||
2238 (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2240 !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2241 (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2242 !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2243 (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2244 !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
2247 ColPartition_IT sum_it(start_it);
2249 double total_bottom = 0.0;
2250 double total_top = 0.0;
2251 int total_count = 0;
2254 while (upper != last_part) {
2255 total_bottom += upper->bottom_spacing();
2256 total_top += upper->top_spacing();
2259 upper = sum_it.data();
2261 if (total_count > 0) {
2263 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2264 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
2266 tprintf(
"Spacing run ended. Cause:");
2267 if (neighbourhood[PN_LOWER] ==
nullptr) {
2270 tprintf(
"Spacing change. Spacings:\n");
2271 for (
int i = 0; i < PN_COUNT; ++i) {
2272 if (neighbourhood[i] ==
nullptr) {
2274 if (i > 0 && neighbourhood[i - 1] !=
nullptr) {
2279 tprintf(
" nullptr lower partner:\n");
2285 tprintf(
"Top = %d, bottom = %d\n",
2294 upper = sum_it.data();
2295 while (upper != last_part) {
2303 upper = sum_it.data();
2310 median_space = MedianSpacing(page_height, end_it);
2313 for (
int j = 1; j < PN_COUNT; ++j) {
2314 neighbourhood[j - 1] = neighbourhood[j];
2316 if (it.cycled_list()) {
2317 neighbourhood[PN_COUNT - 1] =
nullptr;
2319 neighbourhood[PN_COUNT - 1] = it.data();
2329 bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
2330 ColPartition** parts) {
2331 if (parts[PN_UPPER] ==
nullptr || parts[PN_LOWER] ==
nullptr)
2335 return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2336 median_spacing, resolution) &&
2337 ((parts[PN_ABOVE1] !=
nullptr &&
2338 parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2339 (parts[PN_BELOW1] !=
nullptr &&
2340 parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
2345 bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2346 int bottom_error = BottomSpacingMargin(resolution);
2347 int top_error = TopSpacingMargin(resolution);
2348 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
2354 bool ColPartition::SpacingsEqual(
const ColPartition& other,
2355 int resolution)
const {
2356 int bottom_error = std::max(BottomSpacingMargin(resolution),
2357 other.BottomSpacingMargin(resolution));
2358 int top_error = std::max(TopSpacingMargin(resolution),
2359 other.TopSpacingMargin(resolution));
2360 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2361 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2362 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
2369 bool ColPartition::SummedSpacingOK(
const ColPartition& other,
2370 int spacing,
int resolution)
const {
2371 int bottom_error = std::max(BottomSpacingMargin(resolution),
2372 other.BottomSpacingMargin(resolution));
2373 int top_error = std::max(TopSpacingMargin(resolution),
2374 other.TopSpacingMargin(resolution));
2375 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2376 int top_total = top_spacing_ + other.top_spacing_;
2377 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
2379 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
2385 int ColPartition::BottomSpacingMargin(
int resolution)
const {
2391 int ColPartition::TopSpacingMargin(
int resolution)
const {
2393 BottomSpacingMargin(resolution);
2398 bool ColPartition::SizesSimilar(
const ColPartition& other)
const {
2399 return median_height_ <= other.median_height_ *
kMaxSizeRatio &&
2406 static bool UpdateLeftMargin(
const ColPartition& part,
2407 int* margin_left,
int* margin_right) {
2408 const TBOX& part_box = part.bounding_box();
2409 int top = part_box.
top();
2410 int bottom = part_box.
bottom();
2411 int tl_key = part.SortKey(part.left_margin(), top);
2412 int tr_key = part.SortKey(part_box.
left(), top);
2413 int bl_key = part.SortKey(part.left_margin(), bottom);
2414 int br_key = part.SortKey(part_box.
left(), bottom);
2415 int left_key = std::max(tl_key, bl_key);
2416 int right_key = std::min(tr_key, br_key);
2417 if (left_key <= *margin_right && right_key >= *margin_left) {
2419 *margin_right = std::min(*margin_right, right_key);
2420 *margin_left = std::max(*margin_left, left_key);
2431 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
2435 int start_y = part->bounding_box_.top();
2436 if (!part_it->at_first()) {
2437 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2438 if (prev_bottom < start_y)
2439 start_y = prev_bottom;
2440 else if (prev_bottom > start_y)
2441 start_y = (start_y + prev_bottom) / 2;
2443 int end_y = part->bounding_box_.bottom();
2444 int margin_right = INT32_MAX;
2445 int margin_left = -INT32_MAX;
2446 UpdateLeftMargin(*part, &margin_left, &margin_right);
2449 part = part_it->data();
2450 }
while (!part_it->at_first() &&
2451 UpdateLeftMargin(*part, &margin_left, &margin_right));
2455 int next_margin_right = INT32_MAX;
2456 int next_margin_left = -INT32_MAX;
2457 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
2458 if (next_margin_left > margin_right) {
2459 ColPartition_IT next_it(*part_it);
2462 part = next_it.data();
2463 }
while (!next_it.at_first() &&
2464 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2468 part_it->backward();
2469 part = part_it->data();
2470 }
while (part != start_part &&
2471 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2475 part = part_it->data_relative(-1);
2476 end_y = part->bounding_box_.bottom();
2477 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2478 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
2479 start->
set_y(start_y);
2480 start->
set_x(part->XAtY(margin_right, start_y));
2482 end->
set_x(part->XAtY(margin_right, end_y));
2484 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2485 start_y, end_y, part->XAtY(margin_left, end_y),
2486 end->
x(), part->left_margin_, part->bounding_box_.left());
2492 static bool UpdateRightMargin(
const ColPartition& part,
2493 int* margin_left,
int* margin_right) {
2494 const TBOX& part_box = part.bounding_box();
2495 int top = part_box.
top();
2496 int bottom = part_box.
bottom();
2497 int tl_key = part.SortKey(part_box.
right(), top);
2498 int tr_key = part.SortKey(part.right_margin(), top);
2499 int bl_key = part.SortKey(part_box.
right(), bottom);
2500 int br_key = part.SortKey(part.right_margin(), bottom);
2501 int left_key = std::max(tl_key, bl_key);
2502 int right_key = std::min(tr_key, br_key);
2503 if (left_key <= *margin_right && right_key >= *margin_left) {
2505 *margin_right = std::min(*margin_right, right_key);
2506 *margin_left = std::max(*margin_left, left_key);
2518 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
2522 int start_y = part->bounding_box_.bottom();
2523 if (!part_it->at_last()) {
2524 int next_y = part_it->data_relative(1)->bounding_box_.top();
2525 if (next_y > start_y)
2527 else if (next_y < start_y)
2528 start_y = (start_y + next_y) / 2;
2530 int end_y = part->bounding_box_.top();
2531 int margin_right = INT32_MAX;
2532 int margin_left = -INT32_MAX;
2533 UpdateRightMargin(*part, &margin_left, &margin_right);
2535 part_it->backward();
2536 part = part_it->data();
2537 }
while (!part_it->at_last() &&
2538 UpdateRightMargin(*part, &margin_left, &margin_right));
2541 int next_margin_right = INT32_MAX;
2542 int next_margin_left = -INT32_MAX;
2543 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
2544 if (next_margin_right < margin_left) {
2545 ColPartition_IT next_it(*part_it);
2548 part = next_it.data();
2549 }
while (!next_it.at_last() &&
2550 UpdateRightMargin(*part, &next_margin_left,
2551 &next_margin_right));
2556 part = part_it->data();
2557 }
while (part != start_part &&
2558 UpdateRightMargin(*part, &next_margin_left,
2559 &next_margin_right));
2560 part_it->backward();
2563 part = part_it->data_relative(1);
2564 end_y = part->bounding_box().top();
2565 if (!part_it->at_last() &&
2566 part_it->data()->bounding_box_.bottom() > end_y)
2567 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
2568 start->
set_y(start_y);
2569 start->
set_x(part->XAtY(margin_left, start_y));
2571 end->
set_x(part->XAtY(margin_left, end_y));
2573 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2574 start_y, end_y, end->
x(), part->XAtY(margin_right, end_y),
2575 part->bounding_box_.right(), part->right_margin_);
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
#define ELIST2IZE(CLASSNAME)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
bool NearlyEqual(T x, T y, T tolerance)
DLLSYM void tprintf(const char *format,...)
int textord_debug_tabfind
const double kMaxLeaderGapFractionOfMin
const int kColumnWidthFactor
const int kMinChainTextValue
const double kMaxSizeRatio
const int kHorzStrongTextlineCount
const int kMaxColorDistance
const int kHorzStrongTextlineHeight
const double kMinBaselineCoverage
const double kMaxBaselineError
const int kHorzStrongTextlineAspect
const int kMinLeaderCount
const double kMaxSameBlockLineSpacing
const double kMaxTopSpacingFraction
const int kMaxRMSColorNoise
const int kMinStrongTextValue
const double kMaxSpacingDrift
const double kMaxLeaderGapFractionOfMax
int NoisyNeighbours() const
BlobRegionType region_type() const
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
void set_flow(BlobTextFlowType value)
BlobSpecialTextType special_text_type() const
void set_region_type(BlobRegionType new_type)
tesseract::ColPartition * owner() const
int base_char_bottom() const
int base_char_top() const
BlobTextFlowType flow() const
const TBOX & bounding_box() const
void set_owner(tesseract::ColPartition *new_owner)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
void Add(const ICOORD &pt)
double Fit(ICOORD *pt1, ICOORD *pt2)
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
int64_t CostWithVariance(const DPPoint *prev)
PDBLK pdblk
Page Description Block.
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void set_poly_block(POLY_BLOCK *blk)
set the poly block
POLY_BLOCK * poly_block() const
void set_x(int16_t xin)
rewrite function
int16_t y() const
access_function
void set_y(int16_t yin)
rewrite function
int16_t x() const
access function
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
bool overlap(const TBOX &box) const
void add(int32_t value, int32_t count)
double ile(double frac) const
static C_BLOB * FakeBlob(const TBOX &box)
C_OUTLINE_LIST * out_list()
static bool WithinTestRegion(int detail_level, int x, int y)
bool ReleaseNonLeaderBoxes()
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
BlobTextFlowType flow() const
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
float SpecialBlobsDensity(const BlobSpecialTextType type) const
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
TBOX BoundsWithoutBox(BLOBNBOX *box)
int SpecialBlobsCount(const BlobSpecialTextType type)
PolyBlockType type() const
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
void set_side_step(int step)
ColPartition * CopyButDontOwnBlobs()
ColPartition * SplitAt(int split_x)
void set_right_margin(int margin)
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
int median_bottom() const
void SetRightTab(const TabVector *tab_vector)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
int bottom_spacing() const
void SetRegionAndFlowTypesFromProjectionValue(int value)
void set_left_margin(int margin)
bool IsPulloutType() const
bool VSignificantCoreOverlap(const ColPartition &other) const
BlobRegionType blob_type() const
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
void AddBox(BLOBNBOX *box)
void SmoothPartnerRun(int working_set_count)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
int VCoreOverlap(const ColPartition &other) const
int XAtY(int sort_key, int y) const
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
bool IsVerticalType() const
void DisownBoxesNoAssert()
void SetLeftTab(const TabVector *tab_vector)
ScrollView::Color BoxColor() const
int CountOverlappingBoxes(const TBOX &box)
bool MatchingTextColor(const ColPartition &other) const
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
void SetColumnGoodness(WidthCallback *cb)
void set_owns_blobs(bool owns_blobs)
bool MatchingSizes(const ColPartition &other) const
void set_type(PolyBlockType t)
void ComputeSpecialBlobsDensity()
void RemovePartner(bool upper, ColPartition *partner)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
ColPartition * ShallowCopy() const
PolyBlockType PartitionType(ColumnSpanningType flow) const
bool IsInSameColumnAs(const ColPartition &part) const
void CopyRightTab(const ColPartition &src, bool take_box)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
ColPartition * SingletonPartner(bool upper)
bool MarkAsLeaderIfMonospaced()
bool ConfirmNoTabViolation(const ColPartition &other) const
void set_bottom_spacing(int spacing)
const TBOX & bounding_box() const
void SetPartitionType(int resolution, ColPartitionSet *columns)
int RightBlobRule() const
void set_top_spacing(int spacing)
int RightAtY(int y) const
void set_block_owned(bool owned)
static int SortByBBox(const void *p1, const void *p2)
bool MatchingColumns(const ColPartition &other) const
void Absorb(ColPartition *other, WidthCallback *cb)
void RemoveBox(BLOBNBOX *box)
void AddPartner(bool upper, ColPartition *partner)
int median_height() const
void set_flow(BlobTextFlowType f)
void CopyLeftTab(const ColPartition &src, bool take_box)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
static bool DifferentSizes(int size1, int size2)
void AddPartition(ColPartition *part)
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)