tesseract  4.1.1
colpartition.cpp
Go to the documentation of this file.
1 // File: colpartition.cpp
3 // Description: Class to hold partitions of the page that correspond
4 // roughly to text lines.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "colpartition.h"
25 #include "colpartitiongrid.h"
26 #include "colpartitionset.h"
27 #include "detlinefit.h"
28 #include "dppoint.h"
29 #include "imagefind.h"
30 #include "workingpartset.h"
31 #include "host.h" // for NearlyEqual
32 
33 #include <algorithm>
34 
35 namespace tesseract {
36 
37 ELIST2IZE(ColPartition)
38 CLISTIZE(ColPartition)
39 
40 
42 // Maximum change in spacing (in inches) to ignore.
43 const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point.
44 // Maximum fraction of line height used as an additional allowance
45 // for top spacing.
46 const double kMaxTopSpacingFraction = 0.25;
47 // What multiple of the largest line height should be used as an upper bound
48 // for whether lines are in the same text block?
49 const double kMaxSameBlockLineSpacing = 3;
50 // Maximum ratio of sizes for lines to be considered the same size.
51 const double kMaxSizeRatio = 1.5;
52 // Fraction of max of leader width and gap for max IQR of gaps.
53 const double kMaxLeaderGapFractionOfMax = 0.25;
54 // Fraction of min of leader width and gap for max IQR of gaps.
55 const double kMaxLeaderGapFractionOfMin = 0.5;
56 // Minimum number of blobs to be considered a leader.
57 const int kMinLeaderCount = 5;
58 // Minimum score for a STRONG_CHAIN textline.
59 const int kMinStrongTextValue = 6;
60 // Minimum score for a CHAIN textline.
61 const int kMinChainTextValue = 3;
62 // Minimum number of blobs for strong horizontal text lines.
64 // Minimum height (in image pixels) for strong horizontal text lines.
66 // Minimum aspect ratio for strong horizontal text lines.
68 // Maximum upper quartile error allowed on a baseline fit as a fraction
69 // of height.
70 const double kMaxBaselineError = 0.4375;
71 // Min coverage for a good baseline between vectors
72 const double kMinBaselineCoverage = 0.5;
73 // Max RMS color noise to compare colors.
74 const int kMaxRMSColorNoise = 128;
75 // Maximum distance to allow a partition color to be to use that partition
76 // in smoothing neighbouring types. This is a squared distance.
77 const int kMaxColorDistance = 900;
78 
79 // blob_type is the blob_region_type_ of the blobs in this partition.
80 // Vertical is the direction of logical vertical on the possibly skewed image.
81 ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
82  : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83  median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
84  median_left_(INT32_MAX), median_right_(-INT32_MAX),
85  blob_type_(blob_type),
86  vertical_(vertical) {
87  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
88 }
89 
90 // Constructs a fake ColPartition with a single fake BLOBNBOX, all made
91 // from a single TBOX.
92 // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
93 // the ColPartition owns the BLOBNBOX!!!
94 // Call DeleteBoxes before deleting the ColPartition.
96  PolyBlockType block_type,
97  BlobRegionType blob_type,
98  BlobTextFlowType flow) {
99  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
100  part->set_type(block_type);
101  part->set_flow(flow);
102  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
103  part->set_left_margin(box.left());
104  part->set_right_margin(box.right());
105  part->SetBlobTypes();
106  part->ComputeLimits();
107  part->ClaimBoxes();
108  return part;
109 }
110 
111 // Constructs and returns a ColPartition with the given real BLOBNBOX,
112 // and sets it up to be a "big" partition (single-blob partition bigger
113 // than the surrounding text that may be a dropcap, two or more vertically
114 // touching characters, or some graphic element.
115 // If the given list is not nullptr, the partition is also added to the list.
117  ColPartition_LIST* big_part_list) {
118  box->set_owner(nullptr);
119  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
120  single->set_flow(BTFT_NONE);
121  single->AddBox(box);
122  single->ComputeLimits();
123  single->ClaimBoxes();
124  single->SetBlobTypes();
125  single->set_block_owned(true);
126  if (big_part_list != nullptr) {
127  ColPartition_IT part_it(big_part_list);
128  part_it.add_to_end(single);
129  }
130  return single;
131 }
132 
134  // Remove this as a partner of all partners, as we don't want them
135  // referring to a deleted object.
136  ColPartition_C_IT it(&upper_partners_);
137  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
138  it.data()->RemovePartner(false, this);
139  }
140  it.set_to_list(&lower_partners_);
141  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142  it.data()->RemovePartner(true, this);
143  }
144 }
145 
146 // Constructs a fake ColPartition with no BLOBNBOXes to represent a
147 // horizontal or vertical line, given a type and a bounding box.
149  const ICOORD& vertical,
150  int left, int bottom,
151  int right, int top) {
152  auto* part = new ColPartition(blob_type, vertical);
153  part->bounding_box_ = TBOX(left, bottom, right, top);
154  part->median_bottom_ = bottom;
155  part->median_top_ = top;
156  part->median_height_ = top - bottom;
157  part->median_left_ = left;
158  part->median_right_ = right;
159  part->median_width_ = right - left;
160  part->left_key_ = part->BoxLeftKey();
161  part->right_key_ = part->BoxRightKey();
162  return part;
163 }
164 
165 
166 // Adds the given box to the partition, updating the partition bounds.
167 // The list of boxes in the partition is updated, ensuring that no box is
168 // recorded twice, and the boxes are kept in increasing left position.
170  TBOX box = bbox->bounding_box();
171  // Update the partition limits.
172  if (boxes_.length() == 0) {
173  bounding_box_ = box;
174  } else {
175  bounding_box_ += box;
176  }
177 
178  if (IsVerticalType()) {
179  if (!last_add_was_vertical_) {
180  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
181  last_add_was_vertical_ = true;
182  }
183  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
184  } else {
185  if (last_add_was_vertical_) {
186  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
187  last_add_was_vertical_ = false;
188  }
189  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
190  }
191  if (!left_key_tab_)
192  left_key_ = BoxLeftKey();
193  if (!right_key_tab_)
194  right_key_ = BoxRightKey();
195  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
196  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
197  box.left(), box.bottom(), box.right(), box.top(),
198  bounding_box_.left(), bounding_box_.right());
199 }
200 
201 // Removes the given box from the partition, updating the bounds.
203  BLOBNBOX_C_IT bb_it(&boxes_);
204  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
205  if (box == bb_it.data()) {
206  bb_it.extract();
207  ComputeLimits();
208  return;
209  }
210  }
211 }
212 
213 // Returns the tallest box in the partition, as measured perpendicular to the
214 // presumed flow of text.
216  BLOBNBOX* biggest = nullptr;
217  BLOBNBOX_C_IT bb_it(&boxes_);
218  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219  BLOBNBOX* bbox = bb_it.data();
220  if (IsVerticalType()) {
221  if (biggest == nullptr ||
222  bbox->bounding_box().width() > biggest->bounding_box().width())
223  biggest = bbox;
224  } else {
225  if (biggest == nullptr ||
226  bbox->bounding_box().height() > biggest->bounding_box().height())
227  biggest = bbox;
228  }
229  }
230  return biggest;
231 }
232 
233 // Returns the bounding box excluding the given box.
235  TBOX result;
236  BLOBNBOX_C_IT bb_it(&boxes_);
237  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
238  if (box != bb_it.data()) {
239  result += bb_it.data()->bounding_box();
240  }
241  }
242  return result;
243 }
244 
245 // Claims the boxes in the boxes_list by marking them with a this owner
246 // pointer. If a box is already owned, then it must be owned by this.
248  BLOBNBOX_C_IT bb_it(&boxes_);
249  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
250  BLOBNBOX* bblob = bb_it.data();
251  ColPartition* other = bblob->owner();
252  if (other == nullptr) {
253  // Normal case: ownership is available.
254  bblob->set_owner(this);
255  } else {
256  ASSERT_HOST(other == this);
257  }
258  }
259 }
260 
261 // nullptr the owner of the blobs in this partition, so they can be deleted
262 // independently of the ColPartition.
264  BLOBNBOX_C_IT bb_it(&boxes_);
265  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
266  BLOBNBOX* bblob = bb_it.data();
267  ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
268  bblob->set_owner(nullptr);
269  }
270 }
271 
272 // nullptr the owner of the blobs in this partition that are owned by this
273 // partition, so they can be deleted independently of the ColPartition.
274 // Any blobs that are not owned by this partition get to keep their owner
275 // without an assert failure.
277  BLOBNBOX_C_IT bb_it(&boxes_);
278  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
279  BLOBNBOX* bblob = bb_it.data();
280  if (bblob->owner() == this)
281  bblob->set_owner(nullptr);
282  }
283 }
284 
285 // Nulls the owner of the blobs in this partition that are owned by this
286 // partition and not leader blobs, removing them from the boxes_ list, thus
287 // turning this partition back to a leader partition if it contains a leader,
288 // or otherwise leaving it empty. Returns true if any boxes remain.
290  BLOBNBOX_C_IT bb_it(&boxes_);
291  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
292  BLOBNBOX* bblob = bb_it.data();
293  if (bblob->flow() != BTFT_LEADER) {
294  if (bblob->owner() == this) bblob->set_owner(nullptr);
295  bb_it.extract();
296  }
297  }
298  if (bb_it.empty()) return false;
299  flow_ = BTFT_LEADER;
300  ComputeLimits();
301  return true;
302 }
303 
304 // Delete the boxes that this partition owns.
306  // Although the boxes_ list is a C_LIST, in some cases it owns the
307  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
308  // and the BLOBNBOXes own the underlying C_BLOBs.
309  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
310  BLOBNBOX* bblob = bb_it.extract();
311  delete bblob->cblob();
312  delete bblob;
313  }
314 }
315 
316 // Reflects the partition in the y-axis, assuming that its blobs have
317 // already been done. Corrects only a limited part of the members, since
318 // this function is assumed to be used shortly after initial creation, which
319 // is before a lot of the members are used.
321  BLOBNBOX_CLIST reversed_boxes;
322  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
323  // Reverse the order of the boxes_.
324  BLOBNBOX_C_IT bb_it(&boxes_);
325  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
326  reversed_it.add_before_then_move(bb_it.extract());
327  }
328  bb_it.add_list_after(&reversed_boxes);
329  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
330  int tmp = left_margin_;
331  left_margin_ = -right_margin_;
332  right_margin_ = -tmp;
333  ComputeLimits();
334 }
335 
336 // Returns true if this is a legal partition - meaning that the conditions
337 // left_margin <= bounding_box left
338 // left_key <= bounding box left key
339 // bounding box left <= bounding box right
340 // and likewise for right margin and key
341 // are all met.
343  if (bounding_box_.left() > bounding_box_.right()) {
344  if (textord_debug_bugs) {
345  tprintf("Bounding box invalid\n");
346  Print();
347  }
348  return false; // Bounding box invalid.
349  }
350  if (left_margin_ > bounding_box_.left() ||
351  right_margin_ < bounding_box_.right()) {
352  if (textord_debug_bugs) {
353  tprintf("Margins invalid\n");
354  Print();
355  }
356  return false; // Margins invalid.
357  }
358  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
359  if (textord_debug_bugs) {
360  tprintf("Key inside box: %d v %d or %d v %d\n",
361  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
362  Print();
363  }
364  return false; // Keys inside the box.
365  }
366  return true;
367 }
368 
369 // Returns true if the left and right edges are approximately equal.
370 bool ColPartition::MatchingColumns(const ColPartition& other) const {
371  int y = (MidY() + other.MidY()) / 2;
372  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
373  LeftAtY(y) / kColumnWidthFactor, 1))
374  return false;
375  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
376  RightAtY(y) / kColumnWidthFactor, 1))
377  return false;
378  return true;
379 }
380 
381 // Returns true if the colors match for two text partitions.
383  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
384  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
385  return false; // Too noisy.
386 
387  // Colors must match for other to count.
388  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
389  other.color2_,
390  color1_);
391  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
392  other.color2_,
393  color2_);
394  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
395  other.color1_);
396  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
397  other.color2_);
398 // All 4 distances must be small enough.
399  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
400  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
401 }
402 
403 // Returns true if the sizes match for two text partitions,
404 // taking orientation into account. See also SizesSimilar.
405 bool ColPartition::MatchingSizes(const ColPartition& other) const {
406  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
407  return !TabFind::DifferentSizes(median_width_, other.median_width_);
408  else
409  return !TabFind::DifferentSizes(median_height_, other.median_height_);
410 }
411 
412 // Returns true if there is no tabstop violation in merging this and other.
414  if (bounding_box_.right() < other.bounding_box_.left() &&
415  bounding_box_.right() < other.LeftBlobRule())
416  return false;
417  if (other.bounding_box_.right() < bounding_box_.left() &&
418  other.bounding_box_.right() < LeftBlobRule())
419  return false;
420  if (bounding_box_.left() > other.bounding_box_.right() &&
421  bounding_box_.left() > other.RightBlobRule())
422  return false;
423  if (other.bounding_box_.left() > bounding_box_.right() &&
424  other.bounding_box_.left() > RightBlobRule())
425  return false;
426  return true;
427 }
428 
429 // Returns true if other has a similar stroke width to this.
431  double fractional_tolerance,
432  double constant_tolerance) const {
433  int match_count = 0;
434  int nonmatch_count = 0;
435  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
436  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
437  box_it.mark_cycle_pt();
438  other_it.mark_cycle_pt();
439  while (!box_it.cycled_list() && !other_it.cycled_list()) {
440  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
441  fractional_tolerance,
442  constant_tolerance))
443  ++match_count;
444  else
445  ++nonmatch_count;
446  box_it.forward();
447  other_it.forward();
448  }
449  return match_count > nonmatch_count;
450 }
451 
452 // Returns true if base is an acceptable diacritic base char merge
453 // with this as the diacritic.
454 // Returns true if:
455 // (1) this is a ColPartition containing only diacritics, and
456 // (2) the base characters indicated on the diacritics all believably lie
457 // within the text line of the candidate ColPartition.
459  bool debug) const {
460  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
461  int min_top = INT32_MAX;
462  int max_bottom = -INT32_MAX;
463  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
464  BLOBNBOX* blob = it.data();
465  if (!blob->IsDiacritic()) {
466  if (debug) {
467  tprintf("Blob is not a diacritic:");
468  blob->bounding_box().print();
469  }
470  return false; // All blobs must have diacritic bases.
471  }
472  if (blob->base_char_top() < min_top)
473  min_top = blob->base_char_top();
474  if (blob->base_char_bottom() > max_bottom)
475  max_bottom = blob->base_char_bottom();
476  }
477  // If the intersection of all vertical ranges of all base characters
478  // overlaps the median range of this, then it is OK.
479  bool result = min_top > candidate.median_bottom_ &&
480  max_bottom < candidate.median_top_;
481  if (debug) {
482  if (result)
483  tprintf("OKDiacritic!\n");
484  else
485  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
486  max_bottom, min_top, median_bottom_, median_top_);
487  }
488  return result;
489 }
490 
491 // Sets the sort key using either the tab vector, or the bounding box if
492 // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
493 // use the edge of the box as a key any way.
494 void ColPartition::SetLeftTab(const TabVector* tab_vector) {
495  if (tab_vector != nullptr) {
496  left_key_ = tab_vector->sort_key();
497  left_key_tab_ = left_key_ <= BoxLeftKey();
498  } else {
499  left_key_tab_ = false;
500  }
501  if (!left_key_tab_)
502  left_key_ = BoxLeftKey();
503 }
504 
505 // As SetLeftTab, but with the right.
506 void ColPartition::SetRightTab(const TabVector* tab_vector) {
507  if (tab_vector != nullptr) {
508  right_key_ = tab_vector->sort_key();
509  right_key_tab_ = right_key_ >= BoxRightKey();
510  } else {
511  right_key_tab_ = false;
512  }
513  if (!right_key_tab_)
514  right_key_ = BoxRightKey();
515 }
516 
517 // Copies the left/right tab from the src partition, but if take_box is
518 // true, copies the box instead and uses that as a key.
519 void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) {
520  left_key_tab_ = take_box ? false : src.left_key_tab_;
521  if (left_key_tab_) {
522  left_key_ = src.left_key_;
523  } else {
524  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
525  left_key_ = BoxLeftKey();
526  }
527  if (left_margin_ > bounding_box_.left())
528  left_margin_ = src.left_margin_;
529 }
530 
531 // As CopyLeftTab, but with the right.
532 void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) {
533  right_key_tab_ = take_box ? false : src.right_key_tab_;
534  if (right_key_tab_) {
535  right_key_ = src.right_key_;
536  } else {
537  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
538  right_key_ = BoxRightKey();
539  }
540  if (right_margin_ < bounding_box_.right())
541  right_margin_ = src.right_margin_;
542 }
543 
544 // Returns the left rule line x coord of the leftmost blob.
546  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
547  return it.data()->left_rule();
548 }
549 // Returns the right rule line x coord of the rightmost blob.
551  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
552  it.move_to_last();
553  return it.data()->right_rule();
554 }
555 
558  return special_blobs_densities_[type];
559 }
560 
563  BLOBNBOX_C_IT blob_it(&boxes_);
564  int count = 0;
565  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
566  BLOBNBOX* blob = blob_it.data();
568  if (blob_type == type) {
569  count++;
570  }
571  }
572 
573  return count;
574 }
575 
577  const BlobSpecialTextType type, const float density) {
579  special_blobs_densities_[type] = density;
580 }
581 
583  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
584  if (boxes_.empty()) {
585  return;
586  }
587 
588  BLOBNBOX_C_IT blob_it(&boxes_);
589  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
590  BLOBNBOX* blob = blob_it.data();
592  special_blobs_densities_[type]++;
593  }
594 
595  for (float& special_blobs_density : special_blobs_densities_) {
596  special_blobs_density /= boxes_.length();
597  }
598 }
599 
600 // Add a partner above if upper, otherwise below.
601 // Add them uniquely and keep the list sorted by box left.
602 // Partnerships are added symmetrically to partner and this.
603 void ColPartition::AddPartner(bool upper, ColPartition* partner) {
604  if (upper) {
605  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
606  true, this);
607  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
608  } else {
609  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
610  true, this);
611  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
612  }
613 }
614 
615 // Removes the partner from this, but does not remove this from partner.
616 // This asymmetric removal is so as not to mess up the iterator that is
617 // working on partner's partner list.
618 void ColPartition::RemovePartner(bool upper, ColPartition* partner) {
619  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
620  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
621  if (it.data() == partner) {
622  it.extract();
623  break;
624  }
625  }
626 }
627 
628 // Returns the partner if the given partner is a singleton, otherwise nullptr.
630  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
631  if (!partners->singleton())
632  return nullptr;
633  ColPartition_C_IT it(partners);
634  return it.data();
635 }
636 
637 // Merge with the other partition and delete it.
639  // The result has to either own all of the blobs or none of them.
640  // Verify the flag is consistent.
641  ASSERT_HOST(owns_blobs() == other->owns_blobs());
642  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
643  // should always be true when this is called. So there is no issues.
644  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
645  bounding_box_.bottom()) ||
646  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
647  other->bounding_box_.bottom())) {
648  tprintf("Merging:");
649  Print();
650  other->Print();
651  }
652 
653  // Update the special_blobs_densities_.
654  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
655  for (int type = 0; type < BSTT_COUNT; ++type) {
656  unsigned w1 = boxes_.length();
657  unsigned w2 = other->boxes_.length();
658  float new_val = special_blobs_densities_[type] * w1 +
659  other->special_blobs_densities_[type] * w2;
660  if (!w1 || !w2) {
661  ASSERT_HOST((w1 + w2) > 0);
662  special_blobs_densities_[type] = new_val / (w1 + w2);
663  }
664  }
665 
666  // Merge the two sorted lists.
667  BLOBNBOX_C_IT it(&boxes_);
668  BLOBNBOX_C_IT it2(&other->boxes_);
669  for (; !it2.empty(); it2.forward()) {
670  BLOBNBOX* bbox2 = it2.extract();
671  ColPartition* prev_owner = bbox2->owner();
672  if (prev_owner != other && prev_owner != nullptr) {
673  // A blob on other's list is owned by someone else; let them have it.
674  continue;
675  }
676  ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
677  if (prev_owner == other)
678  bbox2->set_owner(this);
679  it.add_to_end(bbox2);
680  }
681  left_margin_ = std::min(left_margin_, other->left_margin_);
682  right_margin_ = std::max(right_margin_, other->right_margin_);
683  if (other->left_key_ < left_key_) {
684  left_key_ = other->left_key_;
685  left_key_tab_ = other->left_key_tab_;
686  }
687  if (other->right_key_ > right_key_) {
688  right_key_ = other->right_key_;
689  right_key_tab_ = other->right_key_tab_;
690  }
691  // Combine the flow and blob_type in a sensible way.
692  // Dominant flows stay.
693  if (!DominatesInMerge(flow_, other->flow_)) {
694  flow_ = other->flow_;
695  blob_type_ = other->blob_type_;
696  }
697  SetBlobTypes();
698  if (IsVerticalType()) {
699  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
700  last_add_was_vertical_ = true;
701  } else {
702  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
703  last_add_was_vertical_ = false;
704  }
705  ComputeLimits();
706  // Fix partner lists. other is going away, so remove it as a
707  // partner of all its partners and add this in its place.
708  for (int upper = 0; upper < 2; ++upper) {
709  ColPartition_CLIST partners;
710  ColPartition_C_IT part_it(&partners);
711  part_it.add_list_after(upper ? &other->upper_partners_
712  : &other->lower_partners_);
713  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
714  ColPartition* partner = part_it.extract();
715  partner->RemovePartner(!upper, other);
716  partner->RemovePartner(!upper, this);
717  partner->AddPartner(!upper, this);
718  }
719  }
720  delete other;
721  if (cb != nullptr) {
722  SetColumnGoodness(cb);
723  }
724 }
725 
726 // Merge1 and merge2 are candidates to be merged, yet their combined box
727 // overlaps this. Is that allowed?
728 // Returns true if the overlap between this and the merged pair of
729 // merge candidates is sufficiently trivial to be allowed.
730 // The merged box can graze the edge of this by the ok_box_overlap
731 // if that exceeds the margin to the median top and bottom.
732 // ok_box_overlap should be set by the caller appropriate to the sizes of
733 // the text involved, and is usually a fraction of the median size of merge1
734 // and/or merge2, or this.
735 // TODO(rays) Determine whether vertical text needs to be considered.
737  const ColPartition& merge2,
738  int ok_box_overlap, bool debug) {
739  // Vertical partitions are not allowed to be involved.
740  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
741  if (debug)
742  tprintf("Vertical partition\n");
743  return false;
744  }
745  // The merging partitions must strongly overlap each other.
746  if (!merge1.VSignificantCoreOverlap(merge2)) {
747  if (debug)
748  tprintf("Voverlap %d (%d)\n",
749  merge1.VCoreOverlap(merge2),
750  merge1.VSignificantCoreOverlap(merge2));
751  return false;
752  }
753  // The merged box must not overlap the median bounds of this.
754  TBOX merged_box(merge1.bounding_box());
755  merged_box += merge2.bounding_box();
756  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
757  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
758  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
759  if (debug)
760  tprintf("Excessive box overlap\n");
761  return false;
762  }
763  // Looks OK!
764  return true;
765 }
766 
767 // Find the blob at which to split this to minimize the overlap with the
768 // given box. Returns the first blob to go in the second partition.
770  if (boxes_.empty() || boxes_.singleton())
771  return nullptr;
772  BLOBNBOX_C_IT it(&boxes_);
773  TBOX left_box(it.data()->bounding_box());
774  for (it.forward(); !it.at_first(); it.forward()) {
775  BLOBNBOX* bbox = it.data();
776  left_box += bbox->bounding_box();
777  if (left_box.overlap(box))
778  return bbox;
779  }
780  return nullptr;
781 }
782 
783 // Split this partition keeping the first half in this and returning
784 // the second half.
785 // Splits by putting the split_blob and the blobs that follow
786 // in the second half, and the rest in the first half.
788  ColPartition* split_part = ShallowCopy();
789  split_part->set_owns_blobs(owns_blobs());
790  BLOBNBOX_C_IT it(&boxes_);
791  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
792  BLOBNBOX* bbox = it.data();
793  ColPartition* prev_owner = bbox->owner();
794  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
795  if (bbox == split_blob || !split_part->boxes_.empty()) {
796  split_part->AddBox(it.extract());
797  if (owns_blobs() && prev_owner != nullptr)
798  bbox->set_owner(split_part);
799  }
800  }
801  ASSERT_HOST(!it.empty());
802  if (split_part->IsEmpty()) {
803  // Split part ended up with nothing. Possible if split_blob is not
804  // in the list of blobs.
805  delete split_part;
806  return nullptr;
807  }
808  right_key_tab_ = false;
809  split_part->left_key_tab_ = false;
810  ComputeLimits();
811  // TODO(nbeato) Merge Ray's CL like this:
812  // if (owns_blobs())
813  // SetBlobTextlineGoodness();
814  split_part->ComputeLimits();
815  // TODO(nbeato) Merge Ray's CL like this:
816  // if (split_part->owns_blobs())
817  // split_part->SetBlobTextlineGoodness();
818  return split_part;
819 }
820 
821 // Split this partition at the given x coordinate, returning the right
822 // half and keeping the left half in this.
824  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
825  return nullptr; // There will be no change.
826  ColPartition* split_part = ShallowCopy();
827  split_part->set_owns_blobs(owns_blobs());
828  BLOBNBOX_C_IT it(&boxes_);
829  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
830  BLOBNBOX* bbox = it.data();
831  ColPartition* prev_owner = bbox->owner();
832  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
833  const TBOX& box = bbox->bounding_box();
834  if (box.left() >= split_x) {
835  split_part->AddBox(it.extract());
836  if (owns_blobs() && prev_owner != nullptr)
837  bbox->set_owner(split_part);
838  }
839  }
840  if (it.empty()) {
841  // Possible if split-x passes through the first blob.
842  it.add_list_after(&split_part->boxes_);
843  }
844  ASSERT_HOST(!it.empty());
845  if (split_part->IsEmpty()) {
846  // Split part ended up with nothing. Possible if split_x passes
847  // through the last blob.
848  delete split_part;
849  return nullptr;
850  }
851  right_key_tab_ = false;
852  split_part->left_key_tab_ = false;
853  right_margin_ = split_x;
854  split_part->left_margin_ = split_x;
855  ComputeLimits();
856  split_part->ComputeLimits();
857  return split_part;
858 }
859 
860 // Recalculates all the coordinate limits of the partition.
862  bounding_box_ = TBOX(); // Clear it
863  BLOBNBOX_C_IT it(&boxes_);
864  BLOBNBOX* bbox = nullptr;
865  int non_leader_count = 0;
866  if (it.empty()) {
867  bounding_box_.set_left(left_margin_);
868  bounding_box_.set_right(right_margin_);
869  bounding_box_.set_bottom(0);
870  bounding_box_.set_top(0);
871  } else {
872  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
873  bbox = it.data();
874  bounding_box_ += bbox->bounding_box();
875  if (bbox->flow() != BTFT_LEADER)
876  ++non_leader_count;
877  }
878  }
879  if (!left_key_tab_)
880  left_key_ = BoxLeftKey();
881  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
882  // TODO(rays) investigate the causes of these error messages, to find
883  // out if they are genuinely harmful, or just indicative of junk input.
884  tprintf("Computed left-illegal partition\n");
885  Print();
886  }
887  if (!right_key_tab_)
888  right_key_ = BoxRightKey();
889  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
890  tprintf("Computed right-illegal partition\n");
891  Print();
892  }
893  if (it.empty())
894  return;
895  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
896  blob_type() == BRT_POLYIMAGE) {
897  median_top_ = bounding_box_.top();
898  median_bottom_ = bounding_box_.bottom();
899  median_height_ = bounding_box_.height();
900  median_left_ = bounding_box_.left();
901  median_right_ = bounding_box_.right();
902  median_width_ = bounding_box_.width();
903  } else {
904  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
905  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
906  STATS height_stats(0, bounding_box_.height() + 1);
907  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
908  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
909  STATS width_stats(0, bounding_box_.width() + 1);
910  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
911  bbox = it.data();
912  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
913  const TBOX& box = bbox->bounding_box();
914  int area = box.area();
915  top_stats.add(box.top(), area);
916  bottom_stats.add(box.bottom(), area);
917  height_stats.add(box.height(), area);
918  left_stats.add(box.left(), area);
919  right_stats.add(box.right(), area);
920  width_stats.add(box.width(), area);
921  }
922  }
923  median_top_ = static_cast<int>(top_stats.median() + 0.5);
924  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
925  median_height_ = static_cast<int>(height_stats.median() + 0.5);
926  median_left_ = static_cast<int>(left_stats.median() + 0.5);
927  median_right_ = static_cast<int>(right_stats.median() + 0.5);
928  median_width_ = static_cast<int>(width_stats.median() + 0.5);
929  }
930 
931  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
932  tprintf("Made partition with bad right coords");
933  Print();
934  }
935  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
936  tprintf("Made partition with bad left coords");
937  Print();
938  }
939  // Fix partner lists. The bounding box has changed and partners are stored
940  // in bounding box order, so remove and reinsert this as a partner
941  // of all its partners.
942  for (int upper = 0; upper < 2; ++upper) {
943  ColPartition_CLIST partners;
944  ColPartition_C_IT part_it(&partners);
945  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
946  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
947  ColPartition* partner = part_it.extract();
948  partner->RemovePartner(!upper, this);
949  partner->AddPartner(!upper, this);
950  }
951  }
952  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
953  bounding_box_.bottom())) {
954  tprintf("Recomputed box for partition %p\n", this);
955  Print();
956  }
957 }
958 
959 // Returns the number of boxes that overlap the given box.
961  BLOBNBOX_C_IT it(&boxes_);
962  int overlap_count = 0;
963  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
964  BLOBNBOX* bbox = it.data();
965  if (box.overlap(bbox->bounding_box()))
966  ++overlap_count;
967  }
968  return overlap_count;
969 }
970 
971 // Computes and sets the type_ and first_column_, last_column_ and column_set_.
972 // resolution refers to the ppi resolution of the image.
973 void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) {
974  int first_spanned_col = -1;
975  ColumnSpanningType span_type =
976  columns->SpanningType(resolution,
977  bounding_box_.left(), bounding_box_.right(),
978  std::min(bounding_box_.height(), bounding_box_.width()),
979  MidY(), left_margin_, right_margin_,
980  &first_column_, &last_column_,
981  &first_spanned_col);
982  column_set_ = columns;
983  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
984  !IsLineType()) {
985  // Unequal columns may indicate that the pullout spans one of the columns
986  // it lies in, so force it to be allocated to just that column.
987  if (first_spanned_col >= 0) {
988  first_column_ = first_spanned_col;
989  last_column_ = first_spanned_col;
990  } else {
991  if ((first_column_ & 1) == 0)
992  last_column_ = first_column_;
993  else if ((last_column_ & 1) == 0)
994  first_column_ = last_column_;
995  else
996  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
997  }
998  }
999  type_ = PartitionType(span_type);
1000 }
1001 
1002 // Returns the PartitionType from the current BlobRegionType and a column
1003 // flow spanning type ColumnSpanningType, generated by
1004 // ColPartitionSet::SpanningType, that indicates how the partition sits
1005 // in the columns.
1007  if (flow == CST_NOISE) {
1008  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1009  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1010  return PT_NOISE;
1011  flow = CST_FLOWING;
1012  }
1013 
1014  switch (blob_type_) {
1015  case BRT_NOISE:
1016  return PT_NOISE;
1017  case BRT_HLINE:
1018  return PT_HORZ_LINE;
1019  case BRT_VLINE:
1020  return PT_VERT_LINE;
1021  case BRT_RECTIMAGE:
1022  case BRT_POLYIMAGE:
1023  switch (flow) {
1024  case CST_FLOWING:
1025  return PT_FLOWING_IMAGE;
1026  case CST_HEADING:
1027  return PT_HEADING_IMAGE;
1028  case CST_PULLOUT:
1029  return PT_PULLOUT_IMAGE;
1030  default:
1031  ASSERT_HOST(!"Undefined flow type for image!");
1032  }
1033  break;
1034  case BRT_VERT_TEXT:
1035  return PT_VERTICAL_TEXT;
1036  case BRT_TEXT:
1037  case BRT_UNKNOWN:
1038  default:
1039  switch (flow) {
1040  case CST_FLOWING:
1041  return PT_FLOWING_TEXT;
1042  case CST_HEADING:
1043  return PT_HEADING_TEXT;
1044  case CST_PULLOUT:
1045  return PT_PULLOUT_TEXT;
1046  default:
1047  ASSERT_HOST(!"Undefined flow type for text!");
1048  }
1049  }
1050  ASSERT_HOST(!"Should never get here!");
1051  return PT_NOISE;
1052 }
1053 
1054 // Returns the first and last column touched by this partition.
1055 // resolution refers to the ppi resolution of the image.
1056 void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns,
1057  int* first_col, int* last_col) {
1058  int first_spanned_col = -1;
1059  ColumnSpanningType span_type =
1060  columns->SpanningType(resolution,
1061  bounding_box_.left(), bounding_box_.right(),
1062  std::min(bounding_box_.height(), bounding_box_.width()),
1063  MidY(), left_margin_, right_margin_,
1064  first_col, last_col,
1065  &first_spanned_col);
1066  type_ = PartitionType(span_type);
1067 }
1068 
1069 // Sets the internal flags good_width_ and good_column_.
1071  int y = MidY();
1072  int width = RightAtY(y) - LeftAtY(y);
1073  good_width_ = cb->Run(width);
1074  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1075 }
1076 
1077 // Determines whether the blobs in this partition mostly represent
1078 // a leader (fixed pitch sequence) and sets the member blobs accordingly.
1079 // Note that height is assumed to have been tested elsewhere, and that this
1080 // function will find most fixed-pitch text as leader without a height filter.
1081 // Leader detection is limited to sequences of identical width objects,
1082 // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
1084  bool result = false;
1085  // Gather statistics on the gaps between blobs and the widths of the blobs.
1086  int part_width = bounding_box_.width();
1087  STATS gap_stats(0, part_width);
1088  STATS width_stats(0, part_width);
1089  BLOBNBOX_C_IT it(&boxes_);
1090  BLOBNBOX* prev_blob = it.data();
1091  prev_blob->set_flow(BTFT_NEIGHBOURS);
1092  width_stats.add(prev_blob->bounding_box().width(), 1);
1093  int blob_count = 1;
1094  for (it.forward(); !it.at_first(); it.forward()) {
1095  BLOBNBOX* blob = it.data();
1096  int left = blob->bounding_box().left();
1097  int right = blob->bounding_box().right();
1098  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1099  width_stats.add(right - left, 1);
1100  blob->set_flow(BTFT_NEIGHBOURS);
1101  prev_blob = blob;
1102  ++blob_count;
1103  }
1104  double median_gap = gap_stats.median();
1105  double median_width = width_stats.median();
1106  double max_width = std::max(median_gap, median_width);
1107  double min_width = std::min(median_gap, median_width);
1108  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1109  if (textord_debug_tabfind >= 4) {
1110  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1111  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1112  min_width * kMaxLeaderGapFractionOfMin);
1113  }
1114  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1115  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1116  blob_count >= kMinLeaderCount) {
1117  // This is stable enough to be called a leader, so check the widths.
1118  // Since leader dashes can join, run a dp cutting algorithm and go
1119  // on the cost.
1120  int offset = static_cast<int>(ceil(gap_iqr * 2));
1121  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1122  int max_step = min_step + offset;
1123  min_step -= offset;
1124  // Pad the buffer with min_step/2 on each end.
1125  int part_left = bounding_box_.left() - min_step / 2;
1126  part_width += min_step;
1127  auto* projection = new DPPoint[part_width];
1128  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1129  BLOBNBOX* blob = it.data();
1130  int left = blob->bounding_box().left();
1131  int right = blob->bounding_box().right();
1132  int height = blob->bounding_box().height();
1133  for (int x = left; x < right; ++x) {
1134  projection[left - part_left].AddLocalCost(height);
1135  }
1136  }
1137  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1139  part_width, projection);
1140  if (best_end != nullptr && best_end->total_cost() < blob_count) {
1141  // Good enough. Call it a leader.
1142  result = true;
1143  bool modified_blob_list = false;
1144  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1145  BLOBNBOX* blob = it.data();
1146  // If the first or last blob is spaced too much, don't mark it.
1147  if (it.at_first()) {
1148  int gap = it.data_relative(1)->bounding_box().left() -
1149  blob->bounding_box().right();
1150  if (blob->bounding_box().width() + gap > max_step) {
1151  it.extract();
1152  modified_blob_list = true;
1153  continue;
1154  }
1155  }
1156  if (it.at_last()) {
1157  int gap = blob->bounding_box().left() -
1158  it.data_relative(-1)->bounding_box().right();
1159  if (blob->bounding_box().width() + gap > max_step) {
1160  it.extract();
1161  modified_blob_list = true;
1162  break;
1163  }
1164  }
1165  blob->set_region_type(BRT_TEXT);
1166  blob->set_flow(BTFT_LEADER);
1167  }
1168  if (modified_blob_list) ComputeLimits();
1169  blob_type_ = BRT_TEXT;
1170  flow_ = BTFT_LEADER;
1171  } else if (textord_debug_tabfind) {
1172  if (best_end == nullptr) {
1173  tprintf("No path\n");
1174  } else {
1175  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1176  blob_count);
1177  }
1178  }
1179  delete [] projection;
1180  }
1181  return result;
1182 }
1183 
1184 // Given the result of TextlineProjection::EvaluateColPartition, (positive for
1185 // horizontal text, negative for vertical text, and near zero for non-text),
1186 // sets the blob_type_ and flow_ for this partition to indicate whether it
1187 // is strongly or weakly vertical or horizontal text, or non-text.
1188 // The function assumes that the blob neighbours are valid (from
1189 // StrokeWidth::SetNeighbours) and that those neighbours have their
1190 // region_type() set.
1192  int blob_count = 0; // Total # blobs.
1193  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1194  int noisy_count = 0; // Total # neighbours marked as noise.
1195  int hline_count = 0;
1196  int vline_count = 0;
1197  BLOBNBOX_C_IT it(&boxes_);
1198  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1199  BLOBNBOX* blob = it.data();
1200  ++blob_count;
1201  noisy_count += blob->NoisyNeighbours();
1202  good_blob_score_ += blob->GoodTextBlob();
1203  if (blob->region_type() == BRT_HLINE) ++hline_count;
1204  if (blob->region_type() == BRT_VLINE) ++vline_count;
1205  }
1206  flow_ = BTFT_NEIGHBOURS;
1207  blob_type_ = BRT_UNKNOWN;
1208  if (hline_count > vline_count) {
1209  flow_ = BTFT_NONE;
1210  blob_type_ = BRT_HLINE;
1211  } else if (vline_count > hline_count) {
1212  flow_ = BTFT_NONE;
1213  blob_type_ = BRT_VLINE;
1214  } else if (value < -1 || 1 < value) {
1215  int long_side;
1216  int short_side;
1217  if (value > 0) {
1218  long_side = bounding_box_.width();
1219  short_side = bounding_box_.height();
1220  blob_type_ = BRT_TEXT;
1221  } else {
1222  long_side = bounding_box_.height();
1223  short_side = bounding_box_.width();
1224  blob_type_ = BRT_VERT_TEXT;
1225  }
1226  // We will combine the old metrics using aspect ratio and blob counts
1227  // with the input value by allowing a strong indication to flip the
1228  // STRONG_CHAIN/CHAIN flow values.
1229  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1230  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1231  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1232  if (abs(value) >= kMinStrongTextValue)
1233  flow_ = BTFT_STRONG_CHAIN;
1234  else if (abs(value) >= kMinChainTextValue)
1235  flow_ = BTFT_CHAIN;
1236  else
1237  flow_ = BTFT_NEIGHBOURS;
1238  // Upgrade chain to strong chain if the other indicators are good
1239  if (flow_ == BTFT_CHAIN && strong_score == 3)
1240  flow_ = BTFT_STRONG_CHAIN;
1241  // Downgrade strong vertical text to chain if the indicators are bad.
1242  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1243  flow_ = BTFT_CHAIN;
1244  }
1245  if (flow_ == BTFT_NEIGHBOURS) {
1246  // Check for noisy neighbours.
1247  if (noisy_count >= blob_count) {
1248  flow_ = BTFT_NONTEXT;
1249  blob_type_= BRT_NOISE;
1250  }
1251  }
1252  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1253  bounding_box_.bottom())) {
1254  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1255  blob_count, noisy_count, good_blob_score_);
1256  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1257  value, flow_, blob_type_);
1258  Print();
1259  }
1260  SetBlobTypes();
1261 }
1262 
1263 // Sets all blobs with the partition blob type and flow, but never overwrite
1264 // leader blobs, as we need to be able to identify them later.
1266  if (!owns_blobs())
1267  return;
1268  BLOBNBOX_C_IT it(&boxes_);
1269  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1270  BLOBNBOX* blob = it.data();
1271  if (blob->flow() != BTFT_LEADER)
1272  blob->set_flow(flow_);
1273  blob->set_region_type(blob_type_);
1274  ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1275  }
1276 }
1277 
1278 // Returns true if a decent baseline can be fitted through the blobs.
1279 // Works for both horizontal and vertical text.
1281  // Approximation of the baseline.
1282  DetLineFit linepoints;
1283  // Calculation of the mean height on this line segment. Note that these
1284  // variable names apply to the context of a horizontal line, and work
1285  // analogously, rather than literally in the case of a vertical line.
1286  int total_height = 0;
1287  int coverage = 0;
1288  int height_count = 0;
1289  int width = 0;
1290  BLOBNBOX_C_IT it(&boxes_);
1291  TBOX box(it.data()->bounding_box());
1292  // Accumulate points representing the baseline at the middle of each blob,
1293  // but add an additional point for each end of the line. This makes it
1294  // harder to fit a severe skew angle, as it is most likely not right.
1295  if (IsVerticalType()) {
1296  // For a vertical line, use the right side as the baseline.
1297  ICOORD first_pt(box.right(), box.bottom());
1298  // Use the bottom-right of the first (bottom) box, the top-right of the
1299  // last, and the middle-right of all others.
1300  linepoints.Add(first_pt);
1301  for (it.forward(); !it.at_last(); it.forward()) {
1302  BLOBNBOX* blob = it.data();
1303  box = blob->bounding_box();
1304  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1305  linepoints.Add(box_pt);
1306  total_height += box.width();
1307  coverage += box.height();
1308  ++height_count;
1309  }
1310  box = it.data()->bounding_box();
1311  ICOORD last_pt(box.right(), box.top());
1312  linepoints.Add(last_pt);
1313  width = last_pt.y() - first_pt.y();
1314 
1315  } else {
1316  // Horizontal lines use the bottom as the baseline.
1317  TBOX box(it.data()->bounding_box());
1318  // Use the bottom-left of the first box, the the bottom-right of the last,
1319  // and the middle of all others.
1320  ICOORD first_pt(box.left(), box.bottom());
1321  linepoints.Add(first_pt);
1322  for (it.forward(); !it.at_last(); it.forward()) {
1323  BLOBNBOX* blob = it.data();
1324  box = blob->bounding_box();
1325  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1326  linepoints.Add(box_pt);
1327  total_height += box.height();
1328  coverage += box.width();
1329  ++height_count;
1330  }
1331  box = it.data()->bounding_box();
1332  ICOORD last_pt(box.right(), box.bottom());
1333  linepoints.Add(last_pt);
1334  width = last_pt.x() - first_pt.x();
1335  }
1336  // Maximum median error allowed to be a good text line.
1337  if (height_count == 0)
1338  return false;
1339  double max_error = kMaxBaselineError * total_height / height_count;
1340  ICOORD start_pt, end_pt;
1341  double error = linepoints.Fit(&start_pt, &end_pt);
1342  return error < max_error && coverage >= kMinBaselineCoverage * width;
1343 }
1344 
1345 // Adds this ColPartition to a matching WorkingPartSet if one can be found,
1346 // otherwise starts a new one in the appropriate column, ending the previous.
1347 void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
1348  int resolution,
1349  ColPartition_LIST* used_parts,
1350  WorkingPartSet_LIST* working_sets) {
1351  if (block_owned_)
1352  return; // Done it already.
1353  block_owned_ = true;
1354  WorkingPartSet_IT it(working_sets);
1355  // If there is an upper partner use its working_set_ directly.
1356  ColPartition* partner = SingletonPartner(true);
1357  if (partner != nullptr && partner->working_set_ != nullptr) {
1358  working_set_ = partner->working_set_;
1359  working_set_->AddPartition(this);
1360  return;
1361  }
1362  if (partner != nullptr && textord_debug_bugs) {
1363  tprintf("Partition with partner has no working set!:");
1364  Print();
1365  partner->Print();
1366  }
1367  // Search for the column that the left edge fits in.
1368  WorkingPartSet* work_set = nullptr;
1369  it.move_to_first();
1370  int col_index = 0;
1371  for (it.mark_cycle_pt(); !it.cycled_list() &&
1372  col_index != first_column_;
1373  it.forward(), ++col_index);
1374  if (textord_debug_tabfind >= 2) {
1375  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1376  Print();
1377  }
1378  if (it.cycled_list() && textord_debug_bugs) {
1379  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1380  }
1381  ASSERT_HOST(!it.cycled_list());
1382  work_set = it.data();
1383  // If last_column_ != first_column, then we need to scoop up all blocks
1384  // between here and the last_column_ and put back in work_set.
1385  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1386  // Find the column that the right edge falls in.
1387  BLOCK_LIST completed_blocks;
1388  TO_BLOCK_LIST to_blocks;
1389  for (; !it.cycled_list() && col_index <= last_column_;
1390  it.forward(), ++col_index) {
1391  WorkingPartSet* end_set = it.data();
1392  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1393  &completed_blocks, &to_blocks);
1394  }
1395  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1396  }
1397  working_set_ = work_set;
1398  work_set->AddPartition(this);
1399 }
1400 
1401 // From the given block_parts list, builds one or more BLOCKs and
1402 // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
1403 // Created blocks are appended to the end of completed_blocks and to_blocks.
1404 // The used partitions are put onto used_parts, as they may still be referred
1405 // to in the partition grid. bleft, tright and resolution are the bounds
1406 // and resolution of the original image.
1407 void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
1408  int resolution,
1409  ColPartition_LIST* block_parts,
1410  ColPartition_LIST* used_parts,
1411  BLOCK_LIST* completed_blocks,
1412  TO_BLOCK_LIST* to_blocks) {
1413  int page_height = tright.y() - bleft.y();
1414  // Compute the initial spacing stats.
1415  ColPartition_IT it(block_parts);
1416  int part_count = 0;
1417  int max_line_height = 0;
1418 
1419  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1420  // because their line spacing with their neighbors maybe smaller and their
1421  // height may be slightly larger.
1422 
1423  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1424  ColPartition* part = it.data();
1425  ASSERT_HOST(!part->boxes()->empty());
1426  STATS side_steps(0, part->bounding_box().height());
1427  if (part->bounding_box().height() > max_line_height)
1428  max_line_height = part->bounding_box().height();
1429  BLOBNBOX_C_IT blob_it(part->boxes());
1430  int prev_bottom = blob_it.data()->bounding_box().bottom();
1431  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1432  BLOBNBOX* blob = blob_it.data();
1433  int bottom = blob->bounding_box().bottom();
1434  int step = bottom - prev_bottom;
1435  if (step < 0)
1436  step = -step;
1437  side_steps.add(step, 1);
1438  prev_bottom = bottom;
1439  }
1440  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1441  if (!it.at_last()) {
1442  ColPartition* next_part = it.data_relative(1);
1443  part->set_bottom_spacing(part->median_bottom() -
1444  next_part->median_bottom());
1445  part->set_top_spacing(part->median_top() - next_part->median_top());
1446  } else {
1447  part->set_bottom_spacing(page_height);
1448  part->set_top_spacing(page_height);
1449  }
1450  if (textord_debug_tabfind) {
1451  part->Print();
1452  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1453  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1454  }
1455  ++part_count;
1456  }
1457  if (part_count == 0)
1458  return;
1459 
1460  SmoothSpacings(resolution, page_height, block_parts);
1461 
1462  // Move the partitions into individual block lists and make the blocks.
1463  BLOCK_IT block_it(completed_blocks);
1464  TO_BLOCK_IT to_block_it(to_blocks);
1465  ColPartition_LIST spacing_parts;
1466  ColPartition_IT sp_block_it(&spacing_parts);
1467  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1468  for (it.mark_cycle_pt(); !it.empty();) {
1469  ColPartition* part = it.extract();
1470  sp_block_it.add_to_end(part);
1471  it.forward();
1472  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1473  !part->SpacingsEqual(*it.data(), resolution)) {
1474  // There is a spacing boundary. Check to see if it.data() belongs
1475  // better in the current block or the next one.
1476  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1477  ColPartition* next_part = it.data();
1478  // If there is a size match one-way, then the middle line goes with
1479  // its matched size, otherwise it goes with the smallest spacing.
1480  ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1481  if (textord_debug_tabfind) {
1482  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1483  " sizes %d %d %d\n",
1484  part->top_spacing(), part->bottom_spacing(),
1485  next_part->top_spacing(), next_part->bottom_spacing(),
1486  part->median_height(), next_part->median_height(),
1487  third_part != nullptr ? third_part->median_height() : 0);
1488  }
1489  // We can only consider adding the next line to the block if the sizes
1490  // match and the lines are close enough for their size.
1491  if (part->SizesSimilar(*next_part) &&
1492  next_part->median_height() * kMaxSameBlockLineSpacing >
1493  part->bottom_spacing() &&
1495  part->top_spacing()) {
1496  // Even now, we can only add it as long as the third line doesn't
1497  // match in the same way and have a smaller bottom spacing.
1498  if (third_part == nullptr ||
1499  !next_part->SizesSimilar(*third_part) ||
1500  third_part->median_height() * kMaxSameBlockLineSpacing <=
1501  next_part->bottom_spacing() ||
1502  next_part->median_height() * kMaxSameBlockLineSpacing <=
1503  next_part->top_spacing() ||
1504  next_part->bottom_spacing() > part->bottom_spacing()) {
1505  // Add to the current block.
1506  sp_block_it.add_to_end(it.extract());
1507  it.forward();
1508  if (textord_debug_tabfind) {
1509  tprintf("Added line to current block.\n");
1510  }
1511  }
1512  }
1513  }
1514  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1515  if (to_block != nullptr) {
1516  to_block_it.add_to_end(to_block);
1517  block_it.add_to_end(to_block->block);
1518  }
1519  sp_block_it.set_to_list(&spacing_parts);
1520  } else {
1521  if (textord_debug_tabfind && !it.empty()) {
1522  ColPartition* next_part = it.data();
1523  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1524  part->top_spacing(), part->bottom_spacing(),
1525  next_part->top_spacing(), next_part->bottom_spacing(),
1526  part->median_height(), next_part->median_height());
1527  }
1528  }
1529  }
1530 }
1531 
1532 // Helper function to clip the input pos to the given bleft, tright bounds.
1533 static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) {
1534  if (pos->x() < bleft.x())
1535  pos->set_x(bleft.x());
1536  if (pos->x() > tright.x())
1537  pos->set_x(tright.x());
1538  if (pos->y() < bleft.y())
1539  pos->set_y(bleft.y());
1540  if (pos->y() > tright.y())
1541  pos->set_y(tright.y());
1542 }
1543 
1544 // Helper moves the blobs from the given list of block_parts into the block
1545 // itself. Sets up the block for (old) textline formation correctly for
1546 // vertical and horizontal text. The partitions are moved to used_parts
1547 // afterwards, as they cannot be deleted yet.
1548 static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing,
1549  BLOCK* block,
1550  ColPartition_LIST* block_parts,
1551  ColPartition_LIST* used_parts) {
1552  // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it.
1553  // Move all the parts to a done list as they are no longer needed, except
1554  // that have have to continue to exist until the part grid is deleted.
1555  // Compute the median blob size as we go, as the block needs to know.
1556  TBOX block_box(block->pdblk.bounding_box());
1557  STATS sizes(0, std::max(block_box.width(), block_box.height()));
1558  bool text_type = block->pdblk.poly_block()->IsText();
1559  ColPartition_IT it(block_parts);
1560  auto* to_block = new TO_BLOCK(block);
1561  BLOBNBOX_IT blob_it(&to_block->blobs);
1562  ColPartition_IT used_it(used_parts);
1563  for (it.move_to_first(); !it.empty(); it.forward()) {
1564  ColPartition* part = it.extract();
1565  // Transfer blobs from all regions to the output blocks.
1566  // Blobs for non-text regions will be used to define the polygonal
1567  // bounds of the region.
1568  for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty();
1569  bb_it.forward()) {
1570  BLOBNBOX* bblob = bb_it.extract();
1571  if (bblob->owner() != part) {
1572  tprintf("Ownership incorrect for blob:");
1573  bblob->bounding_box().print();
1574  tprintf("Part=");
1575  part->Print();
1576  if (bblob->owner() == nullptr) {
1577  tprintf("Not owned\n");
1578  } else {
1579  tprintf("Owner part:");
1580  bblob->owner()->Print();
1581  }
1582  }
1583  ASSERT_HOST(bblob->owner() == part);
1584  // Assert failure here is caused by arbitrarily changing the partition
1585  // type without also changing the blob type, such as in
1586  // InsertSmallBlobsAsUnknowns.
1587  ASSERT_HOST(!text_type || bblob->region_type() >= BRT_UNKNOWN);
1588  C_OUTLINE_LIST* outlines = bblob->cblob()->out_list();
1589  C_OUTLINE_IT ol_it(outlines);
1590  ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1591  if (vertical_text)
1592  sizes.add(bblob->bounding_box().width(), 1);
1593  else
1594  sizes.add(bblob->bounding_box().height(), 1);
1595  blob_it.add_after_then_move(bblob);
1596  }
1597  used_it.add_to_end(part);
1598  }
1599  if (text_type && blob_it.empty()) {
1600  delete block;
1601  delete to_block;
1602  return nullptr;
1603  }
1604  to_block->line_size = sizes.median();
1605  if (vertical_text) {
1606  int block_width = block->pdblk.bounding_box().width();
1607  if (block_width < line_spacing)
1608  line_spacing = block_width;
1609  to_block->line_spacing = static_cast<float>(line_spacing);
1610  to_block->max_blob_size = static_cast<float>(block_width + 1);
1611  } else {
1612  int block_height = block->pdblk.bounding_box().height();
1613  if (block_height < line_spacing)
1614  line_spacing = block_height;
1615  to_block->line_spacing = static_cast<float>(line_spacing);
1616  to_block->max_blob_size = static_cast<float>(block_height + 1);
1617  }
1618  return to_block;
1619 }
1620 
1621 // Constructs a block from the given list of partitions.
1622 // Arguments are as LineSpacingBlocks above.
1623 TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
1624  ColPartition_LIST* block_parts,
1625  ColPartition_LIST* used_parts) {
1626  if (block_parts->empty())
1627  return nullptr; // Nothing to do.
1628  // If the block_parts are not in reading order, then it will make an invalid
1629  // block polygon and bounding_box, so sort by bounding box now just to make
1630  // sure.
1631  block_parts->sort(&ColPartition::SortByBBox);
1632  ColPartition_IT it(block_parts);
1633  ColPartition* part = it.data();
1634  PolyBlockType type = part->type();
1635  if (type == PT_VERTICAL_TEXT)
1636  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1637  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1638  // put the average spacing in each partition, so we can just take the
1639  // linespacing from the first partition.
1640  int line_spacing = part->bottom_spacing();
1641  if (line_spacing < part->median_height())
1642  line_spacing = part->bounding_box().height();
1643  ICOORDELT_LIST vertices;
1644  ICOORDELT_IT vert_it(&vertices);
1645  ICOORD start, end;
1646  int min_x = INT32_MAX;
1647  int max_x = -INT32_MAX;
1648  int min_y = INT32_MAX;
1649  int max_y = -INT32_MAX;
1650  int iteration = 0;
1651  do {
1652  if (iteration == 0)
1653  ColPartition::LeftEdgeRun(&it, &start, &end);
1654  else
1655  ColPartition::RightEdgeRun(&it, &start, &end);
1656  ClipCoord(bleft, tright, &start);
1657  ClipCoord(bleft, tright, &end);
1658  vert_it.add_after_then_move(new ICOORDELT(start));
1659  vert_it.add_after_then_move(new ICOORDELT(end));
1660  UpdateRange(start.x(), &min_x, &max_x);
1661  UpdateRange(end.x(), &min_x, &max_x);
1662  UpdateRange(start.y(), &min_y, &max_y);
1663  UpdateRange(end.y(), &min_y, &max_y);
1664  if ((iteration == 0 && it.at_first()) ||
1665  (iteration == 1 && it.at_last())) {
1666  ++iteration;
1667  it.move_to_last();
1668  }
1669  } while (iteration < 2);
1671  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1672  min_x, min_y, max_x, max_y);
1673  auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1674  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1675  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1676 }
1677 
1678 // Constructs a block from the given list of vertical text partitions.
1679 // Currently only creates rectangular blocks.
1681  const ICOORD& tright,
1682  ColPartition_LIST* block_parts,
1683  ColPartition_LIST* used_parts) {
1684  if (block_parts->empty())
1685  return nullptr; // Nothing to do.
1686  ColPartition_IT it(block_parts);
1687  ColPartition* part = it.data();
1688  TBOX block_box = part->bounding_box();
1689  int line_spacing = block_box.width();
1690  PolyBlockType type = it.data()->type();
1691  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1692  block_box += it.data()->bounding_box();
1693  }
1694  if (textord_debug_tabfind) {
1695  tprintf("Making block at:");
1696  block_box.print();
1697  }
1698  auto* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1699  block_box.right(), block_box.top());
1700  block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1701  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1702 }
1703 
1704 // Makes a TO_ROW matching this and moves all the blobs to it, transferring
1705 // ownership to to returned TO_ROW.
1707  BLOBNBOX_C_IT blob_it(&boxes_);
1708  TO_ROW* row = nullptr;
1709  int line_size = IsVerticalType() ? median_width_ : median_height_;
1710  // Add all the blobs to a single TO_ROW.
1711  for (; !blob_it.empty(); blob_it.forward()) {
1712  BLOBNBOX* blob = blob_it.extract();
1713 // blob->compute_bounding_box();
1714  int top = blob->bounding_box().top();
1715  int bottom = blob->bounding_box().bottom();
1716  if (row == nullptr) {
1717  row = new TO_ROW(blob, static_cast<float>(top),
1718  static_cast<float>(bottom),
1719  static_cast<float>(line_size));
1720  } else {
1721  row->add_blob(blob, static_cast<float>(top),
1722  static_cast<float>(bottom),
1723  static_cast<float>(line_size));
1724  }
1725  }
1726  return row;
1727 }
1728 
1729 // Returns a copy of everything except the list of boxes. The resulting
1730 // ColPartition is only suitable for keeping in a column candidate list.
1732  auto* part = new ColPartition(blob_type_, vertical_);
1733  part->left_margin_ = left_margin_;
1734  part->right_margin_ = right_margin_;
1735  part->bounding_box_ = bounding_box_;
1736  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1737  sizeof(special_blobs_densities_));
1738  part->median_bottom_ = median_bottom_;
1739  part->median_top_ = median_top_;
1740  part->median_height_ = median_height_;
1741  part->median_left_ = median_left_;
1742  part->median_right_ = median_right_;
1743  part->median_width_ = median_width_;
1744  part->good_width_ = good_width_;
1745  part->good_column_ = good_column_;
1746  part->left_key_tab_ = left_key_tab_;
1747  part->right_key_tab_ = right_key_tab_;
1748  part->type_ = type_;
1749  part->flow_ = flow_;
1750  part->left_key_ = left_key_;
1751  part->right_key_ = right_key_;
1752  part->first_column_ = first_column_;
1753  part->last_column_ = last_column_;
1754  part->owns_blobs_ = false;
1755  return part;
1756 }
1757 
1759  ColPartition* copy = ShallowCopy();
1760  copy->set_owns_blobs(false);
1761  BLOBNBOX_C_IT inserter(copy->boxes());
1762  BLOBNBOX_C_IT traverser(boxes());
1763  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1764  inserter.add_after_then_move(traverser.data());
1765  return copy;
1766 }
1767 
1768 #ifndef GRAPHICS_DISABLED
1769 // Provides a color for BBGrid to draw the rectangle.
1770 // Must be kept in sync with PolyBlockType.
1772  if (type_ == PT_UNKNOWN)
1773  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1774  return POLY_BLOCK::ColorForPolyBlockType(type_);
1775 }
1776 #endif // GRAPHICS_DISABLED
1777 
1778 // Keep in sync with BlobRegionType.
1779 static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT";
1780 
1781 // Prints debug information on this.
1782 void ColPartition::Print() const {
1783  int y = MidY();
1784  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1785  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1786  " ts=%d bs=%d ls=%d rs=%d\n",
1787  boxes_.empty() ? 'E' : ' ',
1788  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1789  bounding_box_.left(), median_left_,
1790  bounding_box_.bottom(), median_bottom_,
1791  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1792  right_margin_, median_right_, bounding_box_.top(), median_top_,
1793  good_width_, good_column_, type_,
1794  kBlobTypes[blob_type_], flow_,
1795  first_column_, last_column_, boxes_.length(),
1796  space_above_, space_below_, space_to_left_, space_to_right_);
1797 }
1798 
1799 // Prints debug information on the colors.
1801  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1802  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1803  color1_[L_ALPHA_CHANNEL],
1804  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1805 }
1806 
1807 // Sets the types of all partitions in the run to be the max of the types.
1808 void ColPartition::SmoothPartnerRun(int working_set_count) {
1809  STATS left_stats(0, working_set_count);
1810  STATS right_stats(0, working_set_count);
1811  PolyBlockType max_type = type_;
1812  ColPartition* partner;
1813  for (partner = SingletonPartner(false); partner != nullptr;
1814  partner = partner->SingletonPartner(false)) {
1815  if (partner->type_ > max_type)
1816  max_type = partner->type_;
1817  if (column_set_ == partner->column_set_) {
1818  left_stats.add(partner->first_column_, 1);
1819  right_stats.add(partner->last_column_, 1);
1820  }
1821  }
1822  type_ = max_type;
1823  // TODO(rays) Either establish that it isn't necessary to set the columns,
1824  // or find a way to do it that does not cause an assert failure in
1825  // AddToWorkingSet.
1826 #if 0
1827  first_column_ = left_stats.mode();
1828  last_column_ = right_stats.mode();
1829  if (last_column_ < first_column_)
1830  last_column_ = first_column_;
1831 #endif
1832 
1833  for (partner = SingletonPartner(false); partner != nullptr;
1834  partner = partner->SingletonPartner(false)) {
1835  partner->type_ = max_type;
1836 #if 0 // See TODO above
1837  if (column_set_ == partner->column_set_) {
1838  partner->first_column_ = first_column_;
1839  partner->last_column_ = last_column_;
1840  }
1841 #endif
1842  }
1843 }
1844 
1845 // ======= Scenario common to all Refine*Partners* functions =======
1846 // ColPartitions are aiming to represent textlines, or horizontal slices
1847 // of images, and we are trying to form bi-directional (upper/lower) chains
1848 // of UNIQUE partner ColPartitions that can be made into blocks.
1849 // The ColPartitions have previously been typed (see SetPartitionType)
1850 // according to a combination of the content type and
1851 // how they lie on the columns. We want to chain text into
1852 // groups of a single type, but image ColPartitions may have been typed
1853 // differently in different parts of the image, due to being non-rectangular.
1854 //
1855 // We previously ran a search for upper and lower partners, but there may
1856 // be more than one, and they may be of mixed types, so now we wish to
1857 // refine the partners down to at most one.
1858 // A heading may have multiple partners:
1859 // ===============================
1860 // ======== ========== =========
1861 // ======== ========== =========
1862 // but it should be a different type.
1863 // A regular flowing text line may have multiple partners:
1864 // ================== ===================
1865 // ======= ================= ===========
1866 // This could be the start of a pull-out, or it might all be in a single
1867 // column and might be caused by tightly spaced text, bold words, bullets,
1868 // funny punctuation etc, all of which can cause textlines to be split into
1869 // multiple ColPartitions. Pullouts and figure captions should now be different
1870 // types so we can more aggressively merge groups of partners that all sit
1871 // in a single column.
1872 //
1873 // Cleans up the partners of the given type so that there is at most
1874 // one partner. This makes block creation simpler.
1875 // If get_desperate is true, goes to more desperate merge methods
1876 // to merge flowing text before breaking partnerships.
1878  ColPartitionGrid* grid) {
1879  if (TypesSimilar(type_, type)) {
1880  RefinePartnersInternal(true, get_desperate, grid);
1881  RefinePartnersInternal(false, get_desperate, grid);
1882  } else if (type == PT_COUNT) {
1883  // This is the final pass. Make sure only the correctly typed
1884  // partners surivive, however many there are.
1885  RefinePartnersByType(true, &upper_partners_);
1886  RefinePartnersByType(false, &lower_partners_);
1887  // It is possible for a merge to have given a partition multiple
1888  // partners again, so the last resort is to use overlap which is
1889  // guaranteed to leave at most one partner left.
1890  if (!upper_partners_.empty() && !upper_partners_.singleton())
1891  RefinePartnersByOverlap(true, &upper_partners_);
1892  if (!lower_partners_.empty() && !lower_partners_.singleton())
1893  RefinePartnersByOverlap(false, &lower_partners_);
1894  }
1895 }
1896 
1898 
1899 // Cleans up the partners above if upper is true, else below.
1900 // If get_desperate is true, goes to more desperate merge methods
1901 // to merge flowing text before breaking partnerships.
1902 void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate,
1903  ColPartitionGrid* grid) {
1904  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1905  if (!partners->empty() && !partners->singleton()) {
1906  RefinePartnersByType(upper, partners);
1907  if (!partners->empty() && !partners->singleton()) {
1908  // Check for transitive partnerships and break the cycle.
1909  RefinePartnerShortcuts(upper, partners);
1910  if (!partners->empty() && !partners->singleton()) {
1911  // Types didn't fix it. Flowing text keeps the one with the longest
1912  // sequence of singleton matching partners. All others max overlap.
1913  if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) {
1914  RefineTextPartnersByMerge(upper, false, partners, grid);
1915  if (!partners->empty() && !partners->singleton())
1916  RefineTextPartnersByMerge(upper, true, partners, grid);
1917  }
1918  // The last resort is to use overlap.
1919  if (!partners->empty() && !partners->singleton())
1920  RefinePartnersByOverlap(upper, partners);
1921  }
1922  }
1923  }
1924 }
1925 
1926 // Cleans up the partners above if upper is true, else below.
1927 // Restricts the partners to only desirable types. For text and BRT_HLINE this
1928 // means the same type_ , and for image types it means any image type.
1929 void ColPartition::RefinePartnersByType(bool upper,
1930  ColPartition_CLIST* partners) {
1931  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
1932  bounding_box_.bottom());
1933  if (debug) {
1934  tprintf("Refining %d %s partners by type for:\n",
1935  partners->length(), upper ? "Upper" : "Lower");
1936  Print();
1937  }
1938  ColPartition_C_IT it(partners);
1939  // Purify text by type.
1940  if (!IsImageType() && !IsLineType() && type() != PT_TABLE) {
1941  // Keep only partners matching type_.
1942  // Exception: PT_VERTICAL_TEXT is allowed to stay with the other
1943  // text types if it is the only partner.
1944  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1945  ColPartition* partner = it.data();
1946  if (!TypesSimilar(type_, partner->type_)) {
1947  if (debug) {
1948  tprintf("Removing partner:");
1949  partner->Print();
1950  }
1951  partner->RemovePartner(!upper, this);
1952  it.extract();
1953  } else if (debug) {
1954  tprintf("Keeping partner:");
1955  partner->Print();
1956  }
1957  }
1958  } else {
1959  // Only polyimages are allowed to have partners of any kind!
1960  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1961  ColPartition* partner = it.data();
1962  if (partner->blob_type() != BRT_POLYIMAGE ||
1963  blob_type() != BRT_POLYIMAGE) {
1964  if (debug) {
1965  tprintf("Removing partner:");
1966  partner->Print();
1967  }
1968  partner->RemovePartner(!upper, this);
1969  it.extract();
1970  } else if (debug) {
1971  tprintf("Keeping partner:");
1972  partner->Print();
1973  }
1974  }
1975  }
1976 }
1977 
1978 // Cleans up the partners above if upper is true, else below.
1979 // Remove transitive partnerships: this<->a, and a<->b and this<->b.
1980 // Gets rid of this<->b, leaving a clean chain.
1981 // Also if we have this<->a and a<->this, then gets rid of this<->a, as
1982 // this has multiple partners.
1983 void ColPartition::RefinePartnerShortcuts(bool upper,
1984  ColPartition_CLIST* partners) {
1985  bool done_any = false;
1986  do {
1987  done_any = false;
1988  ColPartition_C_IT it(partners);
1989  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1990  ColPartition* a = it.data();
1991  // Check for a match between all of a's partners (it1/b1) and all
1992  // of this's partners (it2/b2).
1993  ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
1994  for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
1995  ColPartition* b1 = it1.data();
1996  if (b1 == this) {
1997  done_any = true;
1998  it.extract();
1999  a->RemovePartner(!upper, this);
2000  break;
2001  }
2002  ColPartition_C_IT it2(partners);
2003  for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2004  ColPartition* b2 = it2.data();
2005  if (b1 == b2) {
2006  // Jackpot! b2 should not be a partner of this.
2007  it2.extract();
2008  b2->RemovePartner(!upper, this);
2009  done_any = true;
2010  // That potentially invalidated all the iterators, so break out
2011  // and start again.
2012  break;
2013  }
2014  }
2015  if (done_any)
2016  break;
2017  }
2018  if (done_any)
2019  break;
2020  }
2021  } while (done_any && !partners->empty() && !partners->singleton());
2022 }
2023 
2024 // Cleans up the partners above if upper is true, else below.
2025 // If multiple text partners can be merged, (with each other, NOT with this),
2026 // then do so.
2027 // If desperate is true, then an increase in overlap with the merge is
2028 // allowed. If the overlap increases, then the desperately_merged_ flag
2029 // is set, indicating that the textlines probably need to be regenerated
2030 // by aggressive line fitting/splitting, as there are probably vertically
2031 // joined blobs that cross textlines.
2032 void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
2033  ColPartition_CLIST* partners,
2034  ColPartitionGrid* grid) {
2035  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
2036  bounding_box_.bottom());
2037  if (debug) {
2038  tprintf("Refining %d %s partners by merge for:\n",
2039  partners->length(), upper ? "Upper" : "Lower");
2040  Print();
2041  }
2042  while (!partners->empty() && !partners->singleton()) {
2043  // Absorb will mess up the iterators, so we have to merge one partition
2044  // at a time and rebuild the iterators each time.
2045  ColPartition_C_IT it(partners);
2046  ColPartition* part = it.data();
2047  // Gather a list of merge candidates, from the list of partners, that
2048  // are all in the same single column. See general scenario comment above.
2049  ColPartition_CLIST candidates;
2050  ColPartition_C_IT cand_it(&candidates);
2051  for (it.forward(); !it.at_first(); it.forward()) {
2052  ColPartition* candidate = it.data();
2053  if (part->first_column_ == candidate->last_column_ &&
2054  part->last_column_ == candidate->first_column_)
2055  cand_it.add_after_then_move(it.data());
2056  }
2057  int overlap_increase;
2058  ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
2059  nullptr, &overlap_increase);
2060  if (candidate != nullptr && (overlap_increase <= 0 || desperate)) {
2061  if (debug) {
2062  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2063  part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
2064  overlap_increase);
2065  }
2066  // Remove before merge and re-insert to keep the integrity of the grid.
2067  grid->RemoveBBox(candidate);
2068  grid->RemoveBBox(part);
2069  part->Absorb(candidate, nullptr);
2070  // We modified the box of part, so re-insert it into the grid.
2071  grid->InsertBBox(true, true, part);
2072  if (overlap_increase > 0)
2073  part->desperately_merged_ = true;
2074  } else {
2075  break; // Can't merge.
2076  }
2077  }
2078 }
2079 
2080 // Cleans up the partners above if upper is true, else below.
2081 // Keep the partner with the biggest overlap.
2082 void ColPartition::RefinePartnersByOverlap(bool upper,
2083  ColPartition_CLIST* partners) {
2084  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
2085  bounding_box_.bottom());
2086  if (debug) {
2087  tprintf("Refining %d %s partners by overlap for:\n",
2088  partners->length(), upper ? "Upper" : "Lower");
2089  Print();
2090  }
2091  ColPartition_C_IT it(partners);
2092  ColPartition* best_partner = it.data();
2093  // Find the partner with the best overlap.
2094  int best_overlap = 0;
2095  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2096  ColPartition* partner = it.data();
2097  int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right())
2098  - std::max(bounding_box_.left(), partner->bounding_box_.left());
2099  if (overlap > best_overlap) {
2100  best_overlap = overlap;
2101  best_partner = partner;
2102  }
2103  }
2104  // Keep only the best partner.
2105  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2106  ColPartition* partner = it.data();
2107  if (partner != best_partner) {
2108  if (debug) {
2109  tprintf("Removing partner:");
2110  partner->Print();
2111  }
2112  partner->RemovePartner(!upper, this);
2113  it.extract();
2114  }
2115  }
2116 }
2117 
2118 // Return true if bbox belongs better in this than other.
2119 bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox,
2120  const ColPartition& other) {
2121  const TBOX& box = bbox->bounding_box();
2122  // Margins take priority.
2123  int left = box.left();
2124  int right = box.right();
2125  if (left < left_margin_ || right > right_margin_)
2126  return false;
2127  if (left < other.left_margin_ || right > other.right_margin_)
2128  return true;
2129  int top = box.top();
2130  int bottom = box.bottom();
2131  int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
2132  int other_overlap = std::min(top, other.median_top_) -
2133  std::max(bottom, other.median_bottom_);
2134  int this_miss = median_top_ - median_bottom_ - this_overlap;
2135  int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2136  if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) {
2137  tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2138  box.left(), box.bottom(), box.right(), box.top(),
2139  this_overlap, other_overlap, this_miss, other_miss,
2140  median_top_, other.median_top_);
2141  }
2142  if (this_miss < other_miss)
2143  return true;
2144  if (this_miss > other_miss)
2145  return false;
2146  if (this_overlap > other_overlap)
2147  return true;
2148  if (this_overlap < other_overlap)
2149  return false;
2150  return median_top_ >= other.median_top_;
2151 }
2152 
2153 // Returns the median line-spacing between the current position and the end
2154 // of the list.
2155 // The iterator is passed by value so the iteration does not modify the
2156 // caller's iterator.
2157 static int MedianSpacing(int page_height, ColPartition_IT it) {
2158  STATS stats(0, page_height);
2159  while (!it.cycled_list()) {
2160  ColPartition* part = it.data();
2161  it.forward();
2162  stats.add(part->bottom_spacing(), 1);
2163  stats.add(part->top_spacing(), 1);
2164  }
2165  return static_cast<int>(stats.median() + 0.5);
2166 }
2167 
2168 // Returns true if this column partition is in the same column as
2169 // part. This function will only work after the SetPartitionType function
2170 // has been called on both column partitions. This is useful for
2171 // doing a SideSearch when you want things in the same page column.
2172 //
2173 // Currently called by the table detection code to identify if potential table
2174 // partitions exist in the same column.
2176  // Overlap does not occur when last < part.first or first > part.last.
2177  // In other words, one is completely to the side of the other.
2178  // This is just DeMorgan's law applied to that so the function returns true.
2179  return (last_column_ >= part.first_column_) &&
2180  (first_column_ <= part.last_column_);
2181 }
2182 
2183 // Smoothes the spacings in the list into groups of equal linespacing.
2184 // resolution is the resolution of the original image, used as a basis
2185 // for thresholds in change of spacing. page_height is in pixels.
2186 void ColPartition::SmoothSpacings(int resolution, int page_height,
2187  ColPartition_LIST* parts) {
2188  // The task would be trivial if we didn't have to allow for blips -
2189  // occasional offsets in spacing caused by anomalous text, such as all
2190  // caps, groups of descenders, joined words, Arabic etc.
2191  // The neighbourhood stores a consecutive group of partitions so that
2192  // blips can be detected correctly, yet conservatively enough to not
2193  // mistake genuine spacing changes for blips. See example below.
2194  ColPartition* neighbourhood[PN_COUNT];
2195  ColPartition_IT it(parts);
2196  it.mark_cycle_pt();
2197  // Although we know nothing about the spacings is this list, the median is
2198  // used as an approximation to allow blips.
2199  // If parts of this block aren't spaced to the median, then we can't
2200  // accept blips in those parts, but we'll recalculate it each time we
2201  // split the block, so the median becomes more likely to match all the text.
2202  int median_space = MedianSpacing(page_height, it);
2203  ColPartition_IT start_it(it);
2204  ColPartition_IT end_it(it);
2205  for (int i = 0; i < PN_COUNT; ++i) {
2206  if (i < PN_UPPER || it.cycled_list()) {
2207  neighbourhood[i] = nullptr;
2208  } else {
2209  if (i == PN_LOWER)
2210  end_it = it;
2211  neighbourhood[i] = it.data();
2212  it.forward();
2213  }
2214  }
2215  while (neighbourhood[PN_UPPER] != nullptr) {
2216  // Test for end of a group. Normally SpacingsEqual is true within a group,
2217  // but in the case of a blip, it will be false. Here is an example:
2218  // Line enum Spacing below (spacing between tops of lines)
2219  // 1 ABOVE2 20
2220  // 2 ABOVE1 20
2221  // 3 UPPER 15
2222  // 4 LOWER 25
2223  // 5 BELOW1 20
2224  // 6 BELOW2 20
2225  // Line 4 is all in caps (regular caps), so the spacing between line 3
2226  // and line 4 (looking at the tops) is smaller than normal, and the
2227  // spacing between line 4 and line 5 is larger than normal, but the
2228  // two of them add to twice the normal spacing.
2229  // The following if has to accept unequal spacings 3 times to pass the
2230  // blip (20/15, 15/25 and 25/20)
2231  // When the blip is in the middle, OKSpacingBlip tests that one of
2232  // ABOVE1 and BELOW1 matches the median.
2233  // The first time, everything is shifted down 1, so we present
2234  // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median.
2235  // The last time, everything is shifted up 1, so we present OKSpacingBlip
2236  // with neighbourhood-1 and check that PN_LOWER matches the median.
2237  if (neighbourhood[PN_LOWER] == nullptr ||
2238  (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2239  resolution) &&
2240  !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2241  (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2242  !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2243  (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2244  !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
2245  // The group has ended. PN_UPPER is the last member.
2246  // Compute the mean spacing over the group.
2247  ColPartition_IT sum_it(start_it);
2248  ColPartition* last_part = neighbourhood[PN_UPPER];
2249  double total_bottom = 0.0;
2250  double total_top = 0.0;
2251  int total_count = 0;
2252  ColPartition* upper = sum_it.data();
2253  // We do not process last_part, as its spacing is different.
2254  while (upper != last_part) {
2255  total_bottom += upper->bottom_spacing();
2256  total_top += upper->top_spacing();
2257  ++total_count;
2258  sum_it.forward();
2259  upper = sum_it.data();
2260  }
2261  if (total_count > 0) {
2262  // There were at least 2 lines, so set them all to the mean.
2263  int top_spacing = static_cast<int>(total_top / total_count + 0.5);
2264  int bottom_spacing = static_cast<int>(total_bottom / total_count + 0.5);
2265  if (textord_debug_tabfind) {
2266  tprintf("Spacing run ended. Cause:");
2267  if (neighbourhood[PN_LOWER] == nullptr) {
2268  tprintf("No more lines\n");
2269  } else {
2270  tprintf("Spacing change. Spacings:\n");
2271  for (int i = 0; i < PN_COUNT; ++i) {
2272  if (neighbourhood[i] == nullptr) {
2273  tprintf("NULL");
2274  if (i > 0 && neighbourhood[i - 1] != nullptr) {
2275  if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) {
2276  tprintf(" Lower partner:");
2277  neighbourhood[i - 1]->SingletonPartner(false)->Print();
2278  } else {
2279  tprintf(" nullptr lower partner:\n");
2280  }
2281  } else {
2282  tprintf("\n");
2283  }
2284  } else {
2285  tprintf("Top = %d, bottom = %d\n",
2286  neighbourhood[i]->top_spacing(),
2287  neighbourhood[i]->bottom_spacing());
2288  }
2289  }
2290  }
2291  tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing);
2292  }
2293  sum_it = start_it;
2294  upper = sum_it.data();
2295  while (upper != last_part) {
2296  upper->set_top_spacing(top_spacing);
2297  upper->set_bottom_spacing(bottom_spacing);
2298  if (textord_debug_tabfind) {
2299  tprintf("Setting mean on:");
2300  upper->Print();
2301  }
2302  sum_it.forward();
2303  upper = sum_it.data();
2304  }
2305  }
2306  // PN_LOWER starts the next group and end_it is the next start_it.
2307  start_it = end_it;
2308  // Recalculate the median spacing to maximize the chances of detecting
2309  // spacing blips.
2310  median_space = MedianSpacing(page_height, end_it);
2311  }
2312  // Shuffle pointers.
2313  for (int j = 1; j < PN_COUNT; ++j) {
2314  neighbourhood[j - 1] = neighbourhood[j];
2315  }
2316  if (it.cycled_list()) {
2317  neighbourhood[PN_COUNT - 1] = nullptr;
2318  } else {
2319  neighbourhood[PN_COUNT - 1] = it.data();
2320  it.forward();
2321  }
2322  end_it.forward();
2323  }
2324 }
2325 
2326 // Returns true if the parts array of pointers to partitions matches the
2327 // condition for a spacing blip. See SmoothSpacings for what this means
2328 // and how it is used.
2329 bool ColPartition::OKSpacingBlip(int resolution, int median_spacing,
2330  ColPartition** parts) {
2331  if (parts[PN_UPPER] == nullptr || parts[PN_LOWER] == nullptr)
2332  return false;
2333  // The blip is OK if upper and lower sum to an OK value and at least
2334  // one of above1 and below1 is equal to the median.
2335  return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2336  median_spacing, resolution) &&
2337  ((parts[PN_ABOVE1] != nullptr &&
2338  parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2339  (parts[PN_BELOW1] != nullptr &&
2340  parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
2341 }
2342 
2343 // Returns true if both the top and bottom spacings of this match the given
2344 // spacing to within suitable margins dictated by the image resolution.
2345 bool ColPartition::SpacingEqual(int spacing, int resolution) const {
2346  int bottom_error = BottomSpacingMargin(resolution);
2347  int top_error = TopSpacingMargin(resolution);
2348  return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
2349  NearlyEqual(top_spacing_, spacing, top_error);
2350 }
2351 
2352 // Returns true if both the top and bottom spacings of this and other
2353 // match to within suitable margins dictated by the image resolution.
2354 bool ColPartition::SpacingsEqual(const ColPartition& other,
2355  int resolution) const {
2356  int bottom_error = std::max(BottomSpacingMargin(resolution),
2357  other.BottomSpacingMargin(resolution));
2358  int top_error = std::max(TopSpacingMargin(resolution),
2359  other.TopSpacingMargin(resolution));
2360  return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2361  (NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2362  NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
2363  bottom_error));
2364 }
2365 
2366 // Returns true if the sum spacing of this and other match the given
2367 // spacing (or twice the given spacing) to within a suitable margin dictated
2368 // by the image resolution.
2369 bool ColPartition::SummedSpacingOK(const ColPartition& other,
2370  int spacing, int resolution) const {
2371  int bottom_error = std::max(BottomSpacingMargin(resolution),
2372  other.BottomSpacingMargin(resolution));
2373  int top_error = std::max(TopSpacingMargin(resolution),
2374  other.TopSpacingMargin(resolution));
2375  int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2376  int top_total = top_spacing_ + other.top_spacing_;
2377  return (NearlyEqual(spacing, bottom_total, bottom_error) &&
2378  NearlyEqual(spacing, top_total, top_error)) ||
2379  (NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
2380  NearlyEqual(spacing * 2, top_total, top_error));
2381 }
2382 
2383 // Returns a suitable spacing margin that can be applied to bottoms of
2384 // text lines, based on the resolution and the stored side_step_.
2385 int ColPartition::BottomSpacingMargin(int resolution) const {
2386  return static_cast<int>(kMaxSpacingDrift * resolution + 0.5) + side_step_;
2387 }
2388 
2389 // Returns a suitable spacing margin that can be applied to tops of
2390 // text lines, based on the resolution and the stored side_step_.
2391 int ColPartition::TopSpacingMargin(int resolution) const {
2392  return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) +
2393  BottomSpacingMargin(resolution);
2394 }
2395 
2396 // Returns true if the median text sizes of this and other agree to within
2397 // a reasonable multiplicative factor.
2398 bool ColPartition::SizesSimilar(const ColPartition& other) const {
2399  return median_height_ <= other.median_height_ * kMaxSizeRatio &&
2400  other.median_height_ <= median_height_ * kMaxSizeRatio;
2401 }
2402 
2403 // Helper updates margin_left and margin_right, being the bounds of the left
2404 // margin of part of a block. Returns false and does not update the bounds if
2405 // this partition has a disjoint margin with the established margin.
2406 static bool UpdateLeftMargin(const ColPartition& part,
2407  int* margin_left, int* margin_right) {
2408  const TBOX& part_box = part.bounding_box();
2409  int top = part_box.top();
2410  int bottom = part_box.bottom();
2411  int tl_key = part.SortKey(part.left_margin(), top);
2412  int tr_key = part.SortKey(part_box.left(), top);
2413  int bl_key = part.SortKey(part.left_margin(), bottom);
2414  int br_key = part.SortKey(part_box.left(), bottom);
2415  int left_key = std::max(tl_key, bl_key);
2416  int right_key = std::min(tr_key, br_key);
2417  if (left_key <= *margin_right && right_key >= *margin_left) {
2418  // This part is good - let's keep it.
2419  *margin_right = std::min(*margin_right, right_key);
2420  *margin_left = std::max(*margin_left, left_key);
2421  return true;
2422  }
2423  return false;
2424 }
2425 
2426 // Computes and returns in start, end a line segment formed from a
2427 // forwards-iterated group of left edges of partitions that satisfy the
2428 // condition that the intersection of the left margins is non-empty, ie the
2429 // rightmost left margin is to the left of the leftmost left bounding box edge.
2430 // On return the iterator is set to the start of the next run.
2431 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
2432  ICOORD* start, ICOORD* end) {
2433  ColPartition* part = part_it->data();
2434  ColPartition* start_part = part;
2435  int start_y = part->bounding_box_.top();
2436  if (!part_it->at_first()) {
2437  int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2438  if (prev_bottom < start_y)
2439  start_y = prev_bottom;
2440  else if (prev_bottom > start_y)
2441  start_y = (start_y + prev_bottom) / 2;
2442  }
2443  int end_y = part->bounding_box_.bottom();
2444  int margin_right = INT32_MAX;
2445  int margin_left = -INT32_MAX;
2446  UpdateLeftMargin(*part, &margin_left, &margin_right);
2447  do {
2448  part_it->forward();
2449  part = part_it->data();
2450  } while (!part_it->at_first() &&
2451  UpdateLeftMargin(*part, &margin_left, &margin_right));
2452  // The run ended. If we were pushed inwards, compute the next run and
2453  // extend it backwards into the run we just calculated to find the end of
2454  // this run that provides a tight box.
2455  int next_margin_right = INT32_MAX;
2456  int next_margin_left = -INT32_MAX;
2457  UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
2458  if (next_margin_left > margin_right) {
2459  ColPartition_IT next_it(*part_it);
2460  do {
2461  next_it.forward();
2462  part = next_it.data();
2463  } while (!next_it.at_first() &&
2464  UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2465  // Now extend the next run backwards into the original run to get the
2466  // tightest fit.
2467  do {
2468  part_it->backward();
2469  part = part_it->data();
2470  } while (part != start_part &&
2471  UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2472  part_it->forward();
2473  }
2474  // Now calculate the end_y.
2475  part = part_it->data_relative(-1);
2476  end_y = part->bounding_box_.bottom();
2477  if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2478  end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
2479  start->set_y(start_y);
2480  start->set_x(part->XAtY(margin_right, start_y));
2481  end->set_y(end_y);
2482  end->set_x(part->XAtY(margin_right, end_y));
2483  if (textord_debug_tabfind && !part_it->at_first())
2484  tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2485  start_y, end_y, part->XAtY(margin_left, end_y),
2486  end->x(), part->left_margin_, part->bounding_box_.left());
2487 }
2488 
2489 // Helper updates margin_left and margin_right, being the bounds of the right
2490 // margin of part of a block. Returns false and does not update the bounds if
2491 // this partition has a disjoint margin with the established margin.
2492 static bool UpdateRightMargin(const ColPartition& part,
2493  int* margin_left, int* margin_right) {
2494  const TBOX& part_box = part.bounding_box();
2495  int top = part_box.top();
2496  int bottom = part_box.bottom();
2497  int tl_key = part.SortKey(part_box.right(), top);
2498  int tr_key = part.SortKey(part.right_margin(), top);
2499  int bl_key = part.SortKey(part_box.right(), bottom);
2500  int br_key = part.SortKey(part.right_margin(), bottom);
2501  int left_key = std::max(tl_key, bl_key);
2502  int right_key = std::min(tr_key, br_key);
2503  if (left_key <= *margin_right && right_key >= *margin_left) {
2504  // This part is good - let's keep it.
2505  *margin_right = std::min(*margin_right, right_key);
2506  *margin_left = std::max(*margin_left, left_key);
2507  return true;
2508  }
2509  return false;
2510 }
2511 
2512 // Computes and returns in start, end a line segment formed from a
2513 // backwards-iterated group of right edges of partitions that satisfy the
2514 // condition that the intersection of the right margins is non-empty, ie the
2515 // leftmost right margin is to the right of the rightmost right bounding box
2516 // edge.
2517 // On return the iterator is set to the start of the next run.
2518 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
2519  ICOORD* start, ICOORD* end) {
2520  ColPartition* part = part_it->data();
2521  ColPartition* start_part = part;
2522  int start_y = part->bounding_box_.bottom();
2523  if (!part_it->at_last()) {
2524  int next_y = part_it->data_relative(1)->bounding_box_.top();
2525  if (next_y > start_y)
2526  start_y = next_y;
2527  else if (next_y < start_y)
2528  start_y = (start_y + next_y) / 2;
2529  }
2530  int end_y = part->bounding_box_.top();
2531  int margin_right = INT32_MAX;
2532  int margin_left = -INT32_MAX;
2533  UpdateRightMargin(*part, &margin_left, &margin_right);
2534  do {
2535  part_it->backward();
2536  part = part_it->data();
2537  } while (!part_it->at_last() &&
2538  UpdateRightMargin(*part, &margin_left, &margin_right));
2539  // The run ended. If we were pushed inwards, compute the next run and
2540  // extend it backwards to find the end of this run for a tight box.
2541  int next_margin_right = INT32_MAX;
2542  int next_margin_left = -INT32_MAX;
2543  UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
2544  if (next_margin_right < margin_left) {
2545  ColPartition_IT next_it(*part_it);
2546  do {
2547  next_it.backward();
2548  part = next_it.data();
2549  } while (!next_it.at_last() &&
2550  UpdateRightMargin(*part, &next_margin_left,
2551  &next_margin_right));
2552  // Now extend the next run forwards into the original run to get the
2553  // tightest fit.
2554  do {
2555  part_it->forward();
2556  part = part_it->data();
2557  } while (part != start_part &&
2558  UpdateRightMargin(*part, &next_margin_left,
2559  &next_margin_right));
2560  part_it->backward();
2561  }
2562  // Now calculate the end_y.
2563  part = part_it->data_relative(1);
2564  end_y = part->bounding_box().top();
2565  if (!part_it->at_last() &&
2566  part_it->data()->bounding_box_.bottom() > end_y)
2567  end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
2568  start->set_y(start_y);
2569  start->set_x(part->XAtY(margin_left, start_y));
2570  end->set_y(end_y);
2571  end->set_x(part->XAtY(margin_left, end_y));
2572  if (textord_debug_tabfind && !part_it->at_last())
2573  tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2574  start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
2575  part->bounding_box_.right(), part->right_margin_);
2576 }
2577 
2578 } // namespace tesseract.
@ PT_VERT_LINE
Definition: capi.h:142
@ PT_PULLOUT_TEXT
Definition: capi.h:132
@ PT_COUNT
Definition: capi.h:144
@ PT_HEADING_TEXT
Definition: capi.h:131
@ PT_TABLE
Definition: capi.h:135
@ PT_NOISE
Definition: capi.h:143
@ PT_PULLOUT_IMAGE
Definition: capi.h:140
@ PT_HEADING_IMAGE
Definition: capi.h:139
@ PT_FLOWING_TEXT
Definition: capi.h:130
@ PT_UNKNOWN
Definition: capi.h:129
@ PT_HORZ_LINE
Definition: capi.h:141
@ PT_VERTICAL_TEXT
Definition: capi.h:136
@ PT_FLOWING_IMAGE
Definition: capi.h:138
BlobSpecialTextType
Definition: blobbox.h:96
@ BSTT_COUNT
Definition: blobbox.h:103
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:129
BlobTextFlowType
Definition: blobbox.h:114
@ BTFT_LEADER
Definition: blobbox.h:121
@ BTFT_NONE
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:118
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:119
@ BTFT_NEIGHBOURS
Definition: blobbox.h:117
@ BTFT_NONTEXT
Definition: blobbox.h:116
BlobRegionType
Definition: blobbox.h:72
@ BRT_RECTIMAGE
Definition: blobbox.h:76
@ BRT_COUNT
Definition: blobbox.h:82
@ BRT_POLYIMAGE
Definition: blobbox.h:77
@ BRT_TEXT
Definition: blobbox.h:80
@ BRT_HLINE
Definition: blobbox.h:74
@ BRT_VLINE
Definition: blobbox.h:75
@ BRT_UNKNOWN
Definition: blobbox.h:78
@ BRT_NOISE
Definition: blobbox.h:73
@ BRT_VERT_TEXT
Definition: blobbox.h:79
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
PolyBlockType
Definition: publictypes.h:53
#define ELIST2IZE(CLASSNAME)
Definition: elst2.h:939
#define ASSERT_HOST(x)
Definition: errcode.h:88
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:120
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:37
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int count(LIST var_list)
Definition: oldlist.cpp:95
int textord_debug_bugs
Definition: alignedblob.cpp:28
int textord_debug_tabfind
Definition: alignedblob.cpp:27
const double kMaxLeaderGapFractionOfMin
const int kColumnWidthFactor
Definition: tabfind.h:42
const int kMinChainTextValue
const double kMaxSizeRatio
const int kHorzStrongTextlineCount
const int kMaxColorDistance
const int kHorzStrongTextlineHeight
const double kMinBaselineCoverage
const double kMaxBaselineError
const int kHorzStrongTextlineAspect
const int kMinLeaderCount
const double kMaxSameBlockLineSpacing
const double kMaxTopSpacingFraction
const int kMaxRMSColorNoise
const int kMinStrongTextValue
const double kMaxSpacingDrift
const double kMaxLeaderGapFractionOfMax
int NoisyNeighbours() const
Definition: blobbox.cpp:237
BlobRegionType region_type() const
Definition: blobbox.h:283
C_BLOB * cblob() const
Definition: blobbox.h:268
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:444
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:298
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:289
bool IsDiacritic() const
Definition: blobbox.h:380
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
int base_char_bottom() const
Definition: blobbox.h:386
int base_char_top() const
Definition: blobbox.h:383
int GoodTextBlob() const
Definition: blobbox.cpp:226
BlobTextFlowType flow() const
Definition: blobbox.h:295
const TBOX & bounding_box() const
Definition: blobbox.h:230
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:733
BLOCK * block
Definition: blobbox.h:777
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
int total_cost() const
Definition: dppoint.h:72
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:69
Definition: ocrblock.h:31
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
int16_t y() const
access_function
Definition: points.h:56
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
int16_t x() const
access function
Definition: points.h:52
bool IsText() const
Definition: polyblk.h:49
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:393
Definition: rect.h:34
void set_right(int x)
Definition: rect.h:82
int16_t top() const
Definition: rect.h:58
void print() const
Definition: rect.h:278
void set_bottom(int y)
Definition: rect.h:68
int16_t width() const
Definition: rect.h:115
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
bool overlap(const TBOX &box) const
Definition: rect.h:355
void set_top(int y)
Definition: rect.h:61
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
void set_left(int x)
Definition: rect.h:75
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
double median() const
Definition: statistc.cpp:231
double ile(double frac) const
Definition: statistc.cpp:166
int32_t mode() const
Definition: statistc.cpp:107
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:241
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
virtual R Run(A1)=0
static bool WithinTestRegion(int detail_level, int x, int y)
bool IsImageType() const
Definition: colpartition.h:430
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
BlobTextFlowType flow() const
Definition: colpartition.h:155
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
float SpecialBlobsDensity(const BlobSpecialTextType type) const
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
TBOX BoundsWithoutBox(BLOBNBOX *box)
int SpecialBlobsCount(const BlobSpecialTextType type)
PolyBlockType type() const
Definition: colpartition.h:182
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
void set_side_step(int step)
Definition: colpartition.h:218
ColPartition * CopyButDontOwnBlobs()
ColPartition * SplitAt(int split_x)
void set_right_margin(int margin)
Definition: colpartition.h:122
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:188
void SetRightTab(const TabVector *tab_vector)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
int bottom_spacing() const
Definition: colpartition.h:221
void SetRegionAndFlowTypesFromProjectionValue(int value)
void set_left_margin(int margin)
Definition: colpartition.h:116
bool IsPulloutType() const
Definition: colpartition.h:438
bool VSignificantCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:391
BlobRegionType blob_type() const
Definition: colpartition.h:149
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
void AddBox(BLOBNBOX *box)
void SmoothPartnerRun(int working_set_count)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:376
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
bool IsVerticalType() const
Definition: colpartition.h:442
void SetLeftTab(const TabVector *tab_vector)
ScrollView::Color BoxColor() const
int CountOverlappingBoxes(const TBOX &box)
bool MatchingTextColor(const ColPartition &other) const
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
void SetColumnGoodness(WidthCallback *cb)
void set_owns_blobs(bool owns_blobs)
Definition: colpartition.h:295
bool MatchingSizes(const ColPartition &other) const
void set_type(PolyBlockType t)
Definition: colpartition.h:185
int LeftAtY(int y) const
Definition: colpartition.h:341
void RemovePartner(bool upper, ColPartition *partner)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:419
ColPartition * ShallowCopy() const
PolyBlockType PartitionType(ColumnSpanningType flow) const
bool IsInSameColumnAs(const ColPartition &part) const
void CopyRightTab(const ColPartition &src, bool take_box)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
ColPartition * SingletonPartner(bool upper)
bool ConfirmNoTabViolation(const ColPartition &other) const
void set_bottom_spacing(int spacing)
Definition: colpartition.h:224
const TBOX & bounding_box() const
Definition: colpartition.h:110
void SetPartitionType(int resolution, ColPartitionSet *columns)
void set_top_spacing(int spacing)
Definition: colpartition.h:230
int RightAtY(int y) const
Definition: colpartition.h:345
void set_block_owned(bool owned)
Definition: colpartition.h:209
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:715
bool MatchingColumns(const ColPartition &other) const
void Absorb(ColPartition *other, WidthCallback *cb)
void RemoveBox(BLOBNBOX *box)
void AddPartner(bool upper, ColPartition *partner)
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:158
void CopyLeftTab(const ColPartition &src, bool take_box)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:355
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
int sort_key() const
Definition: tabvector.h:157
void AddPartition(ColPartition *part)
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)