tesseract  4.1.1
split.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: split.cpp (Formerly split.c)
5  * Author: Mark Seaman, OCR Technology
6  *
7  * (c) Copyright 1987, Hewlett-Packard Company.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  *************************************************************************/
19 /*----------------------------------------------------------------------
20  I n c l u d e s
21 ----------------------------------------------------------------------*/
22 // Include automatically generated configuration file if running autoconf.
23 #ifdef HAVE_CONFIG_H
24 #include "config_auto.h"
25 #endif
26 
27 #include "split.h"
28 #include "coutln.h"
29 #include "tprintf.h"
30 
31 #include <algorithm>
32 
33 /*----------------------------------------------------------------------
34  V a r i a b l e s
35 ----------------------------------------------------------------------*/
36 // Limit on the amount of penalty for the chop being off-center.
37 const int kCenterGradeCap = 25;
38 // Ridiculously large priority for splits that are no use.
39 const double kBadPriority = 999.0;
40 
41 BOOL_VAR(wordrec_display_splits, 0, "Display splits");
42 
43 // Returns the bounding box of all the points in the split.
45  return TBOX(
46  std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
47  std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
48 }
49 
50 // Hides the SPLIT so the outlines appear not to be cut by it.
51 void SPLIT::Hide() const {
52  EDGEPT* edgept = point1;
53  do {
54  edgept->Hide();
55  edgept = edgept->next;
56  } while (!edgept->EqualPos(*point2) && edgept != point1);
57  edgept = point2;
58  do {
59  edgept->Hide();
60  edgept = edgept->next;
61  } while (!edgept->EqualPos(*point1) && edgept != point2);
62 }
63 
64 // Undoes hide, so the outlines are cut by the SPLIT.
65 void SPLIT::Reveal() const {
66  EDGEPT* edgept = point1;
67  do {
68  edgept->Reveal();
69  edgept = edgept->next;
70  } while (!edgept->EqualPos(*point2) && edgept != point1);
71  edgept = point2;
72  do {
73  edgept->Reveal();
74  edgept = edgept->next;
75  } while (!edgept->EqualPos(*point1) && edgept != point2);
76 }
77 
78 // Compute a split priority based on the bounding boxes of the parts.
79 // The arguments here are config parameters defined in Wordrec. Add chop_
80 // to the beginning of the name.
81 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
82  int centered_maxwidth, double center_knob,
83  double width_change_knob) const {
84  TBOX box1 = Box12();
85  TBOX box2 = Box21();
86  int min_left = std::min(box1.left(), box2.left());
87  int max_right = std::max(box1.right(), box2.right());
88  if (xmin < min_left && xmax > max_right) return kBadPriority;
89 
90  float grade = 0.0f;
91  // grade_overlap.
92  int width1 = box1.width();
93  int width2 = box2.width();
94  int min_width = std::min(width1, width2);
95  int overlap = -box1.x_gap(box2);
96  if (overlap == min_width) {
97  grade += 100.0f; // Total overlap.
98  } else {
99  if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
100  if (overlap > 0) grade += overlap_knob * overlap;
101  }
102  // grade_center_of_blob.
103  if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
104  grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
105  }
106  // grade_width_change.
107  float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
108  if (width_change_grade > 0.0f)
109  grade += width_change_grade * width_change_knob;
110  return grade;
111 }
112 
113 // Returns true if *this SPLIT appears OK in the sense that it does not cross
114 // any outlines and does not chop off any ridiculously small pieces.
115 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
116  return !IsLittleChunk(min_points, min_area) &&
118 }
119 
120 // Returns true if the split generates a small chunk in terms of either area
121 // or number of points.
122 bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
123  if (point1->ShortNonCircularSegment(min_points, point2) &&
124  point1->SegmentArea(point2) < min_area) {
125  return true;
126  }
127  if (point2->ShortNonCircularSegment(min_points, point1) &&
128  point2->SegmentArea(point1) < min_area) {
129  return true;
130  }
131  return false;
132 }
133 
134 /**********************************************************************
135  * make_edgept
136  *
137  * Create an EDGEPT and hook it into an existing list of edge points.
138  **********************************************************************/
139 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
140  EDGEPT *this_edgept;
141  /* Create point */
142  this_edgept = new EDGEPT;
143  this_edgept->pos.x = x;
144  this_edgept->pos.y = y;
145  // Now deal with the src_outline steps.
146  C_OUTLINE* prev_ol = prev->src_outline;
147  if (prev_ol != nullptr && prev->next == next) {
148  // Compute the fraction of the segment that is being cut.
149  FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
150  FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
151  double cut_fraction = target_vec.length() / segment_vec.length();
152  // Get the start and end at the step level.
153  ICOORD step_start = prev_ol->position_at_index(prev->start_step);
154  int end_step = prev->start_step + prev->step_count;
155  int step_length = prev_ol->pathlength();
156  ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
157  ICOORD step_vec = step_end - step_start;
158  double target_length = step_vec.length() * cut_fraction;
159  // Find the point on the segment that gives the length nearest to target.
160  int best_step = prev->start_step;
161  ICOORD total_step(0, 0);
162  double best_dist = target_length;
163  for (int s = prev->start_step; s < end_step; ++s) {
164  total_step += prev_ol->step(s % step_length);
165  double dist = fabs(target_length - total_step.length());
166  if (dist < best_dist) {
167  best_dist = dist;
168  best_step = s + 1;
169  }
170  }
171  // The new point is an intermediate point.
172  this_edgept->src_outline = prev_ol;
173  this_edgept->step_count = end_step - best_step;
174  this_edgept->start_step = best_step % step_length;
175  prev->step_count = best_step - prev->start_step;
176  } else {
177  // The new point is poly only.
178  this_edgept->src_outline = nullptr;
179  this_edgept->step_count = 0;
180  this_edgept->start_step = 0;
181  }
182  /* Hook it up */
183  this_edgept->next = next;
184  this_edgept->prev = prev;
185  prev->next = this_edgept;
186  next->prev = this_edgept;
187  /* Set up vec entries */
188  this_edgept->vec.x = this_edgept->next->pos.x - x;
189  this_edgept->vec.y = this_edgept->next->pos.y - y;
190  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
191  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
192  return this_edgept;
193 }
194 
195 /**********************************************************************
196  * remove_edgept
197  *
198  * Remove a given EDGEPT from its list and delete it.
199  **********************************************************************/
200 void remove_edgept(EDGEPT *point) {
201  EDGEPT *prev = point->prev;
202  EDGEPT *next = point->next;
203  // Add point's steps onto prev's steps if they are from the same outline.
204  if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
205  prev->step_count += point->step_count;
206  }
207  prev->next = next;
208  next->prev = prev;
209  prev->vec.x = next->pos.x - prev->pos.x;
210  prev->vec.y = next->pos.y - prev->pos.y;
211  delete point;
212 }
213 
214 /**********************************************************************
215  * Print
216  *
217  * Shows the coordinates of both points in a split.
218  **********************************************************************/
219 void SPLIT::Print() const {
220  tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
221  point2->pos.y);
222 }
223 
224 #ifndef GRAPHICS_DISABLED
225 // Draws the split in the given window.
226 void SPLIT::Mark(ScrollView* window) const {
227  window->Pen(ScrollView::GREEN);
228  window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
229  window->UpdateWindow();
230 }
231 #endif
232 
233 // Creates two outlines out of one by splitting the original one in half.
234 // Inserts the resulting outlines into the given list.
235 void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
236  SplitOutline();
237  while (outlines->next != nullptr) outlines = outlines->next;
238 
239  outlines->next = new TESSLINE;
240  outlines->next->loop = point1;
241  outlines->next->ComputeBoundingBox();
242 
243  outlines = outlines->next;
244 
245  outlines->next = new TESSLINE;
246  outlines->next->loop = point2;
247  outlines->next->ComputeBoundingBox();
248 
249  outlines->next->next = nullptr;
250 }
251 
252 // Makes a split between these two edge points, but does not affect the
253 // outlines to which they belong.
254 void SPLIT::SplitOutline() const {
255  EDGEPT* temp2 = point2->next;
256  EDGEPT* temp1 = point1->next;
257  /* Create two new points */
258  EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
259  EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
260  // point1 and 2 are now cross-over points, so they must have nullptr
261  // src_outlines and give their src_outline information their new
262  // replacements.
263  new_point1->src_outline = point1->src_outline;
264  new_point1->start_step = point1->start_step;
265  new_point1->step_count = point1->step_count;
266  new_point2->src_outline = point2->src_outline;
267  new_point2->start_step = point2->start_step;
268  new_point2->step_count = point2->step_count;
269  point1->src_outline = nullptr;
270  point1->start_step = 0;
271  point1->step_count = 0;
272  point2->src_outline = nullptr;
273  point2->start_step = 0;
274  point2->step_count = 0;
275 }
276 
277 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
278 // the split, but possibly leaving some duplicate outlines.
279 void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
280  /* Modify edge points */
281  UnsplitOutlines();
282 
283  auto* outline1 = new TESSLINE;
284  outline1->next = blob->outlines;
285  blob->outlines = outline1;
286  outline1->loop = point1;
287 
288  auto* outline2 = new TESSLINE;
289  outline2->next = blob->outlines;
290  blob->outlines = outline2;
291  outline2->loop = point2;
292 }
293 
294 // Removes the split that was put between these two points.
296  EDGEPT* tmp1 = point1->next;
297  EDGEPT* tmp2 = point2->next;
298 
299  tmp1->next->prev = point2;
300  tmp2->next->prev = point1;
301 
302  // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
303  // deleted.
304  point1->next = tmp2->next;
305  point1->src_outline = tmp2->src_outline;
306  point1->start_step = tmp2->start_step;
307  point1->step_count = tmp2->step_count;
308  // Likewise point2 takes tmp1's place.
309  point2->next = tmp1->next;
310  point2->src_outline = tmp1->src_outline;
311  point2->start_step = tmp1->start_step;
312  point2->step_count = tmp1->step_count;
313 
314  delete tmp1;
315  delete tmp2;
316 
317  point1->vec.x = point1->next->pos.x - point1->pos.x;
318  point1->vec.y = point1->next->pos.y - point1->pos.y;
319 
320  point2->vec.x = point2->next->pos.x - point2->pos.x;
321  point2->vec.y = point2->next->pos.y - point2->pos.y;
322 }
FCOORD
Definition: points.h:189
ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:532
TBOX
Definition: rect.h:34
SPLIT::point1
EDGEPT * point1
Definition: split.h:103
SPLIT::Hide
void Hide() const
Definition: split.cpp:51
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:193
SPLIT::Reveal
void Reveal() const
Definition: split.cpp:65
EDGEPT::ShortNonCircularSegment
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const
Definition: blobs.h:158
EDGEPT
Definition: blobs.h:99
EDGEPT::vec
VECTOR vec
Definition: blobs.h:187
TESSLINE::ComputeBoundingBox
void ComputeBoundingBox()
Definition: blobs.cpp:213
TBLOB::SegmentCrossesOutline
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const
Definition: blobs.h:339
TPOINT::y
int16_t y
Definition: blobs.h:94
TBOX::right
int16_t right() const
Definition: rect.h:79
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:719
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:280
C_OUTLINE::step
ICOORD step(int index) const
Definition: coutln.h:144
C_OUTLINE::pathlength
int32_t pathlength() const
Definition: coutln.h:135
kBadPriority
const double kBadPriority
Definition: split.cpp:39
TBOX::x_gap
int x_gap(const TBOX &box) const
Definition: rect.h:225
SPLIT::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:115
EDGEPT::SegmentArea
int SegmentArea(const EDGEPT *end) const
Definition: blobs.h:145
SPLIT::Box21
TBOX Box21() const
Definition: split.h:46
TBOX::left
int16_t left() const
Definition: rect.h:72
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
TESSLINE::next
TESSLINE * next
Definition: blobs.h:281
kCenterGradeCap
const int kCenterGradeCap
Definition: split.cpp:37
ScrollView
Definition: scrollview.h:98
EDGEPT::Hide
void Hide()
Definition: blobs.h:170
TBOX::width
int16_t width() const
Definition: rect.h:115
SPLIT::SplitOutlineList
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:235
ICOORD
integer coordinate
Definition: points.h:32
C_OUTLINE::position_at_index
ICOORD position_at_index(int index) const
Definition: coutln.h:153
SPLIT::IsLittleChunk
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:122
EDGEPT::step_count
int step_count
Definition: blobs.h:197
split.h
EDGEPT::Reveal
void Reveal()
Definition: blobs.h:173
make_edgept
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:139
tprintf.h
TPOINT::x
int16_t x
Definition: blobs.h:93
EDGEPT::EqualPos
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:128
EDGEPT::src_outline
C_OUTLINE * src_outline
Definition: blobs.h:194
EDGEPT::pos
TPOINT pos
Definition: blobs.h:186
EDGEPT::next
EDGEPT * next
Definition: blobs.h:192
ICOORD::length
float length() const
find length
Definition: points.h:78
SPLIT::Print
void Print() const
Definition: split.cpp:219
SPLIT::Mark
void Mark(ScrollView *window) const
Definition: split.cpp:226
ScrollView::UpdateWindow
void UpdateWindow()
Definition: scrollview.cpp:704
TBLOB
Definition: blobs.h:284
EDGEPT::start_step
int start_step
Definition: blobs.h:196
wordrec_display_splits
bool wordrec_display_splits
Definition: split.cpp:41
SPLIT::UnsplitOutlineList
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:279
SPLIT::UnsplitOutlines
void UnsplitOutlines() const
Definition: split.cpp:295
SPLIT::bounding_box
TBOX bounding_box() const
Definition: split.cpp:44
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:200
coutln.h
SPLIT::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:81
ScrollView::GREEN
@ GREEN
Definition: scrollview.h:107
FCOORD::length
float length() const
find length
Definition: points.h:228
SPLIT::point2
EDGEPT * point2
Definition: split.h:104
SPLIT::SplitOutline
void SplitOutline() const
Definition: split.cpp:254
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:400
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
SPLIT::Box12
TBOX Box12() const
Definition: split.h:44
TESSLINE
Definition: blobs.h:203
C_OUTLINE
Definition: coutln.h:72