tesseract  4.1.1
findseam.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: findseam.cpp (Formerly findseam.c)
5  * Author: Mark Seaman, OCR Technology
6  *
7  * (c) Copyright 1987, Hewlett-Packard Company.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  *********************************************************************************/
19 /*----------------------------------------------------------------------
20  I n c l u d e s
21 ----------------------------------------------------------------------*/
22 #include "findseam.h"
23 #include "plotedges.h"
24 #include "outlines.h"
25 #include "seam.h"
26 #include "wordrec.h"
27 
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 #include "config_auto.h"
31 #endif
32 
33 /**********************************************************************
34  * partial_split_priority
35  *
36  * Assign a priority to this split based on the features that it has.
37  * Grade it according to the different rating schemes and return the
38  * value of its goodness.
39  **********************************************************************/
40 
41 #define partial_split_priority(split) \
42  (grade_split_length(split) + grade_sharpness(split))
43 
44 /*----------------------------------------------------------------------
45  T y p e s
46 ----------------------------------------------------------------------*/
47 #define SPLIT_CLOSENESS 20/* Difference in x value */
48  /* How many to keep */
49 #define MAX_NUM_SEAMS 150
50  /* How many to keep */
51 #define NO_FULL_PRIORITY -1/* Special marker for pri. */
52  /* Evaluate right away */
53 #define BAD_PRIORITY 9999.0
54 
55 /*----------------------------------------------------------------------
56  F u n c t i o n s
57 ----------------------------------------------------------------------*/
58 namespace tesseract {
59 
60 /**********************************************************************
61  * add_seam_to_queue
62  *
63  * Adds the given new_seam to the seams priority queue, unless it is full
64  * and the new seam is worse than the worst.
65  **********************************************************************/
66 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
67  SeamQueue* seams) {
68  if (new_seam == nullptr) return;
69  if (chop_debug) {
70  tprintf("Pushing new seam with priority %g :", new_priority);
71  new_seam->Print("seam: ");
72  }
73  if (seams->size() >= MAX_NUM_SEAMS) {
74  SeamPair old_pair(0, nullptr);
75  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
76  if (chop_debug) {
77  tprintf("Old seam staying with priority %g\n", old_pair.key());
78  }
79  delete new_seam;
80  seams->Push(&old_pair);
81  return;
82  } else if (chop_debug) {
83  tprintf("New seam with priority %g beats old worst seam with %g\n",
84  new_priority, old_pair.key());
85  }
86  }
87  SeamPair new_pair(new_priority, new_seam);
88  seams->Push(&new_pair);
89 }
90 
91 
92 /**********************************************************************
93  * choose_best_seam
94  *
95  * Choose the best seam that can be created by assembling this a
96  * collection of splits. A queue of all the possible seams is
97  * maintained. Each new split received is placed in that queue with
98  * its partial priority value. These values in the seam queue are
99  * evaluated and combined until a good enough seam is found. If no
100  * further good seams are being found then this function returns to the
101  * caller, who will send more splits. If this function is called with
102  * a split of nullptr, then no further splits can be supplied by the
103  * caller.
104  **********************************************************************/
105 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
106  PRIORITY priority, SEAM **seam_result,
107  TBLOB *blob, SeamPile *seam_pile) {
108  SEAM *seam;
109  char str[80];
110  float my_priority;
111  /* Add seam of split */
112  my_priority = priority;
113  if (split != nullptr) {
114  TPOINT split_point = split->point1->pos;
115  split_point += split->point2->pos;
116  split_point /= 2;
117  seam = new SEAM(my_priority, split_point, *split);
118  if (chop_debug > 1) seam->Print("Partial priority ");
119  add_seam_to_queue(my_priority, seam, seam_queue);
120 
121  if (my_priority > chop_good_split)
122  return;
123  }
124 
125  TBOX bbox = blob->bounding_box();
126  /* Queue loop */
127  while (!seam_queue->empty()) {
128  SeamPair seam_pair;
129  seam_queue->Pop(&seam_pair);
130  seam = seam_pair.extract_data();
131  /* Set full priority */
132  my_priority = seam->FullPriority(bbox.left(), bbox.right(),
135  if (chop_debug) {
136  sprintf (str, "Full my_priority %0.0f, ", my_priority);
137  seam->Print(str);
138  }
139 
140  if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141  my_priority < chop_ok_split) {
142  /* No crossing */
143  if (seam->IsHealthy(*blob, chop_min_outline_points,
145  delete *seam_result;
146  *seam_result = new SEAM(*seam);
147  (*seam_result)->set_priority(my_priority);
148  } else {
149  delete seam;
150  seam = nullptr;
151  my_priority = BAD_PRIORITY;
152  }
153  }
154 
155  if (my_priority < chop_good_split) {
156  delete seam;
157  return; /* Made good answer */
158  }
159 
160  if (seam) {
161  /* Combine with others */
162  if (seam_pile->size() < chop_seam_pile_size) {
163  combine_seam(*seam_pile, seam, seam_queue);
164  SeamDecPair pair(seam_pair.key(), seam);
165  seam_pile->Push(&pair);
166  } else if (chop_new_seam_pile &&
167  seam_pile->size() == chop_seam_pile_size &&
168  seam_pile->PeekTop().key() > seam_pair.key()) {
169  combine_seam(*seam_pile, seam, seam_queue);
170  SeamDecPair pair;
171  seam_pile->Pop(&pair); // pop the worst.
172  // Replace the seam in pair (deleting the old one) with
173  // the new seam and score, then push back into the heap.
174  pair.set_key(seam_pair.key());
175  pair.set_data(seam);
176  seam_pile->Push(&pair);
177  } else {
178  delete seam;
179  }
180  }
181 
182  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
183  : seam_queue->PeekTop().key();
184  if ((my_priority > chop_ok_split) ||
185  (my_priority > chop_good_split && split))
186  return;
187  }
188 }
189 
190 
191 /**********************************************************************
192  * combine_seam
193  *
194  * Find other seams to combine with this one. The new seams that result
195  * from this union should be added to the seam queue. The return value
196  * tells whether or not any additional seams were added to the queue.
197  **********************************************************************/
198 void Wordrec::combine_seam(const SeamPile& seam_pile,
199  const SEAM* seam, SeamQueue* seam_queue) {
200  for (int x = 0; x < seam_pile.size(); ++x) {
201  const SEAM *this_one = seam_pile.get(x).data();
202  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
203  SEAM *new_one = new SEAM(*seam);
204  new_one->CombineWith(*this_one);
205  if (chop_debug > 1) new_one->Print("Combo priority ");
206  add_seam_to_queue(new_one->priority(), new_one, seam_queue);
207  }
208  }
209 }
210 
211 /**********************************************************************
212  * pick_good_seam
213  *
214  * Find and return a good seam that will split this blob into two pieces.
215  * Work from the outlines provided.
216  **********************************************************************/
218  SeamPile seam_pile(chop_seam_pile_size);
219  EDGEPT *points[MAX_NUM_POINTS];
220  EDGEPT_CLIST new_points;
221  SEAM *seam = nullptr;
222  TESSLINE *outline;
223  int16_t num_points = 0;
224 
225 #ifndef GRAPHICS_DISABLED
226  if (chop_debug > 2)
227  wordrec_display_splits.set_value(true);
228 
229  draw_blob_edges(blob);
230 #endif
231 
232  PointHeap point_heap(MAX_NUM_POINTS);
233  for (outline = blob->outlines; outline; outline = outline->next)
234  prioritize_points(outline, &point_heap);
235 
236  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
237  points[num_points++] = point_heap.PeekTop().data;
238  point_heap.Pop(nullptr);
239  }
240 
241  /* Initialize queue */
242  SeamQueue seam_queue(MAX_NUM_SEAMS);
243 
244  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
245  try_vertical_splits(points, num_points, &new_points,
246  &seam_queue, &seam_pile, &seam, blob);
247 
248  if (seam == nullptr) {
249  choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
250  } else if (seam->priority() > chop_good_split) {
251  choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob,
252  &seam_pile);
253  }
254 
255  EDGEPT_C_IT it(&new_points);
256  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
257  EDGEPT *inserted_point = it.data();
258  if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
259  for (outline = blob->outlines; outline; outline = outline->next) {
260  if (outline->loop == inserted_point) {
261  outline->loop = outline->loop->next;
262  }
263  }
264  remove_edgept(inserted_point);
265  }
266  }
267 
268  if (seam) {
269  if (seam->priority() > chop_ok_split) {
270  delete seam;
271  seam = nullptr;
272  }
273 #ifndef GRAPHICS_DISABLED
274  else if (wordrec_display_splits) {
275  seam->Mark(edge_window);
276  if (chop_debug > 2) {
279  }
280  }
281 #endif
282  }
283 
284  if (chop_debug)
285  wordrec_display_splits.set_value(false);
286 
287  return (seam);
288 }
289 
290 
291 /**********************************************************************
292  * try_point_pairs
293  *
294  * Try all the splits that are produced by pairing critical points
295  * together. See if any of them are suitable for use. Use a seam
296  * queue and seam pile that have already been initialized and used.
297  **********************************************************************/
299  int16_t num_points,
300  SeamQueue* seam_queue,
301  SeamPile* seam_pile,
302  SEAM ** seam,
303  TBLOB * blob) {
304  int16_t x;
305  int16_t y;
306  PRIORITY priority;
307 
308  for (x = 0; x < num_points; x++) {
309  for (y = x + 1; y < num_points; y++) {
310  if (points[y] &&
311  points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
313  points[x] != points[y]->next && points[y] != points[x]->next &&
314  !is_exterior_point(points[x], points[y]) &&
315  !is_exterior_point(points[y], points[x])) {
316  SPLIT split(points[x], points[y]);
317  priority = partial_split_priority(&split);
318 
319  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
320  }
321  }
322  }
323 }
324 
325 
326 /**********************************************************************
327  * try_vertical_splits
328  *
329  * Try all the splits that are produced by vertical projection to see
330  * if any of them are suitable for use. Use a seam queue and seam pile
331  * that have already been initialized and used.
332  * Return in new_points a collection of points that were inserted into
333  * the blob while examining vertical splits and which may safely be
334  * removed once a seam is chosen if they are not part of the seam.
335  **********************************************************************/
337  int16_t num_points,
338  EDGEPT_CLIST *new_points,
339  SeamQueue* seam_queue,
340  SeamPile* seam_pile,
341  SEAM ** seam,
342  TBLOB * blob) {
343  EDGEPT *vertical_point = nullptr;
344  int16_t x;
345  PRIORITY priority;
346  TESSLINE *outline;
347 
348  for (x = 0; x < num_points; x++) {
349  vertical_point = nullptr;
350  for (outline = blob->outlines; outline; outline = outline->next) {
351  vertical_projection_point(points[x], outline->loop,
352  &vertical_point, new_points);
353  }
354 
355  if (vertical_point && points[x] != vertical_point->next &&
356  vertical_point != points[x]->next &&
357  points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
359  SPLIT split(points[x], vertical_point);
360  priority = partial_split_priority(&split);
361  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
362  }
363  }
364 }
365 
366 }
float PRIORITY
Definition: seam.h:36
bool wordrec_display_splits
Definition: split.cpp:41
void remove_edgept(EDGEPT *point)
Definition: split.cpp:200
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define MAX_NUM_POINTS
Definition: chop.h:33
#define SPLIT_CLOSENESS
Definition: findseam.cpp:47
#define NO_FULL_PRIORITY
Definition: findseam.cpp:51
#define BAD_PRIORITY
Definition: findseam.cpp:53
#define partial_split_priority(split)
Definition: findseam.cpp:41
#define MAX_NUM_SEAMS
Definition: findseam.cpp:49
#define is_exterior_point(edge, point)
Definition: outlines.h:93
ScrollView * edge_window
Definition: plotedges.cpp:35
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:69
#define edge_window_wait()
Definition: plotedges.h:56
#define update_edge_window()
Definition: plotedges.h:44
Definition: blobs.h:51
Definition: blobs.h:99
EDGEPT * next
Definition: blobs.h:192
int WeightedDistance(const EDGEPT &other, int x_factor) const
Definition: blobs.h:122
TPOINT pos
Definition: blobs.h:186
EDGEPT * loop
Definition: blobs.h:280
TESSLINE * next
Definition: blobs.h:281
Definition: blobs.h:284
TESSLINE * outlines
Definition: blobs.h:400
TBOX bounding_box() const
Definition: blobs.cpp:468
Definition: rect.h:34
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
Definition: seam.h:38
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:239
float priority() const
Definition: seam.h:59
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:82
void Mark(ScrollView *window) const
Definition: seam.cpp:180
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:66
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:40
void Print(const char *label) const
Definition: seam.cpp:154
void CombineWith(const SEAM &other)
Definition: seam.cpp:54
Definition: split.h:37
EDGEPT * point1
Definition: split.h:103
EDGEPT * point2
Definition: split.h:104
const Pair & get(int index) const
Definition: genericheap.h:87
bool empty() const
Definition: genericheap.h:68
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
bool Pop(Pair *entry)
Definition: genericheap.h:118
const Pair & PeekTop() const
Definition: genericheap.h:108
void Push(Pair *entry)
Definition: genericheap.h:95
void set_data(Data *new_data)
Definition: kdpair.h:126
const Key & key() const
Definition: kdpair.h:116
void set_key(const Key &new_key)
Definition: kdpair.h:119
Data * extract_data()
Definition: kdpair.h:131
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:66
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:173
bool chop_new_seam_pile
Definition: wordrec.h:211
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:298
double chop_center_knob
Definition: wordrec.h:216
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:336
double chop_width_change_knob
Definition: wordrec.h:220
int chop_centered_maxwidth
Definition: wordrec.h:218
int chop_min_outline_area
Definition: wordrec.h:213
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:198
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:285
double chop_ok_split
Definition: wordrec.h:221
double chop_good_split
Definition: wordrec.h:222
double chop_overlap_knob
Definition: wordrec.h:215
int chop_min_outline_points
Definition: wordrec.h:209
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:217
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:105
int chop_seam_pile_size
Definition: wordrec.h:210