tesseract
4.1.1
pageiterator.h
Go to the documentation of this file.
1
// File: pageiterator.h
3
// Description: Iterator for tesseract page structure that avoids using
4
// tesseract internal data structures.
5
// Author: Ray Smith
6
// Created: Fri Feb 26 11:01:06 PST 2010
7
//
8
// (C) Copyright 2010, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
20
21
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
22
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
23
24
#include "
platform.h
"
25
#include "
publictypes.h
"
26
27
// Forward declarations of types defined elsewhere. This header only ever
// names them through pointers (parameters, return values and data members of
// PageIterator), so the complete definitions are not needed here.
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
struct Pix;
struct Pta;
34
35
namespace
tesseract
{
36
37
// Forward declaration of tesseract::Tesseract. PageIterator only takes and
// stores a Tesseract* (constructor argument and tesseract_ member), so the
// full class definition is not required in this header.
class
Tesseract;
38
52
class
TESS_API
PageIterator
{
53
public
:
68
PageIterator
(
PAGE_RES
* page_res,
Tesseract
*
tesseract
,
69
int
scale,
int
scaled_yres,
70
int
rect_left,
int
rect_top,
71
int
rect_width,
int
rect_height);
72
virtual
~
PageIterator
();
73
80
PageIterator
(
const
PageIterator
& src);
81
const
PageIterator
& operator=(
const
PageIterator
& src);
82
84
bool
PositionedAtSameWord(
const
PAGE_RES_IT
* other)
const
;
85
86
// ============= Moving around within the page ============.
87
92
virtual
void
Begin();
93
99
virtual
void
RestartParagraph();
100
105
bool
IsWithinFirstTextlineOfParagraph()
const
;
106
112
virtual
void
RestartRow();
113
125
virtual
bool
Next(
PageIteratorLevel
level);
126
140
virtual
bool
IsAtBeginningOf(
PageIteratorLevel
level)
const
;
141
158
virtual
bool
IsAtFinalElement(
PageIteratorLevel
level,
159
PageIteratorLevel
element)
const
;
160
167
int
Cmp(
const
PageIterator
&other)
const
;
168
169
// ============= Accessing data ==============.
170
// Coordinate system:
171
// Integer coordinates are at the cracks between the pixels.
172
// The top-left corner of the top-left pixel in the image is at (0,0).
173
// The bottom-right corner of the bottom-right pixel in the image is at
174
// (width, height).
175
// Every bounding box goes from the top-left of the top-left contained
176
// pixel to the bottom-right of the bottom-right contained pixel, so
177
// the bounding box of the single top-left pixel in the image is:
178
// (0,0)->(1,1).
179
// If an image rectangle has been set in the API, then returned coordinates
180
// relate to the original (full) image, rather than the rectangle.
181
191
void
SetBoundingBoxComponents
(
bool
include_upper_dots,
192
bool
include_lower_dots) {
193
include_upper_dots_ = include_upper_dots;
194
include_lower_dots_ = include_lower_dots;
195
}
196
206
bool
BoundingBox(
PageIteratorLevel
level,
207
int
* left,
int
* top,
int
* right,
int
* bottom)
const
;
208
bool
BoundingBox(
PageIteratorLevel
level,
int
padding,
209
int
* left,
int
* top,
int
* right,
int
* bottom)
const
;
215
bool
BoundingBoxInternal(
PageIteratorLevel
level,
216
int
* left,
int
* top,
int
* right,
int
* bottom)
const
;
217
219
bool
Empty(
PageIteratorLevel
level)
const
;
220
225
PolyBlockType
BlockType()
const
;
226
234
Pta* BlockPolygon()
const
;
235
242
Pix* GetBinaryImage(
PageIteratorLevel
level)
const
;
243
255
Pix* GetImage(
PageIteratorLevel
level,
int
padding, Pix* original_img,
256
int
* left,
int
* top)
const
;
257
264
bool
Baseline(
PageIteratorLevel
level,
265
int
* x1,
int
* y1,
int
* x2,
int
* y2)
const
;
266
275
void
Orientation
(
tesseract::Orientation
*orientation,
276
tesseract::WritingDirection
*writing_direction,
277
tesseract::TextlineOrder
*textline_order,
278
float
*deskew_angle)
const
;
279
308
void
ParagraphInfo(
tesseract::ParagraphJustification
*justification,
309
bool
*is_list_item,
310
bool
*is_crown,
311
int
*first_line_indent)
const
;
312
313
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
314
// of the current word to the given pointer (takes ownership of the pointer)
315
// and returns true.
316
// Can only be used when iterating on the word level.
317
bool
SetWordBlamerBundle(
BlamerBundle
*blamer_bundle);
318
319
protected
:
324
TESS_LOCAL
void
BeginWord(
int
offset);
325
327
PAGE_RES
*
page_res_
;
329
Tesseract
*
tesseract_
;
334
PAGE_RES_IT
*
it_
;
339
WERD
*
word_
;
341
int
word_length_
;
343
int
blob_index_
;
349
C_BLOB_IT*
cblob_it_
;
351
bool
include_upper_dots_
;
352
bool
include_lower_dots_
;
354
int
scale_
;
355
int
scaled_yres_
;
356
int
rect_left_
;
357
int
rect_top_
;
358
int
rect_width_
;
359
int
rect_height_
;
360
};
361
362
}
// namespace tesseract.
363
364
#endif
// TESSERACT_CCMAIN_PAGEITERATOR_H_
publictypes.h
PolyBlockType
PolyBlockType
Definition:
publictypes.h:53
platform.h
TESS_API
#define TESS_API
Definition:
platform.h:54
TESS_LOCAL
#define TESS_LOCAL
Definition:
platform.h:55
tesseract
Definition:
altorenderer.cpp:25
tesseract::TextlineOrder
TextlineOrder
Definition:
publictypes.h:152
tesseract::ParagraphJustification
ParagraphJustification
Definition:
publictypes.h:251
tesseract::Orientation
Orientation
Definition:
publictypes.h:120
tesseract::PageIteratorLevel
PageIteratorLevel
Definition:
publictypes.h:219
tesseract::WritingDirection
WritingDirection
Definition:
publictypes.h:135
tesseract::PageIterator
Definition:
pageiterator.h:52
tesseract::PageIterator::rect_left_
int rect_left_
Definition:
pageiterator.h:356
tesseract::PageIterator::word_
WERD * word_
Definition:
pageiterator.h:339
tesseract::PageIterator::it_
PAGE_RES_IT * it_
Definition:
pageiterator.h:334
tesseract::PageIterator::scaled_yres_
int scaled_yres_
Definition:
pageiterator.h:355
tesseract::PageIterator::SetBoundingBoxComponents
void SetBoundingBoxComponents(bool include_upper_dots, bool include_lower_dots)
Definition:
pageiterator.h:191
tesseract::PageIterator::page_res_
PAGE_RES * page_res_
Definition:
pageiterator.h:327
tesseract::PageIterator::cblob_it_
C_BLOB_IT * cblob_it_
Definition:
pageiterator.h:349
tesseract::PageIterator::word_length_
int word_length_
Definition:
pageiterator.h:341
tesseract::PageIterator::include_upper_dots_
bool include_upper_dots_
Definition:
pageiterator.h:351
tesseract::PageIterator::rect_height_
int rect_height_
Definition:
pageiterator.h:359
tesseract::PageIterator::include_lower_dots_
bool include_lower_dots_
Definition:
pageiterator.h:352
tesseract::PageIterator::rect_top_
int rect_top_
Definition:
pageiterator.h:357
tesseract::PageIterator::blob_index_
int blob_index_
Definition:
pageiterator.h:343
tesseract::PageIterator::rect_width_
int rect_width_
Definition:
pageiterator.h:358
tesseract::PageIterator::tesseract_
Tesseract * tesseract_
Definition:
pageiterator.h:329
tesseract::PageIterator::scale_
int scale_
Definition:
pageiterator.h:354
tesseract::Tesseract
Definition:
tesseractclass.h:174
BlamerBundle
Definition:
blamer.h:102
PAGE_RES
Definition:
pageres.h:76
PAGE_RES_IT
Definition:
pageres.h:675
WERD
Definition:
werd.h:56
src
ccmain
pageiterator.h
Generated on Thu Mar 26 2020 00:00:00 for tesseract by
1.9.1