tesseract  4.1.1
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.cpp
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 /*----------------------------------------------------------------------------
18  Include Files and Type Defines
19 ----------------------------------------------------------------------------*/
20 #include "picofeat.h"
21 
22 #include "classify.h"
23 #include "featdefs.h"
24 #include "fpoint.h"
25 #include "mfoutline.h"
26 #include "ocrfeatures.h"
27 #include "params.h"
28 #include "trainingsample.h"
29 
30 #include <cmath>
31 #include <cstdio>
32 
33 /*---------------------------------------------------------------------------
34  Variables
35 ----------------------------------------------------------------------------*/
36 
37 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38 
39 /*---------------------------------------------------------------------------
40  Private Function Prototypes
41 ----------------------------------------------------------------------------*/
43  FPOINT *End,
44  FEATURE_SET FeatureSet);
45 
46 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
47 
48 void NormalizePicoX(FEATURE_SET FeatureSet);
49 
50 /*----------------------------------------------------------------------------
51  Public Code
52 ----------------------------------------------------------------------------*/
53 /*---------------------------------------------------------------------------*/
54 namespace tesseract {
64  LIST Outlines;
65  LIST RemainingOutlines;
66  MFOUTLINE Outline;
67  FEATURE_SET FeatureSet;
68  float XScale, YScale;
69 
70  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
71  Outlines = ConvertBlob(Blob);
72  NormalizeOutlines(Outlines, &XScale, &YScale);
73  RemainingOutlines = Outlines;
74  iterate(RemainingOutlines) {
75  Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines);
76  ConvertToPicoFeatures2(Outline, FeatureSet);
77  }
79  NormalizePicoX(FeatureSet);
80  FreeOutlines(Outlines);
81  return (FeatureSet);
82 
83 } /* ExtractPicoFeatures */
84 } // namespace tesseract
85 
86 /*----------------------------------------------------------------------------
87  Private Code
88 ----------------------------------------------------------------------------*/
89 /*---------------------------------------------------------------------------*/
104  FPOINT *End,
105  FEATURE_SET FeatureSet) {
106  FEATURE Feature;
107  float Angle;
108  float Length;
109  int NumFeatures;
110  FPOINT Center;
111  FPOINT Delta;
112  int i;
113 
114  Angle = NormalizedAngleFrom (Start, End, 1.0);
115  Length = DistanceBetween (*Start, *End);
116  NumFeatures = static_cast<int>(floor (Length / classify_pico_feature_length + 0.5));
117  if (NumFeatures < 1)
118  NumFeatures = 1;
119 
120  /* compute vector for one pico feature */
121  Delta.x = XDelta (*Start, *End) / NumFeatures;
122  Delta.y = YDelta (*Start, *End) / NumFeatures;
123 
124  /* compute position of first pico feature */
125  Center.x = Start->x + Delta.x / 2.0;
126  Center.y = Start->y + Delta.y / 2.0;
127 
128  /* compute each pico feature in segment and add to feature set */
129  for (i = 0; i < NumFeatures; i++) {
130  Feature = NewFeature (&PicoFeatDesc);
131  Feature->Params[PicoFeatDir] = Angle;
132  Feature->Params[PicoFeatX] = Center.x;
133  Feature->Params[PicoFeatY] = Center.y;
134  AddFeature(FeatureSet, Feature);
135 
136  Center.x += Delta.x;
137  Center.y += Delta.y;
138  }
139 } /* ConvertSegmentToPicoFeat */
140 
141 
142 /*---------------------------------------------------------------------------*/
155 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
156  MFOUTLINE Next;
157  MFOUTLINE First;
158  MFOUTLINE Current;
159 
160  if (DegenerateOutline(Outline))
161  return;
162 
163  First = Outline;
164  Current = First;
165  Next = NextPointAfter(Current);
166  do {
167  /* note that an edge is hidden if the ending point of the edge is
168  marked as hidden. This situation happens because the order of
169  the outlines is reversed when they are converted from the old
170  format. In the old format, a hidden edge is marked by the
171  starting point for that edge. */
172  if (!(PointAt(Next)->Hidden))
173  ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
174  &(PointAt(Next)->Point), FeatureSet);
175 
176  Current = Next;
177  Next = NextPointAfter(Current);
178  }
179  while (Current != First);
180 
181 } /* ConvertToPicoFeatures2 */
182 
183 
184 /*---------------------------------------------------------------------------*/
193 void NormalizePicoX(FEATURE_SET FeatureSet) {
194  int i;
195  FEATURE Feature;
196  float Origin = 0.0;
197 
198  for (i = 0; i < FeatureSet->NumFeatures; i++) {
199  Feature = FeatureSet->Features[i];
200  Origin += Feature->Params[PicoFeatX];
201  }
202  Origin /= FeatureSet->NumFeatures;
203 
204  for (i = 0; i < FeatureSet->NumFeatures; i++) {
205  Feature = FeatureSet->Features[i];
206  Feature->Params[PicoFeatX] -= Origin;
207  }
208 } /* NormalizePicoX */
209 
210 namespace tesseract {
211 /*---------------------------------------------------------------------------*/
218  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
219  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
222  blob, false, &local_fx_info, &bl_features);
223  if (sample == nullptr) return nullptr;
224 
225  uint32_t num_features = sample->num_features();
226  const INT_FEATURE_STRUCT* features = sample->features();
227  FEATURE_SET feature_set = NewFeatureSet(num_features);
228  for (uint32_t f = 0; f < num_features; ++f) {
229  FEATURE feature = NewFeature(&IntFeatDesc);
230 
231  feature->Params[IntX] = features[f].X;
232  feature->Params[IntY] = features[f].Y;
233  feature->Params[IntDir] = features[f].Theta;
234  AddFeature(feature_set, feature);
235  }
236  delete sample;
237 
238  return feature_set;
239 } /* ExtractIntCNFeatures */
240 
241 /*---------------------------------------------------------------------------*/
248  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
249  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
252  blob, false, &local_fx_info, &bl_features);
253  if (sample == nullptr) return nullptr;
254 
255  FEATURE_SET feature_set = NewFeatureSet(1);
256  FEATURE feature = NewFeature(&IntFeatDesc);
257 
258  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
259  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
260  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
261  AddFeature(feature_set, feature);
262  delete sample;
263 
264  return feature_set;
265 } /* ExtractIntGeoFeatures */
266 
267 } // namespace tesseract.
#define double_VAR(name, val, comment)
Definition: params.h:312
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
const FEATURE_DESC_STRUCT IntFeatDesc
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:29
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:44
#define XDelta(A, B)
Definition: fpoint.h:38
#define YDelta(A, B)
Definition: fpoint.h:39
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:37
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:167
@ baseline
Definition: mfoutline.h:63
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:78
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:40
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: ocrfeatures.cpp:94
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:193
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:155
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:103
double classify_pico_feature_length
Definition: picofeat.cpp:37
@ IntDir
Definition: picofeat.h:32
@ IntX
Definition: picofeat.h:30
@ IntY
Definition: picofeat.h:31
@ GeoWidth
Definition: picofeat.h:39
@ GeoBottom
Definition: picofeat.h:37
@ GeoTop
Definition: picofeat.h:38
#define MAX_PICO_FEATURES
Definition: picofeat.h:46
@ PicoFeatY
Definition: picofeat.h:44
@ PicoFeatDir
Definition: picofeat.h:44
@ PicoFeatX
Definition: picofeat.h:44
#define iterate(l)
Definition: oldlist.h:101
#define first_node(l)
Definition: oldlist.h:92
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:77
Definition: blobs.h:284
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:217
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:63
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:276
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:247
Definition: cluster.h:32
Definition: fpoint.h:29
float y
Definition: fpoint.h:30
float x
Definition: fpoint.h:30
float Params[1]
Definition: ocrfeatures.h:61
FEATURE Features[1]
Definition: ocrfeatures.h:68
uint16_t NumFeatures
Definition: ocrfeatures.h:66