tesseract  4.1.1
commontraining.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: scharron@google.com (Samuel Charron)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H_
15 #define TESSERACT_TRAINING_COMMONTRAINING_H_
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config_auto.h"
19 #endif
20 
21 #include "baseapi.h"
22 
23 #ifdef DISABLED_LEGACY_ENGINE
24 
25 #include "tprintf.h"
26 #include "commandlineflags.h"
27 
28 
29 void ParseArguments(int* argc, char*** argv);
30 
31 
32 namespace tesseract {
33 
34 // Check whether the shared tesseract library is the right one.
35 // This function must be inline because otherwise it would be part of
36 // the shared library, so it could not compare the versions.
37 static inline void CheckSharedLibraryVersion()
38 {
39 #ifdef HAVE_CONFIG_H
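40  if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) {
41  tprintf("ERROR: shared library version mismatch (was %s, expected %s\n"
42  "Did you use a wrong shared tesseract library?\n",
43  TessBaseAPI::Version(), TESSERACT_VERSION_STR);
44  exit(1);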
45  }
46 #endif
47 }
48 
49 } // namespace tesseract
50 
51 
52 #else
53 
54 #include "cluster.h"
55 #include "commandlineflags.h"
56 #include "featdefs.h"
57 #include "intproto.h"
58 #include "oldlist.h"
59 
60 namespace tesseract {
61 class Classify;
62 class MasterTrainer;
63 class ShapeTable;
64 }
65 
67 // Globals ///////////////////////////////////////////////////////////////////
69 
70 extern FEATURE_DEFS_STRUCT feature_defs;
71 
72 // Must be defined in the file that "implements" commonTraining facilities.
73 extern CLUSTERCONFIG Config;
74 
76 // Structs ///////////////////////////////////////////////////////////////////
78 typedef struct
79 {
80  char *Label;
84 }
86 
87 typedef struct
88 {
89  char* Label;
90  int NumMerged[MAX_NUM_PROTOS];
94 
95 
97 // Functions /////////////////////////////////////////////////////////////////
99 void ParseArguments(int* argc, char*** argv);
100 
101 namespace tesseract {
102 
103 // Check whether the shared tesseract library is the right one.
104 // This function must be inline because otherwise it would be part of
105 // the shared library, so it could not compare the versions.
106 static inline void CheckSharedLibraryVersion()
107 {
108 #ifdef HAVE_CONFIG_H
109  if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) {
110  tprintf("ERROR: shared library version mismatch (was %s, expected %s\n"
111  "Did you use a wrong shared tesseract library?\n",
112  TessBaseAPI::Version(), TESSERACT_VERSION_STR);
113  exit(1);
114  }
115 #endif
116 }
117 
118 // Helper loads shape table from the given file.
119 ShapeTable* LoadShapeTable(const STRING& file_prefix);
120 // Helper to write the shape_table.
121 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
122 
123 // Creates a MasterTrainer and loads the training data into it:
124 // Initializes feature_defs and IntegerFX.
125 // Loads the shape_table if shape_table != nullptr.
126 // Loads initial unicharset from -U command-line option.
127 // If FLAGS_input_trainer is set, loads the majority of data from there, else:
128 // Loads font info from -F option.
129 // Loads xheights from -X option.
130 // Loads samples from .tr files in remaining command-line args.
131 // Deletes outliers and computes canonical samples.
132 // If FLAGS_output_trainer is set, saves the trainer for future use.
133 // Computes canonical and cloud features.
134 // If shape_table is not nullptr, but failed to load, make a fake flat one,
135 // as shape clustering was not run.
136 MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
137  bool replication,
138  ShapeTable** shape_table,
139  STRING* file_prefix);
140 } // namespace tesseract.
141 
142 const char *GetNextFilename(int argc, const char* const * argv);
143 
144 LABELEDLIST FindList(
145  LIST List,
146  char *Label);
147 
148 LABELEDLIST NewLabeledList(
149  const char *Label);
150 
151 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
152  const char *feature_name, int max_samples,
153  UNICHARSET* unicharset,
154  FILE* file, LIST* training_samples);
155 
156 void WriteTrainingSamples(
157  const FEATURE_DEFS_STRUCT &FeatureDefs,
158  char *Directory,
159  LIST CharList,
160  const char *program_feature_type);
161 
162 void FreeTrainingSamples(
163  LIST CharList);
164 
165 void FreeLabeledList(
166  LABELEDLIST LabeledList);
167 
168 void FreeLabeledClassList(
169  LIST ClassListList);
170 
171 CLUSTERER *SetUpForClustering(
172  const FEATURE_DEFS_STRUCT &FeatureDefs,
173  LABELEDLIST CharSample,
174  const char *program_feature_type);
175 
176 LIST RemoveInsignificantProtos(
177  LIST ProtoList,
178  bool KeepSigProtos,
179  bool KeepInsigProtos,
180  int N);
181 
182 void CleanUpUnusedData(
183  LIST ProtoList);
184 
185 void MergeInsignificantProtos(
186  LIST ProtoList,
187  const char *label,
188  CLUSTERER *Clusterer,
189  CLUSTERCONFIG *Config);
190 
191 MERGE_CLASS FindClass(
192  LIST List,
193  const char *Label);
194 
195 MERGE_CLASS NewLabeledClass(
196  const char *Label);
197 
198 void FreeTrainingSamples(
199  LIST CharList);
200 
201 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
202  LIST LabeledClassList);
203 
204 void Normalize(
205  float *Values);
206 
207 void FreeNormProtoList(
208  LIST CharList);
209 
210 void AddToNormProtosList(
211  LIST* NormProtoList,
212  LIST ProtoList,
213  char *CharName);
214 
215 int NumberOfProtos(
216  LIST ProtoList,
217  bool CountSigProtos,
218  bool CountInsigProtos);
219 
220 
221 void allocNormProtos();
222 
223 #endif // def DISABLED_LEGACY_ENGINE
224 
225 #endif // TESSERACT_TRAINING_COMMONTRAINING_H_
#define TESSERACT_VERSION_STR
Definition: tess_version.h:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define MAX_NUM_PROTOS
Definition: intproto.h:48
void WriteTrainingSamples(const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
CLUSTERCONFIG Config
void AddToNormProtosList(LIST *NormProtoList, LIST ProtoList, char *CharName)
int NumberOfProtos(LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)
void FreeTrainingSamples(LIST CharList)
void FreeLabeledList(LABELEDLIST LabeledList)
void ParseArguments(int *argc, char ***argv)
void CleanUpUnusedData(LIST ProtoList)
void allocNormProtos()
void FreeNormProtoList(LIST CharList)
LABELEDLIST FindList(LIST List, char *Label)
FEATURE_DEFS_STRUCT feature_defs
LABELEDLIST NewLabeledList(const char *Label)
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
void Normalize(float *Values)
LIST RemoveInsignificantProtos(LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)
void MergeInsignificantProtos(LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
MERGE_CLASS FindClass(LIST List, const char *Label)
const char * GetNextFilename(int argc, const char *const *argv)
CLUSTERER * SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
MERGE_CLASS NewLabeledClass(const char *Label)
void FreeLabeledClassList(LIST ClassListList)
CLASS_STRUCT * SetUpForFloat2Int(const UNICHARSET &unicharset, LIST LabeledClassList)
struct LABELEDLISTNODE * LABELEDLIST
ShapeTable * LoadShapeTable(const STRING &file_prefix)
MasterTrainer * LoadTrainingData(int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
void WriteShapeTable(const STRING &file_prefix, const ShapeTable &shape_table)
static const char * Version()
Definition: baseapi.cpp:233
Definition: strngs.h:45
CLASS_TYPE Class