#include <mastertrainer.h>
|
| MasterTrainer (NormalizationMode norm_mode, bool shape_analysis, bool replicate_samples, int debug_level) |
|
| ~MasterTrainer () |
|
bool | Serialize (FILE *fp) const |
|
void | LoadUnicharset (const char *filename) |
|
void | SetFeatureSpace (const IntFeatureSpace &fs) |
|
void | ReadTrainingSamples (const char *page_name, const FEATURE_DEFS_STRUCT &feature_defs, bool verification) |
|
void | AddSample (bool verification, const char *unichar_str, TrainingSample *sample) |
|
void | LoadPageImages (const char *filename) |
|
void | PostLoadCleanup () |
|
void | PreTrainingSetup () |
|
void | SetupMasterShapes () |
|
void | IncludeJunk () |
|
void | ReplicateAndRandomizeSamplesIfRequired () |
|
bool | LoadFontInfo (const char *filename) |
|
bool | LoadXHeights (const char *filename) |
|
bool | AddSpacingInfo (const char *filename) |
|
int | GetFontInfoId (const char *font_name) |
|
int | GetBestMatchingFontInfoId (const char *filename) |
|
const STRING & | GetTRFileName (int index) const |
|
void | SetupFlatShapeTable (ShapeTable *shape_table) |
|
CLUSTERER * | SetupForClustering (const ShapeTable &shape_table, const FEATURE_DEFS_STRUCT &feature_defs, int shape_id, int *num_samples) |
|
void | WriteInttempAndPFFMTable (const UNICHARSET &unicharset, const UNICHARSET &shape_set, const ShapeTable &shape_table, CLASS_STRUCT *float_classes, const char *inttemp_file, const char *pffmtable_file) |
|
const UNICHARSET & | unicharset () const |
|
TrainingSampleSet * | GetSamples () |
|
const ShapeTable & | master_shapes () const |
|
void | DebugCanonical (const char *unichar_str1, const char *unichar_str2) |
|
void | DisplaySamples (const char *unichar_str1, int cloud_font, const char *unichar_str2, int canonical_font) |
|
void | TestClassifierVOld (bool replicate_samples, ShapeClassifier *test_classifier, ShapeClassifier *old_classifier) |
|
void | TestClassifierOnSamples (CountTypes error_mode, int report_level, bool replicate_samples, ShapeClassifier *test_classifier, STRING *report_string) |
|
double | TestClassifier (CountTypes error_mode, int report_level, bool replicate_samples, TrainingSampleSet *samples, ShapeClassifier *test_classifier, STRING *report_string) |
|
float | ShapeDistance (const ShapeTable &shapes, int s1, int s2) |
|
Definition at line 69 of file mastertrainer.h.
◆ MasterTrainer()
tesseract::MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, bool replicate_samples, int debug_level)
Definition at line 51 of file mastertrainer.cpp.
55 : norm_mode_(norm_mode), samples_(fontinfo_table_),
56 junk_samples_(fontinfo_table_), verify_samples_(fontinfo_table_),
58 enable_shape_analysis_(shape_analysis),
59 enable_replication_(replicate_samples),
60 fragments_(
nullptr), prev_unichar_id_(-1), debug_level_(debug_level) {
◆ ~MasterTrainer()
tesseract::MasterTrainer::~MasterTrainer()
Definition at line 63 of file mastertrainer.cpp.
65 for (
int p = 0; p < page_images_.
size(); ++p)
66 pixDestroy(&page_images_[p]);
◆ AddSample()
void tesseract::MasterTrainer::AddSample(bool verification, const char *unichar_str, TrainingSample *sample)
Definition at line 163 of file mastertrainer.cpp.
167 prev_unichar_id_ = -1;
169 if (prev_unichar_id_ >= 0)
170 fragments_[prev_unichar_id_] = -1;
176 if (prev_unichar_id_ >= 0) {
179 if (fragments_[prev_unichar_id_] == 0)
180 fragments_[prev_unichar_id_] = junk_id;
181 else if (fragments_[prev_unichar_id_] != junk_id)
182 fragments_[prev_unichar_id_] = -1;
186 prev_unichar_id_ = -1;
◆ AddSpacingInfo()
bool tesseract::MasterTrainer::AddSpacingInfo(const char *filename)
Definition at line 412 of file mastertrainer.cpp.
413 FILE* fontinfo_file = fopen(filename,
"rb");
414 if (fontinfo_file ==
nullptr)
418 if (fontinfo_id < 0) {
419 tprintf(
"No font found matching fontinfo filename %s\n", filename);
420 fclose(fontinfo_file);
423 tprintf(
"Reading spacing from %s for font %d...\n", filename, fontinfo_id);
430 int x_gap, x_gap_before, x_gap_after, num_kerned;
432 FontInfo *fi = &fontinfo_table_.
get(fontinfo_id);
433 fi->init_spacing(unicharset_.
size());
434 FontSpacingInfo *spacing =
nullptr;
435 for (
int l = 0; l < num_unichars; ++l) {
436 if (
tfscanf(fontinfo_file,
"%s %d %d %d",
437 uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
438 tprintf(
"Bad format of font spacing file %s\n", filename);
439 fclose(fontinfo_file);
444 spacing =
new FontSpacingInfo();
445 spacing->x_gap_before =
static_cast<int16_t
>(x_gap_before * scale);
446 spacing->x_gap_after =
static_cast<int16_t
>(x_gap_after * scale);
448 for (
int k = 0; k < num_kerned; ++k) {
449 if (
tfscanf(fontinfo_file,
"%s %d", kerned_uch, &x_gap) != 2) {
450 tprintf(
"Bad format of font spacing file %s\n", filename);
451 fclose(fontinfo_file);
456 spacing->kerned_unichar_ids.push_back(
458 spacing->kerned_x_gaps.push_back(
static_cast<int16_t
>(x_gap * scale));
460 if (valid) fi->add_spacing(unicharset_.
unichar_to_id(uch), spacing);
462 fclose(fontinfo_file);
◆ DebugCanonical()
void tesseract::MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar_str2)
Definition at line 636 of file mastertrainer.cpp.
640 if (class_id2 == INVALID_UNICHAR_ID)
641 class_id2 = class_id1;
642 if (class_id1 == INVALID_UNICHAR_ID) {
643 tprintf(
"No unicharset entry found for %s\n", unichar_str1);
646 tprintf(
"Font ambiguities for unichar %d = %s and %d = %s\n",
647 class_id1, unichar_str1, class_id2, unichar_str2);
649 int num_fonts = samples_.
NumFonts();
650 const IntFeatureMap& feature_map = feature_map_;
654 for (
int f = 0; f < num_fonts; ++f) {
660 for (
int f1 = 0; f1 < num_fonts; ++f1) {
665 for (
int f2 = 0; f2 < num_fonts; ++f2) {
675 ShapeTable shapes(unicharset_);
676 for (
int f = 0; f < num_fonts; ++f) {
678 shapes.AddShape(class_id1, f);
679 if (class_id1 != class_id2 &&
681 shapes.AddShape(class_id2, f);
◆ DisplaySamples()
void tesseract::MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font, const char *unichar_str2, int canonical_font)
Definition at line 696 of file mastertrainer.cpp.
699 const IntFeatureMap& feature_map = feature_map_;
700 const IntFeatureSpace& feature_space = feature_map.
feature_space();
705 if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
708 for (uint32_t f = 0; f <
sample->num_features(); ++f) {
713 if (class_id1 != INVALID_UNICHAR_ID && cloud_font >= 0) {
715 for (
int f = 0; f < cloud.size(); ++f) {
718 feature_map.InverseIndexFeature(f);
732 int feature_index = feature_space.XYToFeatureIndex(ev->
x, ev->
y);
733 if (feature_index >= 0) {
736 shape.AddToShape(class_id1, cloud_font);
◆ GetBestMatchingFontInfoId()
int tesseract::MasterTrainer::GetBestMatchingFontInfoId(const char *filename)
Definition at line 479 of file mastertrainer.cpp.
480 int fontinfo_id = -1;
482 for (
int f = 0; f < fontinfo_table_.
size(); ++f) {
483 if (strstr(filename, fontinfo_table_.
get(f).name) !=
nullptr) {
484 int len = strlen(fontinfo_table_.
get(f).name);
486 if (len > best_len) {
◆ GetFontInfoId()
int tesseract::MasterTrainer::GetFontInfoId(const char *font_name)
Definition at line 468 of file mastertrainer.cpp.
471 fontinfo.name =
const_cast<char*
>(font_name);
472 fontinfo.properties = 0;
473 fontinfo.universal_id = 0;
474 return fontinfo_table_.
get_index(fontinfo);
◆ GetSamples()
◆ GetTRFileName()
const STRING& tesseract::MasterTrainer::GetTRFileName(int index) const [inline]
◆ IncludeJunk()
void tesseract::MasterTrainer::IncludeJunk()
Definition at line 295 of file mastertrainer.cpp.
300 tprintf(
"Moving %d junk samples to master sample set.\n", num_junks);
301 for (
int s = 0; s < num_junks; ++s) {
303 int junk_id =
sample->class_id();
306 if (sample_id == INVALID_UNICHAR_ID)
308 sample->set_class_id(sample_id);
◆ LoadFontInfo()
bool tesseract::MasterTrainer::LoadFontInfo(const char *filename)
Definition at line 333 of file mastertrainer.cpp.
334 FILE* fp = fopen(filename,
"rb");
336 fprintf(stderr,
"Failed to load font_properties from %s\n", filename);
339 int italic, bold, fixed, serif, fraktur;
342 char* font_name =
new char[1024];
343 fontinfo.name = font_name;
344 fontinfo.properties = 0;
345 fontinfo.universal_id = 0;
346 if (
tfscanf(fp,
"%1024s %i %i %i %i %i\n", font_name, &italic, &bold,
347 &fixed, &serif, &fraktur) != 6) {
351 fontinfo.properties =
357 if (!fontinfo_table_.
contains(fontinfo)) {
◆ LoadPageImages()
void tesseract::MasterTrainer::LoadPageImages(const char *filename)
Definition at line 193 of file mastertrainer.cpp.
197 for (page = 0;; page++) {
198 pix = pixReadFromMultipageTiff(filename, &offset);
203 tprintf(
"Loaded %d page images from %s\n", page, filename);
◆ LoadUnicharset()
void tesseract::MasterTrainer::LoadUnicharset(const char *filename)
Definition at line 88 of file mastertrainer.cpp.
90 tprintf(
"Failed to load unicharset from file %s\n"
91 "Building unicharset for training from scratch...\n",
99 charsetsize_ = unicharset_.
size();
100 delete [] fragments_;
101 fragments_ =
new int[charsetsize_];
102 memset(fragments_, 0,
sizeof(*fragments_) * charsetsize_);
◆ LoadXHeights()
bool tesseract::MasterTrainer::LoadXHeights(const char *filename)
Definition at line 369 of file mastertrainer.cpp.
370 tprintf(
"fontinfo table is of size %d\n", fontinfo_table_.
size());
372 if (filename ==
nullptr)
return true;
373 FILE *f = fopen(filename,
"rb");
375 fprintf(stderr,
"Failed to load font xheights from %s\n", filename);
378 tprintf(
"Reading x-heights from %s ...\n", filename);
380 fontinfo.properties = 0;
381 fontinfo.universal_id = 0;
384 int total_xheight = 0;
385 int xheight_count = 0;
387 if (
tfscanf(f,
"%1023s %d\n", buffer, &xht) != 2)
390 fontinfo.name = buffer;
391 if (!fontinfo_table_.
contains(fontinfo))
continue;
392 int fontinfo_id = fontinfo_table_.
get_index(fontinfo);
393 xheights_[fontinfo_id] = xht;
394 total_xheight += xht;
397 if (xheight_count == 0) {
398 fprintf(stderr,
"No valid xheights in %s!\n", filename);
402 int mean_xheight =
DivRounded(total_xheight, xheight_count);
403 for (
int i = 0; i < fontinfo_table_.
size(); ++i) {
404 if (xheights_[i] < 0)
405 xheights_[i] = mean_xheight;
◆ master_shapes()
const ShapeTable& tesseract::MasterTrainer::master_shapes() const [inline]
◆ PostLoadCleanup()
void tesseract::MasterTrainer::PostLoadCleanup()
Definition at line 211 of file mastertrainer.cpp.
212 if (debug_level_ > 0)
213 tprintf(
"PostLoadCleanup...\n");
214 if (enable_shape_analysis_)
215 ReplaceFragmentedSamples();
216 SampleIterator sample_it;
217 sample_it.Init(
nullptr,
nullptr,
true, &verify_samples_);
218 sample_it.NormalizeSamples();
226 if (debug_level_ > 0)
227 tprintf(
"ComputeCanonicalSamples...\n");
◆ PreTrainingSetup()
void tesseract::MasterTrainer::PreTrainingSetup()
Definition at line 234 of file mastertrainer.cpp.
235 if (debug_level_ > 0)
236 tprintf(
"PreTrainingSetup...\n");
239 if (debug_level_ > 0)
240 tprintf(
"ComputeCloudFeatures...\n");
◆ ReadTrainingSamples()
void tesseract::MasterTrainer::ReadTrainingSamples(const char *page_name, const FEATURE_DEFS_STRUCT &feature_defs, bool verification)
Definition at line 112 of file mastertrainer.cpp.
122 FILE* fp = fopen(page_name,
"rb");
124 tprintf(
"Failed to open tr file: %s\n", page_name);
128 while (fgets(buffer,
sizeof(buffer), fp) !=
nullptr) {
129 if (buffer[0] ==
'\n')
132 char* space = strchr(buffer,
' ');
133 if (space ==
nullptr) {
134 tprintf(
"Bad format in tr file, reading fontname, unichar\n");
139 if (font_id < 0) font_id = 0;
144 tprintf(
"Bad format in tr file, reading box coords\n");
148 auto*
sample =
new TrainingSample;
149 sample->set_font_id(font_id);
150 sample->set_page_num(page_number + page_images_.
size());
151 sample->set_bounding_box(bounding_box);
152 sample->ExtractCharDesc(int_feature_type, micro_feature_type,
153 cn_feature_type, geo_feature_type, char_desc);
157 charsetsize_ = unicharset_.
size();
◆ ReplicateAndRandomizeSamplesIfRequired()
void tesseract::MasterTrainer::ReplicateAndRandomizeSamplesIfRequired()
Definition at line 321 of file mastertrainer.cpp.
322 if (enable_replication_) {
323 if (debug_level_ > 0)
324 tprintf(
"ReplicateAndRandomize...\n");
◆ Serialize()
bool tesseract::MasterTrainer::Serialize(FILE *fp) const
Definition at line 72 of file mastertrainer.cpp.
73 uint32_t value = norm_mode_;
76 if (!feature_space_.
Serialize(fp))
return false;
77 if (!samples_.
Serialize(fp))
return false;
78 if (!junk_samples_.
Serialize(fp))
return false;
79 if (!verify_samples_.
Serialize(fp))
return false;
80 if (!master_shapes_.
Serialize(fp))
return false;
81 if (!flat_shapes_.
Serialize(fp))
return false;
82 if (!fontinfo_table_.
Serialize(fp))
return false;
83 if (!xheights_.
Serialize(fp))
return false;
◆ SetFeatureSpace()
void tesseract::MasterTrainer::SetFeatureSpace(const IntFeatureSpace &fs) [inline]
◆ SetupFlatShapeTable()
void tesseract::MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table)
Definition at line 496 of file mastertrainer.cpp.
502 int num_shapes = flat_shapes_.
NumShapes();
503 for (
int s = 0; s < num_shapes; ++s) {
504 int font = flat_shapes_.
GetShape(s)[0].font_ids[0];
506 for (f = 0; f < active_fonts.
size(); ++f) {
507 if (active_fonts[f] == font)
510 if (f == active_fonts.
size())
514 int num_fonts = active_fonts.
size();
515 for (
int f = 0; f < num_fonts; ++f) {
516 for (
int s = num_shapes - 1; s >= 0; --s) {
517 int font = flat_shapes_.
GetShape(s)[0].font_ids[0];
518 if (font == active_fonts[f]) {
◆ SetupForClustering()
Definition at line 527 of file mastertrainer.cpp.
540 IndexMapBiDi shape_map;
541 shape_map.Init(shape_table.NumShapes(),
false);
542 shape_map.SetMap(shape_id,
true);
547 it.Init(&shape_map, &shape_table,
false, &samples_);
548 for (it.Begin(); !it.AtEnd(); it.Next()) {
552 for (
int i = sample_ptrs.
size() - 1; i >= 0; --i) {
553 const TrainingSample*
sample = sample_ptrs[i];
554 uint32_t num_features =
sample->num_micro_features();
555 for (uint32_t f = 0; f < num_features; ++f)
559 *num_samples = sample_id;
◆ SetupMasterShapes()
void tesseract::MasterTrainer::SetupMasterShapes()
Definition at line 246 of file mastertrainer.cpp.
247 tprintf(
"Building master shape table\n");
248 const int num_fonts = samples_.
NumFonts();
250 ShapeTable char_shapes_begin_fragment(samples_.
unicharset());
251 ShapeTable char_shapes_end_fragment(samples_.
unicharset());
252 ShapeTable char_shapes(samples_.
unicharset());
255 for (
int f = 0; f < num_fonts; ++f) {
257 shapes.AddShape(c, f);
263 if (fragment ==
nullptr)
264 char_shapes.AppendMasterShapes(shapes,
nullptr);
266 char_shapes_begin_fragment.AppendMasterShapes(shapes,
nullptr);
268 char_shapes_end_fragment.AppendMasterShapes(shapes,
nullptr);
270 char_shapes.AppendMasterShapes(shapes,
nullptr);
274 char_shapes.AppendMasterShapes(char_shapes_begin_fragment,
nullptr);
277 char_shapes.AppendMasterShapes(char_shapes_end_fragment,
nullptr);
◆ ShapeDistance()
float tesseract::MasterTrainer::ShapeDistance(const ShapeTable &shapes, int s1, int s2)
Definition at line 810 of file mastertrainer.cpp.
811 const IntFeatureMap& feature_map = feature_map_;
812 const Shape& shape1 = shapes.GetShape(s1);
813 const Shape& shape2 = shapes.GetShape(s2);
814 int num_chars1 = shape1.size();
815 int num_chars2 = shape2.size();
816 float dist_sum = 0.0f;
818 if (num_chars1 > 1 || num_chars2 > 1) {
821 for (
int c1 = 0; c1 < num_chars1; ++c1) {
822 for (
int c2 = 0; c2 < num_chars2; ++c2) {
835 return dist_sum / dist_count;
◆ TestClassifier()
Definition at line 783 of file mastertrainer.cpp.
789 SampleIterator sample_it;
790 sample_it.Init(
nullptr,
nullptr, replicate_samples, samples);
791 if (report_level > 0) {
793 for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next())
795 tprintf(
"Iterator has charset size of %d/%d, %d shapes, %d samples\n",
796 sample_it.SparseCharsetSize(), sample_it.CompactCharsetSize(),
797 test_classifier->GetShapeTable()->NumShapes(), num_samples);
798 tprintf(
"Testing %sREPLICATED:\n", replicate_samples ?
"" :
"NON-");
800 double unichar_error = 0.0;
802 error_mode, fontinfo_table_,
803 page_images_, &sample_it, &unichar_error,
804 nullptr, report_string);
805 return unichar_error;
◆ TestClassifierOnSamples()
void tesseract::MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_level, bool replicate_samples, ShapeClassifier *test_classifier, STRING *report_string)
◆ TestClassifierVOld()
Definition at line 749 of file mastertrainer.cpp.
752 SampleIterator sample_it;
753 sample_it.Init(
nullptr,
nullptr, replicate_samples, &samples_);
756 page_images_, &sample_it);
◆ unicharset()
const UNICHARSET& tesseract::MasterTrainer::unicharset() const [inline]
◆ WriteInttempAndPFFMTable()
void tesseract::MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, const UNICHARSET &shape_set, const ShapeTable &shape_table, CLASS_STRUCT *float_classes, const char *inttemp_file, const char *pffmtable_file)
Definition at line 567 of file mastertrainer.cpp.
575 fontinfo_table_.
MoveTo(&classify->get_fontinfo_table());
576 INT_TEMPLATES int_templates = classify->CreateIntTemplates(float_classes,
578 FILE* fp = fopen(inttemp_file,
"wb");
580 tprintf(
"Error, failed to open file \"%s\"\n", inttemp_file);
582 classify->WriteIntTemplates(fp, int_templates, shape_set);
595 for (
int i = 0; i < int_templates->
NumClasses; ++i) {
599 uint16_t max_length = 0;
600 for (
int config_id = 0; config_id < Class->
NumConfigs; config_id++) {
604 if (length > max_length)
606 int shape_id = float_classes[i].
font_set.
get(config_id);
607 const Shape& shape = shape_table.GetShape(shape_id);
608 for (
int c = 0; c < shape.size(); ++c) {
609 int unichar_id = shape[c].unichar_id;
610 if (length > unichar_cutoffs[unichar_id])
611 unichar_cutoffs[unichar_id] = length;
614 shapetable_cutoffs.
push_back(max_length);
616 fp = fopen(pffmtable_file,
"wb");
618 tprintf(
"Error, failed to open file \"%s\"\n", pffmtable_file);
623 if (strcmp(unichar,
" ") == 0) {
626 fprintf(fp,
"%s %d\n", unichar, unichar_cutoffs[c]);
The documentation for this class was generated from the following files:
- mastertrainer.h
- mastertrainer.cpp
void init_to_size(int size, const T &t)
int GetFontInfoId(const char *font_name)
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
CLUSTERER * MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[])
void AddSample(bool verification, const char *unichar_str, TrainingSample *sample)
const UNICHARSET & unicharset() const
const char * string() const
float ClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
void ReplicateAndRandomizeSamples()
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report)
int tfscanf(FILE *stream, const char *format,...)
const IntFeatureSpace & feature_space() const
void AddShape(const Shape &other)
const char * id_to_unichar(UNICHAR_ID id) const
void FreeCharDescription(CHAR_DESC CharDesc)
void OrganizeByFontAndClass()
const char *const kGeoFeatureType
int FindShape(int unichar_id, int font_id) const
SVEvent * AwaitEvent(SVEventType type)
static CHAR_FRAGMENT * parse_from_string(const char *str)
bool Serialize(FILE *fp) const
void IndexFeatures(const IntFeatureSpace &feature_space)
int NumClassSamples(int font_id, int class_id, bool randomize) const
const int kMinClusteredShapes
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it)
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str, TBOX *bounding_box)
double TestClassifier(CountTypes error_mode, int report_level, bool replicate_samples, TrainingSampleSet *samples, ShapeClassifier *test_classifier, STRING *report_string)
DLLSYM void tprintf(const char *format,...)
const T & get(int id) const
Return the object from an id.
int GetBestMatchingFontInfoId(const char *filename)
const BitVector & GetCloudFeatures(int font_id, int class_id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool contains_unichar(const char *const unichar_repr) const
TrainingSample * mutable_sample(int index)
void Init(const IntFeatureSpace &feature_space)
int DivRounded(int a, int b)
void MoveTo(UnicityTable< FontInfo > *target)
SAMPLE * MakeSample(CLUSTERER *Clusterer, const float *Feature, int32_t CharID)
void ComputeCanonicalFeatures()
bool Serialize(FILE *fp, const char *data, size_t n)
UnicityTableEqEq< int > font_set
bool Serialize(FILE *fp) const
bool Serialize(FILE *fp) const
TrainingSample * extract_sample(int index)
void ComputeCloudFeatures(int feature_space_size)
bool Serialize(FILE *fp) const
const float kFontMergeDistance
bool Serialize(FILE *fp) const
STRING SummaryStr() const
ScrollView * CreateFeatureSpaceWindow(const char *name, int xpos, int ypos)
void ComputeCanonicalSamples(const IntFeatureMap &map, bool debug)
FEATURE_DEFS_STRUCT feature_defs
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
const int kMaxUnicharsPerCluster
const Shape & GetShape(int shape_id) const
float UnicharDistance(const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)
const char *const kIntFeatureType
bool save_to_file(const char *const filename) const
void DisplaySamplesWithFeature(int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const
bool load_from_file(const char *const filename, bool skip_fragments)
int AddSample(const char *unichar, TrainingSample *sample)
void AppendMasterShapes(const ShapeTable &other, GenericVector< int > *shape_map)
void free_int_templates(INT_TEMPLATES templates)
bool contains(const T &object) const
void AppendOtherUnicharset(const UNICHARSET &src)
void LoadUnicharset(const char *filename)
const char *const kMicroFeatureType
#define ClassForClassId(T, c)
const TrainingSample * GetCanonicalSample(int font_id, int class_id) const
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
const PARAM_DESC * ParamDesc
int AddShape(int unichar_id, int font_id)
int get_index(const T &object) const
const char *const kCNFeatureType
bool is_beginning() const
const UNICHARSET & unicharset() const