#include <lstmrecognizer.h>
|
| LSTMRecognizer () |
|
| LSTMRecognizer (const STRING language_data_path_prefix) |
|
| ~LSTMRecognizer () |
|
int | NumOutputs () const |
|
int | training_iteration () const |
|
int | sample_iteration () const |
|
double | learning_rate () const |
|
LossType | OutputLossType () const |
|
bool | SimpleTextOutput () const |
|
bool | IsIntMode () const |
|
bool | IsRecoding () const |
|
bool | IsTensorFlow () const |
|
GenericVector< STRING > | EnumerateLayers () const |
|
Network * | GetLayer (const STRING &id) const |
|
float | GetLayerLearningRate (const STRING &id) const |
|
void | ScaleLearningRate (double factor) |
|
void | ScaleLayerLearningRate (const STRING &id, double factor) |
|
void | ConvertToInt () |
|
const UNICHARSET & | GetUnicharset () const |
|
const UnicharCompress & | GetRecoder () const |
|
const Dict * | GetDict () const |
|
void | SetIteration (int iteration) |
|
int | NumInputs () const |
|
int | null_char () const |
|
bool | Load (const ParamsVectors *params, const char *lang, TessdataManager *mgr) |
|
bool | Serialize (const TessdataManager *mgr, TFile *fp) const |
|
bool | DeSerialize (const TessdataManager *mgr, TFile *fp) |
|
bool | LoadCharsets (const TessdataManager *mgr) |
|
bool | LoadRecoder (TFile *fp) |
|
bool | LoadDictionary (const ParamsVectors *params, const char *lang, TessdataManager *mgr) |
|
void | RecognizeLine (const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0) |
|
void | OutputStats (const NetworkIO &outputs, float *min_output, float *mean_output, float *sd) |
|
bool | RecognizeLine (const ImageData &image_data, bool invert, bool debug, bool re_invert, bool upside_down, float *scale_factor, NetworkIO *inputs, NetworkIO *outputs) |
|
STRING | DecodeLabels (const GenericVector< int > &labels) |
|
void | DisplayForward (const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window) |
|
void | LabelsFromOutputs (const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords) |
|
|
void | SetRandomSeed () |
|
void | DisplayLSTMOutput (const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window) |
|
void | DebugActivationPath (const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords) |
|
void | DebugActivationRange (const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end) |
|
void | LabelsViaReEncode (const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords) |
|
void | LabelsViaSimpleText (const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords) |
|
const char * | DecodeLabel (const GenericVector< int > &labels, int start, int *end, int *decoded) |
|
const char * | DecodeSingleLabel (int label) |
|
Definition at line 54 of file lstmrecognizer.h.
◆ LSTMRecognizer() [1/2]
tesseract::LSTMRecognizer::LSTMRecognizer |
( |
| ) |
|
◆ LSTMRecognizer() [2/2]
tesseract::LSTMRecognizer::LSTMRecognizer |
( |
const STRING |
language_data_path_prefix | ) |
|
◆ ~LSTMRecognizer()
tesseract::LSTMRecognizer::~LSTMRecognizer |
( |
| ) |
|
◆ ConvertToInt()
void tesseract::LSTMRecognizer::ConvertToInt |
( |
| ) |
|
|
inline |
◆ DebugActivationPath()
Definition at line 360 of file lstmrecognizer.cpp.
366 for (int start = 0; start < labels.size(); start = end) {
374 const char* label = DecodeLabel(labels, start, &end, &decoded);
377 for (int i = start + 1; i < end; ++i) {
379 xcoords[i], xcoords[i + 1]);
◆ DebugActivationRange()
void tesseract::LSTMRecognizer::DebugActivationRange |
( |
const NetworkIO & |
outputs, |
|
|
const char * |
label, |
|
|
int |
best_choice, |
|
|
int |
x_start, |
|
|
int |
x_end |
|
) |
| |
|
protected |
Definition at line 387 of file lstmrecognizer.cpp.
390 tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
391 double max_score = 0.0;
392 double mean_score = 0.0;
393 const int width = x_end - x_start;
394 for (int x = x_start; x < x_end; ++x) {
395 const float* line = outputs.f(x);
396 const double score = line[best_choice] * 100.0;
397 if (score > max_score) max_score = score;
398 mean_score += score / width;
400 double best_score = 0.0;
401 for (int c = 0; c < outputs.NumFeatures(); ++c) {
402 if (c != best_choice && line[c] > best_score) {
404 best_score = line[c];
410 tprintf(", Mean=%g, max=%g\n", mean_score, max_score);
◆ DecodeLabel()
const char * tesseract::LSTMRecognizer::DecodeLabel |
( |
const GenericVector< int > & |
labels, |
|
|
int |
start, |
|
|
int * |
end, |
|
|
int * |
decoded |
|
) |
| |
|
protected |
Definition at line 475 of file lstmrecognizer.cpp.
482 if (decoded != nullptr) {
489 while (index < labels.size() &&
491 code.Set(code.length(), labels[index++]);
492 while (index < labels.size() && labels[index] == null_char_) ++index;
496 if (uni_id != INVALID_UNICHAR_ID &&
497 (index == labels.size() ||
501 if (decoded != nullptr) *decoded = uni_id;
506 return "<Undecodable>";
508 if (decoded != nullptr) *decoded = labels[start];
509 if (labels[start] == null_char_) return "<null>";
◆ DecodeLabels()
◆ DecodeSingleLabel()
const char * tesseract::LSTMRecognizer::DecodeSingleLabel |
( |
int |
label | ) |
|
|
protected |
◆ DeSerialize()
Definition at line 105 of file lstmrecognizer.cpp.
108 if (network_ == nullptr) return false;
109 bool include_charsets = mgr == nullptr ||
118 if (!fp->DeSerialize(&null_char_)) return false;
119 if (!fp->DeSerialize(&adam_beta_)) return false;
121 if (!fp->DeSerialize(&momentum_)) return false;
122 if (include_charsets && !LoadRecoder(fp)) return false;
123 if (!include_charsets && !LoadCharsets(mgr)) return false;
◆ DisplayForward()
Definition at line 317 of file lstmrecognizer.cpp.
322 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
323 Pix* input_pix = inputs.ToPix();
325 pixGetHeight(input_pix), window);
328 #endif // GRAPHICS_DISABLED
◆ DisplayLSTMOutput()
Definition at line 333 of file lstmrecognizer.cpp.
336 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
340 for (int start = 0; start < labels.size(); start = end) {
341 int xpos = xcoords[start] * x_scale;
347 const char* str = DecodeLabel(labels, start, &end, nullptr);
348 if (*str == '\\') str = "\\\\";
349 xpos = xcoords[(start + end) / 2] * x_scale;
350 window->Text(xpos, height, str);
352 window->Line(xpos, 0, xpos, height * 3 / 2);
355 #endif // GRAPHICS_DISABLED
◆ EnumerateLayers()
Definition at line 80 of file lstmrecognizer.h.
82 auto* series = static_cast<Series*>(network_);
84 series->EnumerateLayers(nullptr, &layers);
◆ GetDict()
const Dict* tesseract::LSTMRecognizer::GetDict |
( |
| ) |
const |
|
inline |
◆ GetLayer()
Network* tesseract::LSTMRecognizer::GetLayer |
( |
const STRING & |
id | ) |
const |
|
inline |
Definition at line 88 of file lstmrecognizer.h.
91 auto* series = static_cast<Series*>(network_);
92 return series->GetLayer(&id[1]);
◆ GetLayerLearningRate()
float tesseract::LSTMRecognizer::GetLayerLearningRate |
( |
const STRING & |
id | ) |
const |
|
inline |
Definition at line 95 of file lstmrecognizer.h.
99 auto* series = static_cast<Series*>(network_);
100 return series->LayerLearningRate(&id[1]);
◆ GetRecoder()
◆ GetUnicharset()
const UNICHARSET& tesseract::LSTMRecognizer::GetUnicharset |
( |
| ) |
const |
|
inline |
◆ IsIntMode()
bool tesseract::LSTMRecognizer::IsIntMode |
( |
| ) |
const |
|
inline |
◆ IsRecoding()
bool tesseract::LSTMRecognizer::IsRecoding |
( |
| ) |
const |
|
inline |
◆ IsTensorFlow()
bool tesseract::LSTMRecognizer::IsTensorFlow |
( |
| ) |
const |
|
inline |
◆ LabelsFromOutputs()
◆ LabelsViaReEncode()
◆ LabelsViaSimpleText()
Definition at line 456 of file lstmrecognizer.cpp.
461 const int width = output.Width();
462 for (int t = 0; t < width; ++t) {
464 const int label = output.BestLabel(t, &score);
◆ learning_rate()
double tesseract::LSTMRecognizer::learning_rate |
( |
| ) |
const |
|
inline |
◆ Load()
◆ LoadCharsets()
bool tesseract::LSTMRecognizer::LoadCharsets |
( |
const TessdataManager * |
mgr | ) |
|
◆ LoadDictionary()
◆ LoadRecoder()
bool tesseract::LSTMRecognizer::LoadRecoder |
( |
TFile * |
fp | ) |
|
◆ null_char()
int tesseract::LSTMRecognizer::null_char |
( |
| ) |
const |
|
inline |
◆ NumInputs()
int tesseract::LSTMRecognizer::NumInputs |
( |
| ) |
const |
|
inline |
◆ NumOutputs()
int tesseract::LSTMRecognizer::NumOutputs |
( |
| ) |
const |
|
inline |
◆ OutputLossType()
LossType tesseract::LSTMRecognizer::OutputLossType |
( |
| ) |
const |
|
inline |
◆ OutputStats()
void tesseract::LSTMRecognizer::OutputStats |
( |
const NetworkIO & |
outputs, |
|
|
float * |
min_output, |
|
|
float * |
mean_output, |
|
|
float * |
sd |
|
) |
| |
Definition at line 206 of file lstmrecognizer.cpp.
208 const int kOutputScale = INT8_MAX;
209 STATS stats(0, kOutputScale + 1);
210 for (int t = 0; t < outputs.Width(); ++t) {
211 int best_label = outputs.BestLabel(t, nullptr);
213 float best_output = outputs.f(t)[best_label];
214 stats.add(static_cast<int>(kOutputScale * best_output), 1);
219 if (stats.get_total() == 0) {
224 *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
225 *mean_output = stats.mean() / kOutputScale;
226 *sd = stats.sd() / kOutputScale;
◆ RecognizeLine() [1/2]
bool tesseract::LSTMRecognizer::RecognizeLine |
( |
const ImageData & |
image_data, |
|
|
bool |
invert, |
|
|
bool |
debug, |
|
|
bool |
re_invert, |
|
|
bool |
upside_down, |
|
|
float * |
scale_factor, |
|
|
NetworkIO * |
inputs, |
|
|
NetworkIO * |
outputs |
|
) |
| |
Definition at line 232 of file lstmrecognizer.cpp.
237 const int kMaxImageWidth = 2560;
243 if (pix == nullptr) {
244 tprintf("Line cannot be recognized!!\n");
248 tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
253 if (upside_down) pixRotate180(pix, pix);
255 *scale_factor = min_width / *scale_factor;
261 float pos_min, pos_mean, pos_sd;
262 OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
263 if (invert && pos_min < 0.5) {
265 NetworkIO inv_inputs, inv_outputs;
273 float inv_min, inv_mean, inv_sd;
274 OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
275 if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
278 tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
279 pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
281 *outputs = inv_outputs;
282 *inputs = inv_inputs;
283 } else if (re_invert) {
◆ RecognizeLine() [2/2]
void tesseract::LSTMRecognizer::RecognizeLine |
( |
const ImageData & |
image_data, |
|
|
bool |
invert, |
|
|
bool |
debug, |
|
|
double |
worst_dict_cert, |
|
|
const TBOX & |
line_box, |
|
|
PointerVector< WERD_RES > * |
words, |
|
|
int |
lstm_choice_mode = 0 |
|
) |
| |
◆ sample_iteration()
int tesseract::LSTMRecognizer::sample_iteration |
( |
| ) |
const |
|
inline |
◆ ScaleLayerLearningRate()
void tesseract::LSTMRecognizer::ScaleLayerLearningRate |
( |
const STRING & |
id, |
|
|
double |
factor |
|
) |
| |
|
inline |
Definition at line 117 of file lstmrecognizer.h.
120 auto* series = static_cast<Series*>(network_);
121 series->ScaleLayerLearningRate(&id[1], factor);
◆ ScaleLearningRate()
void tesseract::LSTMRecognizer::ScaleLearningRate |
( |
double |
factor | ) |
|
|
inline |
◆ Serialize()
Definition at line 86 of file lstmrecognizer.cpp.
87 bool include_charsets = mgr == nullptr ||
91 if (include_charsets && !GetUnicharset().save_to_file(fp)) return false;
99 if (!fp->Serialize(&momentum_)) return false;
◆ SetIteration()
void tesseract::LSTMRecognizer::SetIteration |
( |
int |
iteration | ) |
|
|
inline |
◆ SetRandomSeed()
void tesseract::LSTMRecognizer::SetRandomSeed |
( |
| ) |
|
|
inline protected |
◆ SimpleTextOutput()
bool tesseract::LSTMRecognizer::SimpleTextOutput |
( |
| ) |
const |
|
inline |
◆ training_iteration()
int tesseract::LSTMRecognizer::training_iteration |
( |
| ) |
const |
|
inline |
◆ adam_beta_
float tesseract::LSTMRecognizer::adam_beta_ |
|
protected |
◆ ccutil_
CCUtil tesseract::LSTMRecognizer::ccutil_ |
|
protected |
◆ debug_win_
◆ dict_
Dict* tesseract::LSTMRecognizer::dict_ |
|
protected |
◆ learning_rate_
float tesseract::LSTMRecognizer::learning_rate_ |
|
protected |
◆ momentum_
float tesseract::LSTMRecognizer::momentum_ |
|
protected |
◆ network_
Network* tesseract::LSTMRecognizer::network_ |
|
protected |
◆ network_str_
STRING tesseract::LSTMRecognizer::network_str_ |
|
protected |
◆ null_char_
int32_t tesseract::LSTMRecognizer::null_char_ |
|
protected |
◆ randomizer_
TRand tesseract::LSTMRecognizer::randomizer_ |
|
protected |
◆ recoder_
◆ sample_iteration_
int32_t tesseract::LSTMRecognizer::sample_iteration_ |
|
protected |
◆ scratch_space_
◆ search_
◆ training_flags_
int32_t tesseract::LSTMRecognizer::training_flags_ |
|
protected |
◆ training_iteration_
int32_t tesseract::LSTMRecognizer::training_iteration_ |
|
protected |
The documentation for this class was generated from the following files:
bool Serialize(TFile *fp) const
void Line(int x1, int y1, int x2, int y2)
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
char * user_patterns_suffix
static int DisplayImage(Pix *pix, ScrollView *window)
NetworkScratch scratch_space_
STRING language_data_path_prefix
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
LossType OutputLossType() const
static constexpr float kMinCertainty
virtual bool Serialize(TFile *fp) const
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
virtual StaticShape InputShape() const
static Network * CreateFromFile(TFile *fp)
void ScaleLayerLearningRate(const STRING &id, double factor)
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
bool Serialize(FILE *fp) const
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
DLLSYM void tprintf(const char *format,...)
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
void Text(int x, int y, const char *mystring)
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
LossType loss_type() const
char * user_patterns_file
bool DeSerialize(TFile *fp)
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
bool IsValidFirstCode(int code) const
bool DeSerialize(bool swap, FILE *fp)
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
bool LoadRecoder(TFile *fp)
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
virtual int XScaleFactor() const
virtual void SetRandomizer(TRand *randomizer)
void SetupForLoad(DawgCache *dawg_cache)
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)=0
int DecodeUnichar(const RecodedCharID &code) const
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
void SetupPassThrough(const UNICHARSET &unicharset)
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
bool SimpleTextOutput() const
bool LoadDictionary(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
bool TestFlag(NetworkFlags flag) const
GenericVector< STRING > EnumerateLayers() const
bool load_from_file(const char *const filename, bool skip_fragments)
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
virtual StaticShape OutputShape(const StaticShape &input_shape) const
const UNICHARSET & GetUnicharset() const
static TESS_API DawgCache * GlobalDawgCache()
void set_seed(uint64_t seed)
bool LoadCharsets(const TessdataManager *mgr)
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
static const int kMaxCodeLen
int32_t sample_iteration_
virtual void CacheXScaleFactor(int factor)
const char * DecodeSingleLabel(int label)
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
virtual void ConvertToInt()
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
int32_t training_iteration_
@ TESSDATA_LSTM_UNICHARSET
RecodeBeamSearch * search_