Go to the documentation of this file.
23 # include "config_auto.h"
34 #ifndef DISABLED_LEGACY_ENGINE
54 if ((fp = fopen(path.
string(),
"rb")) !=
nullptr) {
58 path +=
"tessconfigs/";
60 if ((fp = fopen(path.
string(),
"rb")) !=
nullptr) {
80 const char* arg0,
const char* textbase,
const char* language,
89 lang = language !=
nullptr ? language :
"eng";
99 "Please make sure the TESSDATA_PREFIX environment variable is set"
100 " to your \"tessdata\" directory.\n");
103 #ifndef DISABLED_LEGACY_ENGINE
115 #endif // ndef DISABLED_LEGACY_ENGINE
130 for (
int i = 0; i < configs_size; ++i) {
136 if (vars_vec !=
nullptr && vars_values !=
nullptr) {
137 for (
int i = 0; i < vars_vec->
size(); ++i) {
139 (*vars_values)[i].
string(),
140 set_params_constraint, this->
params())) {
141 tprintf(
"Warning: The parameter '%s' was not found.\n", (*vars_vec)[i].
string());
148 if (params_file !=
nullptr) {
152 tprintf(
"Failed to open %s for writing params.\n",
169 #ifndef ANDROID_BUILD
170 # ifdef DISABLED_LEGACY_ENGINE
175 # endif // ndef DISABLED_LEGACY_ENGINE
181 tprintf(
"Error: LSTM requested, but not present!! Loading tesseract.\n");
185 #endif // ndef ANDROID_BUILD
190 #ifndef ANDROID_BUILD
192 #endif // ndef ANDROID_BUILD
194 #ifndef DISABLED_LEGACY_ENGINE
197 tprintf(
"Error: Tesseract (legacy) engine requested, but components are "
198 "not present in %s!!\n", tessdata_path.
c_str());
201 #endif // ndef DISABLED_LEGACY_ENGINE
203 tprintf(
"Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
208 #ifndef DISABLED_LEGACY_ENGINE
235 #endif // ndef DISABLED_LEGACY_ENGINE
241 static bool IsStrInList(
const STRING& str,
243 for (
int i = 0; i < str_list.
size(); ++i) {
244 if (str_list[i] == str)
return true;
258 while (remains.length() > 0) {
260 const char* start = remains.string();
261 while (*start ==
'+') ++start;
264 target = not_to_load;
268 int end = strlen(start);
269 const char* plus = strchr(start,
'+');
270 if (plus !=
nullptr && plus - start < end) end = plus - start;
272 lang_code.truncate_at(end);
276 if (!IsStrInList(lang_code, *target)) {
288 char** configs,
int configs_size,
291 bool set_only_non_debug_params,
292 TessdataManager* mgr) {
297 sub_langs_.delete_data_pointers();
301 bool loaded_primary =
false;
303 for (
int lang_index = 0; lang_index < langs_to_load.
size(); ++lang_index) {
304 if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
305 const char* lang_str = langs_to_load[lang_index].string();
307 if (!loaded_primary) {
314 arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
315 vars_values, set_only_non_debug_params, mgr);
319 if (!loaded_primary) {
321 tprintf(
"Failed loading language '%s'\n", lang_str);
324 &langs_to_load, &langs_not_to_load);
325 loaded_primary =
true;
329 tprintf(
"Failed loading language '%s'\n", lang_str);
332 sub_langs_.push_back(tess_to_init);
335 &langs_to_load, &langs_not_to_load);
340 if (!loaded_primary) {
341 tprintf(
"Tesseract couldn't load any languages!\n");
344 #ifndef DISABLED_LEGACY_ENGINE
345 if (!sub_langs_.empty()) {
352 for (
int s = 0; s < sub_langs_.size(); ++s) {
353 sub_langs_[s]->language_model_->getParamsModel().Copy(
356 tprintf(
"Using params model of the primary language\n");
359 for (
int s = 0; s < sub_langs_.size(); ++s) {
360 sub_langs_[s]->language_model_->getParamsModel().Clear();
366 #endif // ndef DISABLED_LEGACY_ENGINE
388 char** configs,
int configs_size,
391 bool set_only_non_debug_params,
392 TessdataManager* mgr) {
394 configs_size, vars_vec, vars_values,
395 set_only_non_debug_params, mgr)) {
409 #ifndef DISABLED_LEGACY_ENGINE
414 for (
int i = 0; i < new_fonts.
size(); ++i) {
423 for (
int i = 0; i < lang_fonts->
size(); ++i) {
424 int index = all_fonts.
get_id(lang_fonts->
get(i));
440 for (
int i = 0; i < sub_langs_.size(); ++i) {
445 for (
int i = 0; i < sub_langs_.size(); ++i) {
448 font_table_size_ = all_fonts.
size();
455 nullptr, 0,
nullptr,
nullptr,
false, mgr))
463 #endif // ndef DISABLED_LEGACY_ENGINE
UnicityTable< FontInfo > & get_fontinfo_table()
const char * string() const
@ OEM_TESSERACT_LSTM_COMBINED
bool tessedit_ambigs_training
void read_config_file(const char *filename, SetParamConstraint constraint)
static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, ParamsVectors *member_params)
STRING language_data_path_prefix
void InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_ambigs_for_adaption)
std::unique_ptr< LanguageModel > language_model_
@ SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
void Load(const STRING &lang, TessdataManager *data_file)
int get_id(T object) const
bool IsLSTMAvailable() const
bool IsBaseAvailable() const
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
char * tessedit_load_sublangs
DLLSYM void tprintf(const char *format,...)
const T & get(int id) const
Return the object from an id.
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
UnicharAmbigs unichar_ambigs
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
void SetupUniversalFontIds()
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
bool use_ambigs_for_adaption
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
int push_back(T object)
Add an element in the table.
void LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambigs_file, int debug_level, bool use_ambigs_for_adaption, UNICHARSET *unicharset)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
bool Load(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
bool tessedit_use_primary_params_model
bool tessedit_init_config_only
void SetupForLoad(DawgCache *dawg_cache)
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
void LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *unicharset)
int tessedit_ocr_engine_mode
bool load_from_file(const char *const filename, bool skip_fragments)
bool IsComponentAvailable(TessdataType type) const
int size() const
Return the size used.
bool major_right_to_left() const
char * tessedit_write_params_to_file
@ SET_PARAM_CONSTRAINT_NONE
void CopyFrom(const UNICHARSET &src)
const UNICHARSET & GetUnicharset() const
static TESS_API DawgCache * GlobalDawgCache()
bool GetComponent(TessdataType type, TFile *fp)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
bool Init(const char *data_file_name)
void main_setup(const char *argv0, const char *basename)
CCUtil::main_setup - set location of tessdata and name of image.
void set_compare_callback(TessResultCallback2< bool, T const &, T const & > *cb)
Dict & getDict() override
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
const char * c_str() const