Go to the documentation of this file.
19 #define _USE_MATH_DEFINES // for M_PI
23 #include "config_auto.h"
37 #include <sys/types.h>
53 #include <curl/curl.h>
55 #include "allheaders.h"
56 #ifndef DISABLED_LEGACY_ENGINE
60 #include "config_auto.h"
71 #ifndef DISABLED_LEGACY_ENGINE
77 #if defined(USE_OPENCL)
98 static BOOL_VAR(stream_filelist,
false,
"Stream a filelist from stdin");
99 static STRING_VAR(document_title,
"",
"Title of output document (used for hOCR and PDF output)");
115 static const char* kInputFile =
"noname.tif";
119 static const char* kOldVarsFile =
"failed_vars.txt";
125 static void addAvailableLanguages(
const STRING &datadir,
const STRING &base,
128 const STRING base2 = (base.
string()[0] ==
'\0') ? base : base +
"/";
129 const size_t extlen =
sizeof(kTrainedDataSuffix);
131 WIN32_FIND_DATA data;
132 HANDLE handle = FindFirstFile((datadir + base2 +
"*").
string(), &data);
133 if (handle != INVALID_HANDLE_VALUE) {
136 char *name = data.cFileName;
138 if (name[0] !=
'.') {
139 if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ==
140 FILE_ATTRIBUTE_DIRECTORY) {
141 addAvailableLanguages(datadir, base2 + name, langs);
143 size_t len = strlen(name);
144 if (len > extlen && name[len - extlen] ==
'.' &&
145 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
146 name[len - extlen] =
'\0';
151 result = FindNextFile(handle, &data);
156 DIR* dir = opendir((datadir + base).
string());
157 if (dir !=
nullptr) {
159 while ((de = readdir(dir))) {
160 char *name = de->d_name;
162 if (name[0] !=
'.') {
164 if (stat((datadir + base2 + name).
string(), &st) == 0 &&
165 (st.st_mode & S_IFDIR) == S_IFDIR) {
166 addAvailableLanguages(datadir, base2 + name, langs);
168 size_t len = strlen(name);
169 if (len > extlen && name[len - extlen] ==
'.' &&
170 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
171 name[len - extlen] =
'\0';
183 static int CompareSTRING(
const void* p1,
const void* p2) {
184 const auto* s1 =
static_cast<const STRING*
>(p1);
185 const auto* s2 =
static_cast<const STRING*
>(p2);
186 return strcmp(s1->c_str(), s2->c_str());
190 : tesseract_(nullptr),
191 osd_tesseract_(nullptr),
192 equ_detect_(nullptr),
197 thresholder_(nullptr),
198 paragraph_models_(nullptr),
199 block_list_(nullptr),
201 input_file_(nullptr),
202 output_file_(nullptr),
206 recognition_done_(false),
222 std::locale::global(std::locale(
""));
234 return PACKAGE_VERSION;
246 ds_device device = OpenclDevice::getDeviceSelection();
247 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
248 *data =
new cl_device_id;
249 memcpy(*data, &device.oclDeviceID,
sizeof(cl_device_id));
250 return sizeof(cl_device_id);
264 tprintf(
"Deprecated method CatchSignals has only a dummy implementation!\n");
299 auto *p = ParamUtils::FindParam<IntParam>(
301 if (p ==
nullptr)
return false;
302 *value = (int32_t)(*p);
307 auto *p = ParamUtils::FindParam<BoolParam>(
309 if (p ==
nullptr)
return false;
315 auto *p = ParamUtils::FindParam<StringParam>(
317 return (p !=
nullptr) ? p->string() :
nullptr;
321 auto *p = ParamUtils::FindParam<DoubleParam>(
323 if (p ==
nullptr)
return false;
324 *value = (double)(*p);
350 bool set_only_non_debug_params) {
351 return Init(datapath, 0, language,
oem, configs, configs_size, vars_vec,
352 vars_values, set_only_non_debug_params,
nullptr);
362 bool set_only_non_debug_params,
FileReader reader) {
364 if (language ==
nullptr) language =
"eng";
365 STRING datapath = data_size == 0 ? data : language;
382 bool reset_classifier =
true;
384 reset_classifier =
false;
386 if (reader !=
nullptr)
reader_ = reader;
388 if (data_size != 0) {
394 language,
oem, configs, configs_size, vars_vec, vars_values,
395 set_only_non_debug_params, &mgr) != 0) {
415 #ifndef DISABLED_LEGACY_ENGINE
417 if (reset_classifier) {
420 #endif // ndef DISABLED_LEGACY_ENGINE
448 for (
int i = 0; i < num_subs; ++i)
461 langs->
sort(CompareSTRING);
466 #ifndef DISABLED_LEGACY_ENGINE
481 #endif // ndef DISABLED_LEGACY_ENGINE
490 #ifndef DISABLED_LEGACY_ENGINE
546 int width,
int height) {
552 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
553 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
554 bytes_per_pixel, bytes_per_line);
560 #ifndef DISABLED_LEGACY_ENGINE
571 #endif // ndef DISABLED_LEGACY_ENGINE
581 int width,
int height,
582 int bytes_per_pixel,
int bytes_per_line) {
585 bytes_per_pixel, bytes_per_line);
594 tprintf(
"Please call SetImage before SetSourceResolution.\n");
607 if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
609 Pix* p1 = pixRemoveAlpha(pix);
611 (void)pixCopy(pix, p1);
662 Pixa** pixa,
int** blockids,
int** paraids) {
664 pixa, blockids, paraids);
707 bool text_only,
bool raw_image,
708 const int raw_padding,
709 Pixa** pixa,
int** blockids,
712 if (page_it ==
nullptr)
714 if (page_it ==
nullptr)
718 int component_count = 0;
719 int left, top, right, bottom;
726 &left, &top, &right, &bottom);
732 level, &left, &top, &right, &bottom);
735 if (get_bbox->
Run() &&
738 }
while (page_it->
Next(level));
740 Boxa* boxa = boxaCreate(component_count);
742 *pixa = pixaCreate(component_count);
743 if (blockids !=
nullptr)
744 *blockids =
new int[component_count];
745 if (paraids !=
nullptr)
746 *paraids =
new int[component_count];
750 int component_index = 0;
753 if (get_bbox->
Run() &&
755 Box* lbox = boxCreate(left, top, right - left, bottom - top);
756 boxaAddBox(boxa, lbox, L_INSERT);
757 if (pixa !=
nullptr) {
765 pixaAddPix(*pixa, pix, L_INSERT);
766 pixaAddBox(*pixa, lbox, L_CLONE);
768 if (paraids !=
nullptr) {
769 (*paraids)[component_index] = paraid;
773 if (blockids !=
nullptr) {
774 (*blockids)[component_index] = blockid;
782 }
while (page_it->
Next(level));
844 #ifndef DISABLED_LEGACY_ENGINE
850 #endif // ndef DISABLED_LEGACY_ENGINE
867 #ifndef DISABLED_LEGACY_ENGINE
872 #endif // ndef DISABLED_LEGACY_ENGINE
887 #ifndef GRAPHICS_DISABLED
889 #endif // GRAPHICS_DISABLED
895 #ifndef DISABLED_LEGACY_ENGINE
905 fclose(training_output_file);
906 #endif // ndef DISABLED_LEGACY_ENGINE
909 bool wait_for_text =
true;
921 #ifndef DISABLED_LEGACY_ENGINE
927 tprintf(
"Please call SetImage before attempting recognition.\n");
944 while (page_res_it.
word() !=
nullptr) {
948 page_res_it.
row()->
row, word_res);
953 #endif // ndef DISABLED_LEGACY_ENGINE
978 bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
980 const char* retry_config,
981 int timeout_millisec,
983 int tessedit_page_number) {
984 if (!flist && !buf)
return false;
985 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
990 buf->
split(
'\n', &lines);
991 if (lines.
empty())
return false;
995 for (
int i = 0; i < page; i++) {
997 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr)
break;
1002 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1009 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr)
break;
1011 if (page >= lines.
size())
break;
1012 snprintf(pagename,
sizeof(pagename),
"%s", lines[page].c_str());
1015 Pix *pix = pixRead(pagename);
1016 if (pix ==
nullptr) {
1017 tprintf(
"Image file %s cannot be read!\n", pagename);
1020 tprintf(
"Page %d : %s\n", page, pagename);
1021 bool r =
ProcessPage(pix, page, pagename, retry_config,
1022 timeout_millisec, renderer);
1024 if (!r)
return false;
1025 if (tessedit_page_number >= 0)
break;
1036 bool TessBaseAPI::ProcessPagesMultipageTiff(
const l_uint8 *data,
1038 const char* filename,
1039 const char* retry_config,
1040 int timeout_millisec,
1042 int tessedit_page_number) {
1043 #ifndef ANDROID_BUILD
1045 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1048 if (tessedit_page_number >= 0) {
1049 page = tessedit_page_number;
1050 pix = (data) ? pixReadMemTiff(data, size, page)
1051 : pixReadTiff(filename, page);
1053 pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
1054 : pixReadFromMultipageTiff(filename, &offset);
1056 if (pix ==
nullptr)
break;
1057 tprintf(
"Page %d\n", page + 1);
1061 bool r =
ProcessPage(pix, page, filename, retry_config,
1062 timeout_millisec, renderer);
1064 if (!r)
return false;
1065 if (tessedit_page_number >= 0)
break;
1077 int timeout_millisec,
1081 #ifndef DISABLED_LEGACY_ENGINE
1089 #endif // ndef DISABLED_LEGACY_ENGINE
1094 WriteMemoryCallback(
void *contents,
size_t size,
size_t nmemb,
void *userp)
1096 size = size * nmemb;
1097 std::string* buf =
reinterpret_cast<std::string*
>(userp);
1098 buf->append(
reinterpret_cast<const char*
>(contents), size);
1114 const char* retry_config,
1115 int timeout_millisec,
1117 bool stdInput = !strcmp(filename,
"stdin") || !strcmp(filename,
"-");
1120 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
1125 if (stream_filelist) {
1126 return ProcessPagesFileList(stdin,
nullptr, retry_config,
1127 timeout_millisec, renderer,
1135 const l_uint8 *data =
nullptr;
1137 buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138 (std::istreambuf_iterator<char>()));
1139 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1140 }
else if (strncmp(filename,
"http:", 5) == 0 ||
1141 strncmp(filename,
"https:", 6) == 0 ) {
1144 CURL* curl = curl_easy_init();
1145 if (curl ==
nullptr) {
1146 fprintf(stderr,
"Error, curl_easy_init failed\n");
1150 curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1152 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1154 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1156 curlcode = curl_easy_perform(curl);
1158 curl_easy_cleanup(curl);
1159 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1162 fprintf(stderr,
"Error, this tesseract has no URL support\n");
1167 if (FILE* file = fopen(filename,
"rb")) {
1170 fprintf(stderr,
"Error, cannot read input file %s: %s\n",
1171 filename, strerror(errno));
1178 int r = (data !=
nullptr) ?
1179 findFileFormatBuffer(data, &format) :
1180 findFileFormat(filename, &format);
1183 if (r != 0 || format == IFF_UNKNOWN) {
1185 if (data !=
nullptr) {
1188 std::ifstream t(filename);
1189 std::string u((std::istreambuf_iterator<char>(t)),
1190 std::istreambuf_iterator<char>());
1193 return ProcessPagesFileList(
nullptr, &s, retry_config,
1194 timeout_millisec, renderer,
1199 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1200 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1201 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1202 #if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1203 format == IFF_TIFF_JPEG ||
1205 format == IFF_TIFF_ZIP);
1210 pix = (data !=
nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1211 if (pix ==
nullptr) {
1217 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1224 ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1225 timeout_millisec, renderer,
1228 timeout_millisec, renderer);
1234 if (!r || (renderer && !renderer->EndDocument())) {
1241 const char* retry_config,
int timeout_millisec,
1245 bool failed =
false;
1251 if (it ==
nullptr) {
1258 }
else if (timeout_millisec > 0) {
1261 monitor.
cancel =
nullptr;
1273 #ifndef ANDROID_BUILD
1275 pixWrite(
"tessinput.tif", page_pix, IFF_TIFF_G4);
1276 #endif // ANDROID_BUILD
1279 if (failed && retry_config !=
nullptr && retry_config[0] !=
'\0') {
1281 FILE* fp = fopen(kOldVarsFile,
"wb");
1282 if (fp ==
nullptr) {
1283 tprintf(
"Error, failed to open file \"%s\"\n", kOldVarsFile);
1296 if (renderer && !failed) {
1297 failed = !renderer->
AddImage(
this);
1360 text += para_text.get();
1362 char* result =
new char[text.
length() + 1];
1370 int left, top, right, bottom;
1371 it->
BoundingBox(level, &left, &top, &right, &bottom);
1387 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1388 int page_id = page_number + 1;
1392 int page_num = page_id;
1407 tsv_str +=
"\t-1\t\n";
1427 AddBoxToTSV(res_it,
RIL_BLOCK, &tsv_str);
1428 tsv_str +=
"\t-1\t\n";
1439 AddBoxToTSV(res_it,
RIL_PARA, &tsv_str);
1440 tsv_str +=
"\t-1\t\n";
1451 tsv_str +=
"\t-1\t\n";
1455 int left, top, right, bottom;
1484 char* ret =
new char[tsv_str.
length() + 1];
1485 strcpy(ret, tsv_str.
string());
1528 char* result =
new char[total_length];
1530 int output_length = 0;
1533 int left, top, right, bottom;
1535 const std::unique_ptr<
char[]> text(
1539 for (
int i = 0; text[i] !=
'\0'; ++i) {
1543 snprintf(result + output_length, total_length - output_length,
1544 "%s %d %d %d %d %d\n", text.get(), left,
image_height_ - bottom,
1546 output_length += strlen(result + output_length);
1562 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
1566 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
1578 bool tilde_crunch_written =
false;
1579 bool last_char_was_newline =
true;
1580 bool last_char_was_tilde =
false;
1584 char* result =
new char[total_length];
1592 (!tilde_crunch_written ||
1603 last_char_was_tilde =
false;
1605 if (!last_char_was_tilde) {
1607 last_char_was_tilde =
true;
1609 tilde_crunch_written =
true;
1610 last_char_was_newline =
false;
1615 tilde_crunch_written =
false;
1619 int length = lengths.
length();
1623 if (last_char_was_tilde &&
1624 word->
word->
space() == 0 && wordstr[offset] ==
' ') {
1628 offset = lengths[i++];
1630 if (i < length && wordstr[offset] != 0) {
1631 if (!last_char_was_newline)
1634 last_char_was_newline =
false;
1635 for (; i < length; offset += lengths[i++]) {
1636 if (wordstr[offset] ==
' ' ||
1639 last_char_was_tilde =
true;
1643 UNICHAR ch(wordstr + offset, lengths[i]);
1645 for (
int j = 0;
kUniChs[j] != 0; ++j) {
1651 if (uni_ch <= 0xff) {
1652 *ptr++ =
static_cast<char>(uni_ch);
1653 last_char_was_tilde =
false;
1656 last_char_was_tilde =
true;
1665 tilde_crunch_written =
false;
1666 last_char_was_newline =
true;
1667 last_char_was_tilde =
false;
1675 #ifndef DISABLED_LEGACY_ENGINE
1687 const char** script_name,
1688 float* script_conf) {
1699 if (orient_deg) *orient_deg = orient_id * 90;
1704 *script_name = script;
1720 const char* script_name;
1730 std::stringstream stream;
1732 stream.imbue(std::locale::classic());
1734 stream.precision(2);
1737 <<
"Page number: " << page_number <<
"\n"
1738 <<
"Orientation in degrees: " << orient_deg <<
"\n"
1739 <<
"Rotate: " << rotate <<
"\n"
1740 <<
"Orientation confidence: " << orient_conf <<
"\n"
1741 <<
"Script: " << script_name <<
"\n"
1742 <<
"Script confidence: " << script_conf <<
"\n";
1743 const std::string& text = stream.str();
1744 char* result =
new char[text.length() + 1];
1745 strcpy(result, text.c_str());
1749 #endif // ndef DISABLED_LEGACY_ENGINE
1754 if (!conf)
return 0;
1757 while (*pt >= 0) sum += *pt++;
1758 if (pt != conf) sum /= pt - conf;
1773 int* conf =
new int[n_word+1];
1778 int w_conf =
static_cast<int>(100 + 5 * choice->
certainty());
1780 if (w_conf < 0) w_conf = 0;
1781 if (w_conf > 100) w_conf = 100;
1782 conf[n_word++] = w_conf;
1788 #ifndef DISABLED_LEGACY_ENGINE
1802 bool success =
true;
1806 const std::unique_ptr<const char[]> text(
GetUTF8Text());
1808 tprintf(
"Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1810 if (text !=
nullptr) {
1813 if (word_res !=
nullptr) {
1818 for (t = 0; text[t] !=
'\0'; ++t) {
1819 if (text[t] ==
'\n' || text[t] ==
' ')
1821 while (wordstr[w] ==
' ') ++w;
1822 if (text[t] != wordstr[w])
1826 if (text[t] !=
'\0' || wordstr[w] !=
'\0') {
1834 if (pr_it.
word() ==
nullptr)
1837 word_res = pr_it.
word();
1854 #endif // ndef DISABLED_LEGACY_ENGINE
1931 if (it ==
nullptr) {
1937 if (x2 <= x1) x2 = x1 + 1;
1939 *out_slope =
static_cast<float>(y2 - y1) / (x2 - x1);
1940 *out_offset =
static_cast<int>(y1 - *out_slope * x1);
1943 int left, top, right, bottom;
1953 *out_offset += bottom - std::max(left_y, right_y);
1956 *out_slope = -*out_slope;
1983 for (
int i = 0; i < num_subs; ++i) {
1989 #ifndef DISABLED_LEGACY_ENGINE
1994 #endif // ndef DISABLED_LEGACY_ENGINE
1999 tprintf(
"Please call Init before attempting to set an image.\n");
2016 if (*pix !=
nullptr)
2024 tprintf(
"Warning: User defined image dpi is outside of expected range "
2033 tprintf(
"Warning: Invalid resolution %d dpi. Using %d instead.\n",
2059 tprintf(
"Estimated internal resolution %d out of range! "
2060 "Corrected to %d.\n",
2070 tprintf(
"Please call SetImage before attempting recognition.\n");
2080 #ifndef DISABLED_LEGACY_ENGINE
2091 #ifndef DISABLED_LEGACY_ENGINE
2097 tprintf(
"Warning: Could not set equation detector\n");
2102 #endif // ndef DISABLED_LEGACY_ENGINE
2107 osd_tess ==
nullptr) {
2114 tprintf(
"Warning: Auto orientation and script detection requested,"
2115 " but data path is undefined\n");
2120 nullptr, 0,
nullptr,
nullptr,
2121 false, &mgr) == 0) {
2126 tprintf(
"Warning: Auto orientation and script detection requested,"
2127 " but osd language failed to load\n");
2174 int total_length = 2;
2175 int total_blobs = 0;
2181 if (choice !=
nullptr) {
2182 total_blobs += choice->
length() + 2;
2190 if (blob_count !=
nullptr)
2191 *blob_count = total_blobs;
2192 return total_length;
2195 #ifndef DISABLED_LEGACY_ENGINE
2213 #endif // ndef DISABLED_LEGACY_ENGINE
2234 bool** vertical_writing) {
2235 delete[] *block_orientation;
2236 *block_orientation =
nullptr;
2237 delete[] *vertical_writing;
2238 *vertical_writing =
nullptr;
2241 block_it.move_to_first();
2243 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2244 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2250 tprintf(
"WARNING: Found no blocks\n");
2253 *block_orientation =
new int[num_blocks];
2254 *vertical_writing =
new bool[num_blocks];
2255 block_it.move_to_first();
2257 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2258 block_it.forward()) {
2259 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2262 FCOORD re_rotation = block_it.data()->re_rotation();
2263 float re_theta = re_rotation.
angle();
2264 FCOORD classify_rotation = block_it.data()->classify_rotation();
2265 float classify_theta = classify_rotation.
angle();
2266 double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2267 if (rot_theta < 0) rot_theta += 4;
2268 int num_rotations =
static_cast<int>(rot_theta + 0.5);
2269 (*block_orientation)[i] = num_rotations;
2272 (*vertical_writing)[i] = classify_rotation.
y() != 0.0f;
2279 int debug_level = 0;
2287 result_it, &models);
2313 for (ptr = text; *ptr; ptr++) {
2315 case '<': ret +=
"<";
break;
2316 case '>': ret +=
">";
break;
2317 case '&': ret +=
"&";
break;
2318 case '"': ret +=
""";
break;
2319 case '\'': ret +=
"'";
break;
2320 default: ret += *ptr;
2327 #ifndef DISABLED_LEGACY_ENGINE
2355 int32_t xstarts[] = {-32000};
2356 double quad_coeffs[] = {0, 0,
baseline};
2369 int width = pixGetWidth(pix);
2370 int height = pixGetHeight(pix);
2371 BLOCK block(
"a character",
true, 0, 0, 0, 0, width, height);
2378 C_BLOB_IT c_blob_it(list);
2379 if (c_blob_it.empty())
2382 C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2383 for (c_blob_it.forward();
2384 !c_blob_it.at_first();
2385 c_blob_it.forward()) {
2386 C_BLOB *c_blob = c_blob_it.data();
2387 ol_it.add_list_after(c_blob->
out_list());
2400 float x_center = (box.
left() + box.
right()) / 2.0f;
2411 static TBLOB *make_tesseract_blob(
float baseline,
float xheight,
2412 float descender,
float ascender,
2413 bool numeric_mode, Pix* pix) {
2436 TBLOB *blob = make_tesseract_blob(
baseline, xheight, descender, ascender,
2440 float best_rating = -100;
2444 BLOB_CHOICE_LIST choices;
2446 BLOB_CHOICE_IT choice_it;
2447 choice_it.set_to_list(&choices);
2448 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2449 choice_it.forward()) {
2450 if (choice_it.data()->rating() > best_rating) {
2451 best_rating = choice_it.data()->rating();
2465 auto *page_res =
new PAGE_RES(
false, block_list,
2474 pass1_result =
new PAGE_RES(
false, block_list,
2477 return pass1_result;
2487 length = (len == -1 ? strlen(repr) : len);
2506 static void add_space(TESS_CHAR_IT* it) {
2507 auto *t =
new TESS_CHAR(0,
" ");
2508 it->add_after_then_move(t);
2512 static float rating_to_cost(
float rating) {
2513 rating = 100 + rating;
2517 if (rating < 0) rating = 0;
2525 static void extract_result(TESS_CHAR_IT* out,
2529 while (page_res_it.word() !=
nullptr) {
2537 int n = strlen(len);
2538 for (
int i = 0; i < n; i++) {
2542 out->add_after_then_move(tc);
2546 page_res_it.forward();
2563 TESS_CHAR_LIST tess_chars;
2564 TESS_CHAR_IT tess_chars_it(&tess_chars);
2565 extract_result(&tess_chars_it, page_res);
2566 tess_chars_it.move_to_first();
2567 int n = tess_chars.length();
2569 *lengths =
new int[n];
2570 *costs =
new float[n];
2576 for (tess_chars_it.mark_cycle_pt();
2577 !tess_chars_it.cycled_list();
2578 tess_chars_it.forward(), i++) {
2580 text_len += (*lengths)[i] = tc->
length;
2581 (*costs)[i] = tc->
cost;
2585 (*y1)[i] = tc->
box.
top();
2587 char *p = *text =
new char[text_len];
2589 tess_chars_it.move_to_first();
2590 for (tess_chars_it.mark_cycle_pt();
2591 !tess_chars_it.cycled_list();
2592 tess_chars_it.forward()) {
2610 int* feature_outline_index) {
2616 &cn_features, &fx_info, &outline_counts);
2621 *num_features = cn_features.
size();
2622 memcpy(int_features, &cn_features[0], *num_features *
sizeof(cn_features[0]));
2624 if (feature_outline_index !=
nullptr) {
2626 for (
int i = 0; i < outline_counts.
size(); ++i) {
2627 while (f < outline_counts[i])
2628 feature_outline_index[f++] = i;
2636 int left,
int top,
int right,
int bottom) {
2637 TBOX box(left, bottom, right, top);
2638 BLOCK_IT b_it(blocks);
2639 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2640 BLOCK* block = b_it.data();
2644 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2645 ROW* row = r_it.data();
2649 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2650 WERD* word = w_it.data();
2661 int num_max_matches,
2664 int* num_matches_returned) {
2665 auto* choices =
new BLOB_CHOICE_LIST;
2667 BLOB_CHOICE_IT choices_it(choices);
2668 int& index = *num_matches_returned;
2670 for (choices_it.mark_cycle_pt();
2671 !choices_it.cycled_list() && index < num_max_matches;
2672 choices_it.forward()) {
2675 ratings[index] = choice->
rating();
2678 *num_matches_returned = index;
2681 #endif // ndef DISABLED_LEGACY_ENGINE
int IntCastRounded(double x)
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
BLOCK_LIST * block_list_
The page layout.
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
bool SetDebugVariable(const char *name, const char *value)
bool BeginDocument(const char *title)
WERD_CHOICE * prev_word_best_choice_
void chomp_string(char *str)
PolyBlockType BlockType() const
void InitForAnalysePage()
int GetThresholdedImageScaleFactor() const
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
void ResetAdaptiveClassifier()
const char * string() const
bool tessedit_ambigs_training
void TidyUp(PAGE_RES *page_res)
int * AllWordConfidences()
void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *) FillLatticeFunc
UNICHAR_ID unichar_id() const
void SetDictFunc(DictFunc f)
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
GenericVector< IntParam * > int_params
virtual char * GetUTF8Text(PageIteratorLevel level) const
void CorrectClassifyWords(PAGE_RES *page_res)
void read_config_file(const char *filename, SetParamConstraint constraint)
WERD_CHOICE * best_choice
Pix * GetBinaryImage(PageIteratorLevel level) const
void SetRectangle(int left, int top, int width, int height)
static void ResetToDefaults(ParamsVectors *member_params)
TBOX intersection(const TBOX &box) const
bool flag(WERD_FLAGS mask) const
#define ELISTIZE(CLASSNAME)
const STRING & unichar_string() const
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
void add_str_int(const char *str, int number)
TESS_LOCAL int FindLines()
static TESS_LOCAL int TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
int Recognize(ETEXT_DESC *monitor)
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
bool tessedit_train_from_boxes
void GetFeaturesForBlob(TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
const int kBytesPerBoxFileLine
C_BLOB_LIST * blob_list()
get blobs
constexpr int kMaxCredibleResolution
GenericVector< ParagraphModel * > * paragraph_models_
FileReader reader_
Reads files from any filesystem.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
void set_text(const char *new_text)
char * GetOsdText(int page_number)
const char * id_to_unichar(UNICHAR_ID id) const
#define ELISTIZEH(CLASSNAME)
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
static size_t getOpenCLDevice(void **device)
void SetPageSegMode(PageSegMode mode)
static void DeleteBlockList(BLOCK_LIST *block_list)
Pix * pix_original() const
bool IsAtBeginningOf(PageIteratorLevel level) const override
float angle() const
find angle
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
void ExtractFontName(const STRING &filename, STRING *fontname)
TESS_LOCAL bool InternalSetImage()
bool tessedit_train_line_recognizer
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
void SetSourceYResolution(int ppi)
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
int GetSourceYResolution()
bool PTIsTextType(PolyBlockType type)
char * GetUTF8Text(PageIteratorLevel level) const
static const char * Version()
PageSegMode GetPageSegMode() const
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
STRING * output_file_
Name used by debug code.
void InitAdaptiveClassifier(TessdataManager *mgr)
const char * GetStringVariable(const char *name) const
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
GenericVector< BoolParam * > bool_params
char * GetTSVText(int page_number)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
TESS_LOCAL PAGE_RES * RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result)
void SetOutputName(const char *name)
void set_pix_thresholds(Pix *thresholds)
bool recognition_done_
page_res_ contains recognition data.
int GetScaleFactor() const
Tesseract * get_sub_lang(int index) const
MutableIterator * GetMutableIterator()
const int kBytesPerNumber
int RecognizeForChopTest(ETEXT_DESC *monitor)
Pix ** mutable_pix_binary()
const char * get_script_from_script_id(int id) const
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
DLLSYM void tprintf(const char *format,...)
bool IsEmpty() const
Return true if no image has been set.
const Dawg * GetDawg(int i) const
void set_pix_original(Pix *original_pix)
int GetScaledEstimatedResolution() const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool contains_unichar(const char *const unichar_repr) const
Tesseract * tesseract_
The underlying data object.
bool interactive_display_mode
#define STRING_VAR(name, val, comment)
int IsValidWord(const char *word)
PageIterator * AnalyseLayout()
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
int GetScaledYResolution() const
bool tessedit_write_images
BLOCK_RES * block() const
void BestChoiceToCorrectText()
ROW_LIST * row_list()
get rows
bool SetVariable(const char *name, const char *value)
void ResetDocumentDictionary()
Boxa * GetStrips(Pixa **pixa, int **blockids)
PAGE_RES * page_res_
The page-level data.
bool GetVariableAsString(const char *name, STRING *val)
OcrEngineMode oem() const
void * cancel_this
monitor-aware progress callback
ADAPT_TEMPLATES AdaptedTemplates
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
bool Empty(PageIteratorLevel level) const
const STRING & unichar_lengths() const
void ReadConfigFile(const char *filename)
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
EquationDetect * equ_detect_
The equation detector.
static ROW * FindRowForBox(BLOCK_LIST *blocks, int left, int top, int right, int bottom)
bool GetTextDirection(int *out_offset, float *out_slope)
TESS_API int get_best_script(int orientation_id) const
static TBLOB * MakeTBLOB(Pix *pix)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
ResultIterator * GetIterator()
const char * GetUnichar(int unichar_id)
PDBLK pdblk
Page Description Block.
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
void ReSegmentByClassification(PAGE_RES *page_res)
int GetSourceYResolution() const
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void SetBlackAndWhitelist()
bool GetDoubleVariable(const char *name, double *value) const
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
static ROW * MakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
const char * GetInputName()
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
const char * GetDatapath()
bool DetectOS(OSResults *)
int tessedit_pageseg_mode
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
TESS_LOCAL PAGE_RES * RecognitionPass1(BLOCK_LIST *block_list)
TruthCallback * truth_cb_
void set_unlv_suspects(WERD_RES *word)
const int kBytesPer64BitNumber
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
bool tessedit_resegment_from_boxes
bool tessedit_resegment_from_line_boxes
bool classify_bln_numeric_mode
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
const char kTesseractReject
TBOX bounding_box() const
Boxa * GetConnectedComponents(Pixa **cc)
const TBOX & BlobBox(int index) const
void RunAdaptiveClassifier(TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
@ W_FUZZY_NON
fuzzy nonspace
WERD_RES * restart_page()
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
struct TessResultRenderer TessResultRenderer
bool AddImage(TessBaseAPI *api)
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
const int kMaxBytesPerLine
constexpr int kMinCredibleResolution
TESS_LOCAL int TextLength(int *blob_count)
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
STRING * datapath_
Current location of tessdata.
void ClearAdaptiveClassifier()
virtual void Run(A1, A2, A3, A4)=0
bool(*)(const STRING &, GenericVector< char > *) FileReader
Boxa * GetRegions(Pixa **pixa)
const int kBlnBaselineOffset
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
bool GetIntVariable(const char *name, int *value) const
STRING * input_file_
Name used by training code.
void set_source_resolution(int ppi)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void SetRectangle(int left, int top, int width, int height)
bool major_overlap(const TBOX &box) const
ImageThresholder * thresholder_
Image thresholding module.
FILE * init_recog_training(const STRING &fname)
bool GetBoolVariable(const char *name, bool *value) const
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
float Confidence(PageIteratorLevel level) const
void SetFillLatticeFunc(FillLatticeFunc f)
Pix * GetThresholdedImage()
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
void set_pix_grey(Pix *grey_pix)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
bool IsBinary() const
Returns true if the source image is binary.
bool Next(PageIteratorLevel level) override
float base_line(float xpos) const
GenericVector< StringParam * > string_params
void set_min_orientation_margin(double margin)
STRING * language_
Last initialized language.
void PrintVariables(FILE *fp) const
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
const int kNumbersPerBlob
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
@ PSM_OSD_ONLY
Orientation and script detection only.
TESS_CHAR(float _cost, const char *repr, int len=-1)
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
tesseract::BoxWord * box_word
#define MAX_NUM_INT_FEATURES
void ReadDebugConfigFile(const char *filename)
virtual bool Next(PageIteratorLevel level)
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
char * GetBoxText(int page_number)
TBOX bounding_box() const
double min_orientation_margin
void delete_data_pointers()
void GetLoadedLanguagesAsVector(GenericVector< STRING > *langs) const
bool WriteTRFile(const STRING &filename)
BLOCK_LIST * FindLinesCreateBlockList()
tesseract::ParamsVectors * GlobalParams()
Tesseract * osd_tesseract_
For orientation & script detection.
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
bool textord_equation_detect
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode)
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
void SetInputName(const char *name)
static TESS_API DawgCache * GlobalDawgCache()
static void ClearPersistentCache()
void LearnWord(const char *fontname, WERD_RES *word)
virtual TESS_LOCAL bool Threshold(Pix **pix)
bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
GenericVector< DoubleParam * > double_params
void extract_edges(Pix *pix, BLOCK *block)
Boxa * GetWords(Pixa **pixa)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
double matcher_good_threshold
const UNICHARSET & getUnicharset() const
bool TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
void SetSourceResolution(int ppi)
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
int num_sub_langs() const
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
const char * GetInitLanguagesAsString() const
void SetEquationDetect(EquationDetect *detector)
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
bool IsValidCharacter(const char *utf8_character)
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
TBOX bounding_box() const
bool PSM_OSD_ENABLED(int pageseg_mode)
#define BOOL_VAR(name, val, comment)
CRUNCH_MODE unlv_crunch_mode
Dict & getDict() override
virtual Pix * GetPixRectGrey()
CANCEL_FUNC cancel
for errcode use
C_OUTLINE_LIST * out_list()
bool LoadMemBuffer(const char *name, const char *data, int size)
TESS_LOCAL LTRResultIterator * GetLTRIterator()
void SetInputImage(Pix *pix)
const char * c_str() const
void set_deadline_msecs(int32_t deadline_msecs)
virtual Pix * GetPixRectThresholds()
void split(char c, GenericVector< STRING > *splited)
STRING HOcrEscape(const char *text)
int InitLangMod(const char *datapath, const char *language)
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
bool tessedit_make_boxes_from_boxes
static void CatchSignals()
int OrientationIdToValue(const int &id)
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.