tesseract  4.1.1
tesseract::TessBaseAPI Class Reference

#include <baseapi.h>

Public Member Functions

 TessBaseAPI ()
 
virtual ~TessBaseAPI ()
 
void SetInputName (const char *name)
 
const char * GetInputName ()
 
void SetInputImage (Pix *pix)
 
Pix * GetInputImage ()
 
int GetSourceYResolution ()
 
const char * GetDatapath ()
 
void SetOutputName (const char *name)
 
bool SetVariable (const char *name, const char *value)
 
bool SetDebugVariable (const char *name, const char *value)
 
bool GetIntVariable (const char *name, int *value) const
 
bool GetBoolVariable (const char *name, bool *value) const
 
bool GetDoubleVariable (const char *name, double *value) const
 
const char * GetStringVariable (const char *name) const
 
void PrintVariables (FILE *fp) const
 
bool GetVariableAsString (const char *name, STRING *val)
 
int Init (const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
 
int Init (const char *datapath, const char *language, OcrEngineMode oem)
 
int Init (const char *datapath, const char *language)
 
int Init (const char *data, int data_size, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params, FileReader reader)
 
const char * GetInitLanguagesAsString () const
 
void GetLoadedLanguagesAsVector (GenericVector< STRING > *langs) const
 
void GetAvailableLanguagesAsVector (GenericVector< STRING > *langs) const
 
int InitLangMod (const char *datapath, const char *language)
 
void InitForAnalysePage ()
 
void ReadConfigFile (const char *filename)
 
void ReadDebugConfigFile (const char *filename)
 
void SetPageSegMode (PageSegMode mode)
 
PageSegMode GetPageSegMode () const
 
char * TesseractRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
 
void ClearAdaptiveClassifier ()
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetImage (Pix *pix)
 
void SetSourceResolution (int ppi)
 
void SetRectangle (int left, int top, int width, int height)
 
void SetThresholder (ImageThresholder *thresholder)
 
Pix * GetThresholdedImage ()
 
Boxa * GetRegions (Pixa **pixa)
 
Boxa * GetTextlines (bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * GetStrips (Pixa **pixa, int **blockids)
 
Boxa * GetWords (Pixa **pixa)
 
Boxa * GetConnectedComponents (Pixa **cc)
 
Boxa * GetComponentImages (PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int GetThresholdedImageScaleFactor () const
 
PageIterator * AnalyseLayout ()
 
PageIterator * AnalyseLayout (bool merge_similar_words)
 
int Recognize (ETEXT_DESC *monitor)
 
int RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * GetIterator ()
 
MutableIterator * GetMutableIterator ()
 
char * GetUTF8Text ()
 
char * GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * GetHOCRText (int page_number)
 
char * GetAltoText (ETEXT_DESC *monitor, int page_number)
 
char * GetAltoText (int page_number)
 
char * GetTSVText (int page_number)
 
char * GetLSTMBoxText (int page_number)
 
char * GetBoxText (int page_number)
 
char * GetWordStrBoxText (int page_number)
 
char * GetUNLVText ()
 
bool DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * GetOsdText (int page_number)
 
int MeanTextConf ()
 
int * AllWordConfidences ()
 
bool AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void Clear ()
 
void End ()
 
int IsValidWord (const char *word)
 
bool IsValidCharacter (const char *utf8_character)
 
bool GetTextDirection (int *out_offset, float *out_slope)
 
void SetDictFunc (DictFunc f)
 
void SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool DetectOS (OSResults *)
 
void GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
void SetFillLatticeFunc (FillLatticeFunc f)
 
BLOCK_LIST * FindLinesCreateBlockList ()
 
void GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
void RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * GetUnichar (int unichar_id)
 
const Dawg * GetDawg (int i) const
 
int NumDawgs () const
 
Tesseract * tesseract () const
 
OcrEngineMode oem () const
 
void InitTruthCallback (TruthCallback *cb)
 
void set_min_orientation_margin (double margin)
 

Static Public Member Functions

static const char * Version ()
 
static size_t getOpenCLDevice (void **device)
 
static void CatchSignals ()
 
static void ClearPersistentCache ()
 
static void DeleteBlockList (BLOCK_LIST *block_list)
 
static ROW * MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * MakeTBLOB (Pix *pix)
 
static void NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
static ROW * FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 

Protected Member Functions

TESS_LOCAL bool InternalSetImage ()
 
virtual TESS_LOCAL bool Threshold (Pix **pix)
 
TESS_LOCAL int FindLines ()
 
void ClearResults ()
 
TESS_LOCAL LTRResultIterator * GetLTRIterator ()
 
TESS_LOCAL int TextLength (int *blob_count)
 
TESS_LOCAL void DetectParagraphs (bool after_text_recognition)
 
TESS_LOCAL void AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
 
TESS_LOCAL PAGE_RES * RecognitionPass1 (BLOCK_LIST *block_list)
 
TESS_LOCAL PAGE_RES * RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
 
TESS_LOCAL const PAGE_RES * GetPageRes () const
 

Static Protected Member Functions

static TESS_LOCAL int TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
 

Protected Attributes

Tesseract * tesseract_
 The underlying data object. More...
 
Tesseract * osd_tesseract_
 For orientation & script detection. More...
 
EquationDetect * equ_detect_
 The equation detector. More...
 
FileReader reader_
 Reads files from any filesystem. More...
 
ImageThresholder * thresholder_
 Image thresholding module. More...
 
GenericVector< ParagraphModel * > * paragraph_models_
 
BLOCK_LIST * block_list_
 The page layout. More...
 
PAGE_RES * page_res_
 The page-level data. More...
 
STRING * input_file_
 Name used by training code. More...
 
STRING * output_file_
 Name used by debug code. More...
 
STRING * datapath_
 Current location of tessdata. More...
 
STRING * language_
 Last initialized language. More...
 
OcrEngineMode last_oem_requested_
 Last ocr language mode requested. More...
 
bool recognition_done_
 page_res_ contains recognition data. More...
 
TruthCallback * truth_cb_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 
int image_width_
 
int image_height_
 

Detailed Description

Base class for all tesseract APIs. Specific classes can add ability to work on different inputs or produce different outputs. This class is mostly an interface layer on top of the Tesseract instance class to hide the data types so that users of this class don't have to include any other Tesseract headers.

Definition at line 91 of file baseapi.h.

Constructor & Destructor Documentation

◆ TessBaseAPI()

tesseract::TessBaseAPI::TessBaseAPI ( )

Definition at line 189 of file baseapi.cpp.

190  : tesseract_(nullptr),
191  osd_tesseract_(nullptr),
192  equ_detect_(nullptr),
193  reader_(nullptr),
194  // Thresholder is initialized to nullptr here, but will be set before use by:
195  // A constructor of a derived API, SetThresholder(), or
196  // created implicitly when used in InternalSetImage.
197  thresholder_(nullptr),
198  paragraph_models_(nullptr),
199  block_list_(nullptr),
200  page_res_(nullptr),
201  input_file_(nullptr),
202  output_file_(nullptr),
203  datapath_(nullptr),
204  language_(nullptr),
206  recognition_done_(false),
207  truth_cb_(nullptr),
208  rect_left_(0),
209  rect_top_(0),
210  rect_width_(0),
211  rect_height_(0),
212  image_width_(0),
213  image_height_(0) {
214 #if defined(DEBUG)
215  // The Tesseract executables would use the "C" locale by default,
216  // but other software which is linked against the Tesseract library
217  // typically uses the locale from the user's environment.
218  // Here the default is overridden to allow debugging of potential
219  // problems caused by the locale settings.
220 
221  // Use the current locale if building debug code.
222  std::locale::global(std::locale(""));
223 #endif
224 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
STRING * language_
Last initialized language.
Definition: baseapi.h:899
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
TruthCallback * truth_cb_
Definition: baseapi.h:902
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:900

◆ ~TessBaseAPI()

tesseract::TessBaseAPI::~TessBaseAPI ( )
virtual

Definition at line 226 of file baseapi.cpp.

226  {
227  End();
228 }

Member Function Documentation

◆ AdaptToCharacter()

void tesseract::TessBaseAPI::AdaptToCharacter ( const char *  unichar_repr,
int  length,
float  baseline,
float  xheight,
float  descender,
float  ascender 
)
protected

Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.

Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.

Definition at line 2429 of file baseapi.cpp.

2434  {
2435  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
2436  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
2438  tesseract_->pix_binary());
2439  float threshold;
2440  float best_rating = -100;
2441 
2442 
2443  // Classify to get a raw choice.
2444  BLOB_CHOICE_LIST choices;
2445  tesseract_->AdaptiveClassifier(blob, &choices);
2446  BLOB_CHOICE_IT choice_it;
2447  choice_it.set_to_list(&choices);
2448  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2449  choice_it.forward()) {
2450  if (choice_it.data()->rating() > best_rating) {
2451  best_rating = choice_it.data()->rating();
2452  }
2453  }
2454 
2455  threshold = tesseract_->matcher_good_threshold;
2456 
2457  if (blob->outlines)
2458  tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
2460  delete blob;
2461 }
int UNICHAR_ID
Definition: unichar.h:34
@ baseline
Definition: mfoutline.h:63
Pix * pix_binary() const
Definition: blobs.h:284
TESSLINE * outlines
Definition: blobs.h:400
UNICHARSET unicharset
Definition: ccutil.h:73
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:515
bool classify_bln_numeric_mode
Definition: classify.h:508
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:191
double matcher_good_threshold
Definition: classify.h:456
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:853

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 1799 of file baseapi.cpp.

1799  {
1800  int debug = 0;
1801  GetIntVariable("applybox_debug", &debug);
1802  bool success = true;
1803  PageSegMode current_psm = GetPageSegMode();
1804  SetPageSegMode(mode);
1805  SetVariable("classify_enable_learning", "0");
1806  const std::unique_ptr<const char[]> text(GetUTF8Text());
1807  if (debug) {
1808  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1809  }
1810  if (text != nullptr) {
1811  PAGE_RES_IT it(page_res_);
1812  WERD_RES* word_res = it.word();
1813  if (word_res != nullptr) {
1814  word_res->word->set_text(wordstr);
1815  // Check to see if text matches wordstr.
1816  int w = 0;
1817  int t;
1818  for (t = 0; text[t] != '\0'; ++t) {
1819  if (text[t] == '\n' || text[t] == ' ')
1820  continue;
1821  while (wordstr[w] == ' ') ++w;
1822  if (text[t] != wordstr[w])
1823  break;
1824  ++w;
1825  }
1826  if (text[t] != '\0' || wordstr[w] != '\0') {
1827  // No match.
1828  delete page_res_;
1829  GenericVector<TBOX> boxes;
1833  PAGE_RES_IT pr_it(page_res_);
1834  if (pr_it.word() == nullptr)
1835  success = false;
1836  else
1837  word_res = pr_it.word();
1838  } else {
1839  word_res->BestChoiceToCorrectText();
1840  }
1841  if (success) {
1842  tesseract_->EnableLearning = true;
1843  tesseract_->LearnWord(nullptr, word_res);
1844  }
1845  } else {
1846  success = false;
1847  }
1848  } else {
1849  success = false;
1850  }
1851  SetPageSegMode(current_psm);
1852  return success;
1853 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:515
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:298
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:286
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:522
void TidyUp(PAGE_RES *page_res)
void ReSegmentByClassification(PAGE_RES *page_res)
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:207
void BestChoiceToCorrectText()
Definition: pageres.cpp:923
WERD * word
Definition: pageres.h:186
void set_text(const char *new_text)
Definition: werd.h:115
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:250

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1764 of file baseapi.cpp.

1764  {
1765  if (tesseract_ == nullptr ||
1766  (!recognition_done_ && Recognize(nullptr) < 0))
1767  return nullptr;
1768  int n_word = 0;
1769  PAGE_RES_IT res_it(page_res_);
1770  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
1771  n_word++;
1772 
1773  int* conf = new int[n_word+1];
1774  n_word = 0;
1775  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1776  WERD_RES *word = res_it.word();
1777  WERD_CHOICE* choice = word->best_choice;
1778  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1779  // This is the eq for converting Tesseract confidence to 1..100
1780  if (w_conf < 0) w_conf = 0;
1781  if (w_conf > 100) w_conf = 100;
1782  conf[n_word++] = w_conf;
1783  }
1784  conf[n_word] = -1;
1785  return conf;
1786 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
WERD_CHOICE * best_choice
Definition: pageres.h:241
float certainty() const
Definition: ratngs.h:320

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns nullptr on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 810 of file baseapi.cpp.

810 { return AnalyseLayout(false); }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:810

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 812 of file baseapi.cpp.

812  {
813  if (FindLines() == 0) {
814  if (block_list_->empty())
815  return nullptr; // The page was empty.
816  page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
817  DetectParagraphs(false);
818  return new PageIterator(
822  }
823  return nullptr;
824 }
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2278
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2068
int GetScaledYResolution() const
Definition: thresholder.h:92

◆ CatchSignals()

void tesseract::TessBaseAPI::CatchSignals ( )
static

Writes the thresholded image to stderr as a PBM file on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).

This method used to write the thresholded image to stderr as a PBM file on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).

Definition at line 262 of file baseapi.cpp.

262  {
263  // Warn API users that an implementation is needed.
264  tprintf("Deprecated method CatchSignals has only a dummy implementation!\n");
265 }

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1862 of file baseapi.cpp.

1862  {
1863  if (thresholder_ != nullptr)
1864  thresholder_->Clear();
1865  ClearResults();
1866  if (tesseract_ != nullptr) SetInputImage(nullptr);
1867 }
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:956
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:48

◆ ClearAdaptiveClassifier()

void tesseract::TessBaseAPI::ClearAdaptiveClassifier ( )

Call between pages or documents etc to free up memory and forget adaptive data.

Definition at line 565 of file baseapi.cpp.

565  {
566  if (tesseract_ == nullptr)
567  return;
570 }

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 1910 of file baseapi.cpp.

1910  {
1912 }
void DeleteUnusedDawgs()
Definition: dawg_cache.h:43
static TESS_API DawgCache * GlobalDawgCache()
Definition: dict.cpp:184

◆ ClearResults()

void tesseract::TessBaseAPI::ClearResults ( )
protected

Delete the pageres and block list ready for a new page.

Delete the pageres and clear the block list ready for a new page.

Definition at line 2144 of file baseapi.cpp.

2144  {
2145  if (tesseract_ != nullptr) {
2146  tesseract_->Clear();
2147  }
2148  delete page_res_;
2149  page_res_ = nullptr;
2150  recognition_done_ = false;
2151  if (block_list_ == nullptr)
2152  block_list_ = new BLOCK_LIST;
2153  else
2154  block_list_->clear();
2155  if (paragraph_models_ != nullptr) {
2157  delete paragraph_models_;
2158  paragraph_models_ = nullptr;
2159  }
2160 }
void delete_data_pointers()

◆ DeleteBlockList()

void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2346 of file baseapi.cpp.

2346  {
2347  delete block_list;
2348 }

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes values to each parameter as an output.

Definition at line 1686 of file baseapi.cpp.

1688  {
1689  OSResults osr;
1690 
1691  bool osd = DetectOS(&osr);
1692  if (!osd) {
1693  return false;
1694  }
1695 
1696  int orient_id = osr.best_result.orientation_id;
1697  int script_id = osr.get_best_script(orient_id);
1698  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
1699  if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
1700 
1701  if (script_name) {
1702  const char* script = osr.unicharset->get_script_from_script_id(script_id);
1703 
1704  *script_name = script;
1705  }
1706 
1707  if (script_conf) *script_conf = osr.best_result.sconfidence;
1708 
1709  return true;
1710 }
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2200
float oconfidence
Definition: osdetect.h:46
int orientation_id
Definition: osdetect.h:43
float sconfidence
Definition: osdetect.h:45
OSBestResult best_result
Definition: osdetect.h:81
UNICHARSET * unicharset
Definition: osdetect.h:80
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:112
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:854

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2200 of file baseapi.cpp.

2200  {
2201  if (tesseract_ == nullptr)
2202  return false;
2203  ClearResults();
2204  if (tesseract_->pix_binary() == nullptr &&
2206  return false;
2207  }
2208 
2209  if (input_file_ == nullptr)
2210  input_file_ = new STRING(kInputFile);
2212 }
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:190
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2014
Definition: strngs.h:45

◆ DetectParagraphs()

void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 2278 of file baseapi.cpp.

2278  {
2279  int debug_level = 0;
2280  GetIntVariable("paragraph_debug_level", &debug_level);
2281  if (paragraph_models_ == nullptr)
2283  MutableIterator *result_it = GetMutableIterator();
2284  do { // Detect paragraphs for this block
2286  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
2287  result_it, &models);
2288  *paragraph_models_ += models;
2289  } while (result_it->Next(RIL_BLOCK));
2290  delete result_it;
2291 }
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1341

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1875 of file baseapi.cpp.

1875  {
1876  Clear();
1877  delete thresholder_;
1878  thresholder_ = nullptr;
1879  delete page_res_;
1880  page_res_ = nullptr;
1881  delete block_list_;
1882  block_list_ = nullptr;
1883  if (paragraph_models_ != nullptr) {
1885  delete paragraph_models_;
1886  paragraph_models_ = nullptr;
1887  }
1888  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
1889  delete tesseract_;
1890  tesseract_ = nullptr;
1891  delete osd_tesseract_;
1892  osd_tesseract_ = nullptr;
1893  delete equ_detect_;
1894  equ_detect_ = nullptr;
1895  delete input_file_;
1896  input_file_ = nullptr;
1897  delete output_file_;
1898  output_file_ = nullptr;
1899  delete datapath_;
1900  datapath_ = nullptr;
1901  delete language_;
1902  language_ = nullptr;
1903 }

◆ FindLines()

int tesseract::TessBaseAPI::FindLines ( )
protected

Find lines from the image making the BLOCK_LIST.

Returns
0 on success.

Find lines from the image making the BLOCK_LIST.

Definition at line 2068 of file baseapi.cpp.

2068  {
2069  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
2070  tprintf("Please call SetImage before attempting recognition.\n");
2071  return -1;
2072  }
2073  if (recognition_done_)
2074  ClearResults();
2075  if (!block_list_->empty()) {
2076  return 0;
2077  }
2078  if (tesseract_ == nullptr) {
2079  tesseract_ = new Tesseract;
2080  #ifndef DISABLED_LEGACY_ENGINE
2082  #endif
2083  }
2084  if (tesseract_->pix_binary() == nullptr &&
2086  return -1;
2087  }
2088 
2090 
2091 #ifndef DISABLED_LEGACY_ENGINE
2093  if (equ_detect_ == nullptr && datapath_ != nullptr) {
2094  equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
2095  }
2096  if (equ_detect_ == nullptr) {
2097  tprintf("Warning: Could not set equation detector\n");
2098  } else {
2100  }
2101  }
2102 #endif // ndef DISABLED_LEGACY_ENGINE
2103 
2104  Tesseract* osd_tess = osd_tesseract_;
2105  OSResults osr;
2107  osd_tess == nullptr) {
2108  if (strcmp(language_->string(), "osd") == 0) {
2109  osd_tess = tesseract_;
2110  } else {
2111  osd_tesseract_ = new Tesseract;
2112  TessdataManager mgr(reader_);
2113  if (datapath_ == nullptr) {
2114  tprintf("Warning: Auto orientation and script detection requested,"
2115  " but data path is undefined\n");
2116  delete osd_tesseract_;
2117  osd_tesseract_ = nullptr;
2118  } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
2119  "osd", OEM_TESSERACT_ONLY,
2120  nullptr, 0, nullptr, nullptr,
2121  false, &mgr) == 0) {
2122  osd_tess = osd_tesseract_;
2125  } else {
2126  tprintf("Warning: Auto orientation and script detection requested,"
2127  " but osd language failed to load\n");
2128  delete osd_tesseract_;
2129  osd_tesseract_ = nullptr;
2130  }
2131  }
2132  }
2133 
2134  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2135  return -1;
2136 
2137  // If Devanagari is being recognized, we use different images for page seg
2138  // and for OCR.
2139  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2140  return 0;
2141 }
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:269
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:191
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:286
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:99
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
void set_source_resolution(int ppi)
int GetSourceYResolution() const
Definition: thresholder.h:89
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
const char * string() const
Definition: strngs.cpp:194
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527

◆ FindLinesCreateBlockList()

BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2334 of file baseapi.cpp.

2334  {
2335  ASSERT_HOST(FindLines() == 0);
2336  BLOCK_LIST* result = block_list_;
2337  block_list_ = nullptr;
2338  return result;
2339 }
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ FindRowForBox()

ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns nullptr.

Definition at line 2635 of file baseapi.cpp.

2636  {
2637  TBOX box(left, bottom, right, top);
2638  BLOCK_IT b_it(blocks);
2639  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2640  BLOCK* block = b_it.data();
2641  if (!box.major_overlap(block->pdblk.bounding_box()))
2642  continue;
2643  ROW_IT r_it(block->row_list());
2644  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2645  ROW* row = r_it.data();
2646  if (!box.major_overlap(row->bounding_box()))
2647  continue;
2648  WERD_IT w_it(row->word_list());
2649  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2650  WERD* word = w_it.data();
2651  if (box.major_overlap(word->bounding_box()))
2652  return row;
2653  }
2654  }
2655  }
2656  return nullptr;
2657 }
Definition: ocrblock.h:31
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
Definition: ocrrow.h:37
TBOX bounding_box() const
Definition: ocrrow.h:88
WERD_LIST * word_list()
Definition: ocrrow.h:55
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
Definition: rect.h:34
Definition: werd.h:56
TBOX bounding_box() const
Definition: werd.cpp:148

◆ GetAltoText() [1/2]

char * tesseract::TessBaseAPI::GetAltoText ( ETEXT_DESC monitor,
int  page_number 
)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 126 of file altorenderer.cpp.

126  {
127  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
128  return nullptr;
129 
130  int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
131 
132  if (input_file_ == nullptr) SetInputName(nullptr);
133 
134 #ifdef _WIN32
135  // convert input name from ANSI encoding to utf-8
136  int str16_len =
137  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
138  wchar_t* uni16_str = new WCHAR[str16_len];
139  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
140  uni16_str, str16_len);
141  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
142  0, nullptr, nullptr);
143  char* utf8_str = new char[utf8_len];
144  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
145  nullptr, nullptr);
146  *input_file_ = utf8_str;
147  delete[] uni16_str;
148  delete[] utf8_str;
149 #endif
150 
151  std::stringstream alto_str;
152  // Use "C" locale (needed for int values larger than 999).
153  alto_str.imbue(std::locale::classic());
154  alto_str
155  << "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\""
156  << rect_height_
157  << "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
158  << " ID=\"page_" << page_number << "\">\n"
159  << "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
160  << " WIDTH=\"" << rect_width_ << "\""
161  << " HEIGHT=\"" << rect_height_ << "\">\n";
162 
163  ResultIterator* res_it = GetIterator();
164  while (!res_it->Empty(RIL_BLOCK)) {
165  if (res_it->Empty(RIL_WORD)) {
166  res_it->Next(RIL_WORD);
167  continue;
168  }
169 
170  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
171  alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
172  AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
173  alto_str << "\n";
174  }
175 
176  if (res_it->IsAtBeginningOf(RIL_PARA)) {
177  alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << tcnt << "\"";
178  AddBoxToAlto(res_it, RIL_PARA, alto_str);
179  alto_str << "\n";
180  }
181 
182  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
183  alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << lcnt << "\"";
184  AddBoxToAlto(res_it, RIL_TEXTLINE, alto_str);
185  alto_str << "\n";
186  }
187 
188  alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << wcnt << "\"";
189  AddBoxToAlto(res_it, RIL_WORD, alto_str);
190  alto_str << " CONTENT=\"";
191 
192  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
193  bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
194  bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
195 
196 
197  int left, top, right, bottom;
198  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
199 
200  do {
201  const std::unique_ptr<const char[]> grapheme(
202  res_it->GetUTF8Text(RIL_SYMBOL));
203  if (grapheme && grapheme[0] != 0) {
204  alto_str << HOcrEscape(grapheme.get()).c_str();
205  }
206  res_it->Next(RIL_SYMBOL);
207  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
208 
209  alto_str << "\"/>";
210 
211  wcnt++;
212 
213  if (last_word_in_line) {
214  alto_str << "\n\t\t\t\t\t\t</TextLine>\n";
215  lcnt++;
216  } else {
217  int hpos = right;
218  int vpos = top;
219  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
220  int width = left - hpos;
221  alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos
222  << "\" HPOS=\"" << hpos << "\"/>\n";
223  }
224 
225  if (last_word_in_tblock) {
226  alto_str << "\t\t\t\t\t</TextBlock>\n";
227  tcnt++;
228  }
229 
230  if (last_word_in_cblock) {
231  alto_str << "\t\t\t\t</ComposedBlock>\n";
232  bcnt++;
233  }
234  }
235 
236  alto_str << "\t\t\t</PrintSpace>\n"
237  << "\t\t</Page>\n";
238  const std::string& text = alto_str.str();
239 
240  char* result = new char[text.length() + 1];
241  strcpy(result, text.c_str());
242  delete res_it;
243  return result;
244 }
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2310
ResultIterator * GetIterator()
Definition: baseapi.cpp:1324
void SetInputName(const char *name)
Definition: baseapi.cpp:271
const char * c_str() const
Definition: strngs.cpp:205

◆ GetAltoText() [2/2]

char * tesseract::TessBaseAPI::GetAltoText ( int  page_number)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 118 of file altorenderer.cpp.

118  {
119  return GetAltoText(nullptr, page_number);
120 }
char * GetAltoText(ETEXT_DESC *monitor, int page_number)

◆ GetAvailableLanguagesAsVector()

void tesseract::TessBaseAPI::GetAvailableLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the available languages in the sorted vector of STRINGs.

Definition at line 456 of file baseapi.cpp.

457  {
458  langs->clear();
459  if (tesseract_ != nullptr) {
460  addAvailableLanguages(tesseract_->datadir, "", langs);
461  langs->sort(CompareSTRING);
462  }
463 }
STRING datadir
Definition: ccutil.h:69

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as in the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextLines().

Definition at line 2233 of file baseapi.cpp.

2234  {
2235  delete[] *block_orientation;
2236  *block_orientation = nullptr;
2237  delete[] *vertical_writing;
2238  *vertical_writing = nullptr;
2239  BLOCK_IT block_it(block_list_);
2240 
2241  block_it.move_to_first();
2242  int num_blocks = 0;
2243  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2244  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2245  continue;
2246  }
2247  ++num_blocks;
2248  }
2249  if (!num_blocks) {
2250  tprintf("WARNING: Found no blocks\n");
2251  return;
2252  }
2253  *block_orientation = new int[num_blocks];
2254  *vertical_writing = new bool[num_blocks];
2255  block_it.move_to_first();
2256  int i = 0;
2257  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2258  block_it.forward()) {
2259  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2260  continue;
2261  }
2262  FCOORD re_rotation = block_it.data()->re_rotation();
2263  float re_theta = re_rotation.angle();
2264  FCOORD classify_rotation = block_it.data()->classify_rotation();
2265  float classify_theta = classify_rotation.angle();
2266  double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2267  if (rot_theta < 0) rot_theta += 4;
2268  int num_rotations = static_cast<int>(rot_theta + 0.5);
2269  (*block_orientation)[i] = num_rotations;
2270  // The classify_rotation is non-zero only if the text has vertical
2271  // writing direction.
2272  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2273  ++i;
2274  }
2275 }
Definition: points.h:189
float angle() const
find angle
Definition: points.h:247
float y() const
Definition: points.h:210

◆ GetBoolVariable()

bool tesseract::TessBaseAPI::GetBoolVariable ( const char *  name,
bool *  value 
) const

Definition at line 306 of file baseapi.cpp.

306  {
307  auto *p = ParamUtils::FindParam<BoolParam>(
308  name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
309  if (p == nullptr) return false;
310  *value = bool(*p);
311  return true;
312 }
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:32
ParamsVectors * params()
Definition: ccutil.h:67
GenericVector< BoolParam * > bool_params
Definition: params.h:44

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1520 of file baseapi.cpp.

1520  {
1521  if (tesseract_ == nullptr ||
1522  (!recognition_done_ && Recognize(nullptr) < 0))
1523  return nullptr;
1524  int blob_count;
1525  int utf8_length = TextLength(&blob_count);
1526  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1528  char* result = new char[total_length];
1529  result[0] = '\0';
1530  int output_length = 0;
1531  LTRResultIterator* it = GetLTRIterator();
1532  do {
1533  int left, top, right, bottom;
1534  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1535  const std::unique_ptr</*non-const*/ char[]> text(
1536  it->GetUTF8Text(RIL_SYMBOL));
1537  // Tesseract uses space for recognition failure. Fix to a reject
1538  // character, kTesseractReject so we don't create illegal box files.
1539  for (int i = 0; text[i] != '\0'; ++i) {
1540  if (text[i] == ' ')
1541  text[i] = kTesseractReject;
1542  }
1543  snprintf(result + output_length, total_length - output_length,
1544  "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
1545  right, image_height_ - top, page_number);
1546  output_length += strlen(result + output_length);
1547  // Just in case...
1548  if (output_length + kMaxBytesPerLine > total_length)
1549  break;
1550  }
1551  } while (it->Next(RIL_SYMBOL));
1552  delete it;
1553  return result;
1554 }
const char kTesseractReject
Definition: baseapi.cpp:106
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1502
const int kMaxBytesPerLine
Definition: baseapi.cpp:1511
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1307
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2169

◆ GetComponentImages() [1/2]

Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 450 of file baseapi.h.

452  {
453  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
454  }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetComponentImages() [2/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( PageIteratorLevel  level,
bool  text_only,
bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not nullptr, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 706 of file baseapi.cpp.

710  {
711  PageIterator* page_it = GetIterator();
712  if (page_it == nullptr)
713  page_it = AnalyseLayout();
714  if (page_it == nullptr)
715  return nullptr; // Failed.
716 
717  // Count the components to get a size for the arrays.
718  int component_count = 0;
719  int left, top, right, bottom;
720 
721  TessResultCallback<bool>* get_bbox = nullptr;
722  if (raw_image) {
723  // Get bounding box in original raw image with padding.
725  level, raw_padding,
726  &left, &top, &right, &bottom);
727  } else {
728  // Get bounding box from binarized imaged. Note that this could be
729  // differently scaled from the original image.
730  get_bbox = NewPermanentTessCallback(page_it,
732  level, &left, &top, &right, &bottom);
733  }
734  do {
735  if (get_bbox->Run() &&
736  (!text_only || PTIsTextType(page_it->BlockType())))
737  ++component_count;
738  } while (page_it->Next(level));
739 
740  Boxa* boxa = boxaCreate(component_count);
741  if (pixa != nullptr)
742  *pixa = pixaCreate(component_count);
743  if (blockids != nullptr)
744  *blockids = new int[component_count];
745  if (paraids != nullptr)
746  *paraids = new int[component_count];
747 
748  int blockid = 0;
749  int paraid = 0;
750  int component_index = 0;
751  page_it->Begin();
752  do {
753  if (get_bbox->Run() &&
754  (!text_only || PTIsTextType(page_it->BlockType()))) {
755  Box* lbox = boxCreate(left, top, right - left, bottom - top);
756  boxaAddBox(boxa, lbox, L_INSERT);
757  if (pixa != nullptr) {
758  Pix* pix = nullptr;
759  if (raw_image) {
760  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
761  &top);
762  } else {
763  pix = page_it->GetBinaryImage(level);
764  }
765  pixaAddPix(*pixa, pix, L_INSERT);
766  pixaAddBox(*pixa, lbox, L_CLONE);
767  }
768  if (paraids != nullptr) {
769  (*paraids)[component_index] = paraid;
770  if (page_it->IsAtFinalElement(RIL_PARA, level))
771  ++paraid;
772  }
773  if (blockids != nullptr) {
774  (*blockids)[component_index] = blockid;
775  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
776  ++blockid;
777  paraid = 0;
778  }
779  }
780  ++component_index;
781  }
782  } while (page_it->Next(level));
783  delete page_it;
784  delete get_bbox;
785  return boxa;
786 }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
virtual R Run()=0

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the pixa array.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 694 of file baseapi.cpp.

694  {
695  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
696 }

◆ GetDatapath()

const char * tesseract::TessBaseAPI::GetDatapath ( )

Definition at line 966 of file baseapi.cpp.

966  {
967  return tesseract_->datadir.c_str();
968 }

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2299 of file baseapi.cpp.

2299  {
2300  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
2301  return tesseract_->getDict().GetDawg(i);
2302 }
int NumDawgs() const
Definition: baseapi.cpp:2305
Dict & getDict() override
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:432

◆ GetDoubleVariable()

bool tesseract::TessBaseAPI::GetDoubleVariable ( const char *  name,
double *  value 
) const

Definition at line 320 of file baseapi.cpp.

320  {
321  auto *p = ParamUtils::FindParam<DoubleParam>(
322  name, GlobalParams()->double_params, tesseract_->params()->double_params);
323  if (p == nullptr) return false;
324  *value = (double)(*p);
325  return true;
326 }
GenericVector< DoubleParam * > double_params
Definition: params.h:46

◆ GetFeaturesForBlob()

void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB blob,
INT_FEATURE_STRUCT int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob.

This method returns the features associated with the input blob.

Definition at line 2607 of file baseapi.cpp.

2610  {
2611  GenericVector<int> outline_counts;
2614  INT_FX_RESULT_STRUCT fx_info;
2615  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2616  &cn_features, &fx_info, &outline_counts);
2617  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2618  *num_features = 0;
2619  return; // Feature extraction failed.
2620  }
2621  *num_features = cn_features.size();
2622  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2623  // TODO(rays) Pass outline_counts back and simplify the calling code.
2624  if (feature_outline_index != nullptr) {
2625  int f = 0;
2626  for (int i = 0; i < outline_counts.size(); ++i) {
2627  while (f < outline_counts[i])
2628  feature_outline_index[f++] = i;
2629  }
2630  }
2631 }
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:129
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:442

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and receive progress callbacks. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 132 of file hocrrenderer.cpp.

132  {
133  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
134  return nullptr;
135 
136  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, gcnt = 1;
137  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
138  bool para_is_ltr = true; // Default direction is LTR
139  const char* paragraph_lang = nullptr;
140  bool font_info = false;
141  bool hocr_boxes = false;
142  GetBoolVariable("hocr_font_info", &font_info);
143  GetBoolVariable("hocr_char_boxes", &hocr_boxes);
144 
145  if (input_file_ == nullptr) SetInputName(nullptr);
146 
147 #ifdef _WIN32
148  // convert input name from ANSI encoding to utf-8
149  int str16_len =
150  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
151  wchar_t* uni16_str = new WCHAR[str16_len];
152  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
153  uni16_str, str16_len);
154  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
155  0, nullptr, nullptr);
156  char* utf8_str = new char[utf8_len];
157  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
158  nullptr, nullptr);
159  *input_file_ = utf8_str;
160  delete[] uni16_str;
161  delete[] utf8_str;
162 #endif
163 
164  std::stringstream hocr_str;
165  // Use "C" locale (needed for double values x_size and x_descenders).
166  hocr_str.imbue(std::locale::classic());
167  // Use 8 digits for double values.
168  hocr_str.precision(8);
169  hocr_str << " <div class='ocr_page'";
170  hocr_str << " id='"
171  << "page_" << page_id << "'";
172  hocr_str << " title='image \"";
173  if (input_file_) {
174  hocr_str << HOcrEscape(input_file_->string()).c_str();
175  } else {
176  hocr_str << "unknown";
177  }
178  hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
179  << rect_width_ << " " << rect_height_ << "; ppageno " << page_number
180  << "'>\n";
181 
182  std::unique_ptr<ResultIterator> res_it(GetIterator());
183  while (!res_it->Empty(RIL_BLOCK)) {
184  if (res_it->Empty(RIL_WORD)) {
185  res_it->Next(RIL_WORD);
186  continue;
187  }
188 
189  // Open any new block/paragraph/textline.
190  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
191  para_is_ltr = true; // reset to default direction
192  hocr_str << " <div class='ocr_carea'"
193  << " id='"
194  << "block_" << page_id << "_" << bcnt << "'";
195  AddBoxTohOCR(res_it.get(), RIL_BLOCK, hocr_str);
196  }
197  if (res_it->IsAtBeginningOf(RIL_PARA)) {
198  hocr_str << "\n <p class='ocr_par'";
199  para_is_ltr = res_it->ParagraphIsLtr();
200  if (!para_is_ltr) {
201  hocr_str << " dir='rtl'";
202  }
203  hocr_str << " id='"
204  << "par_" << page_id << "_" << pcnt << "'";
205  paragraph_lang = res_it->WordRecognitionLanguage();
206  if (paragraph_lang) {
207  hocr_str << " lang='" << paragraph_lang << "'";
208  }
209  AddBoxTohOCR(res_it.get(), RIL_PARA, hocr_str);
210  }
211  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
212  hocr_str << "\n <span class='";
213  switch (res_it->BlockType()) {
214  case PT_HEADING_TEXT:
215  hocr_str << "ocr_header";
216  break;
217  case PT_PULLOUT_TEXT:
218  hocr_str << "ocr_textfloat";
219  break;
220  case PT_CAPTION_TEXT:
221  hocr_str << "ocr_caption";
222  break;
223  default:
224  hocr_str << "ocr_line";
225  }
226  hocr_str << "' id='"
227  << "line_" << page_id << "_" << lcnt << "'";
228  AddBoxTohOCR(res_it.get(), RIL_TEXTLINE, hocr_str);
229  }
230 
231  // Now, process the word...
232  std::vector<std::vector<std::pair<const char*, float>>>* choiceMap =
233  nullptr;
235 
236  choiceMap = res_it->GetBestLSTMSymbolChoices();
237  }
238  hocr_str << "\n <span class='ocrx_word'"
239  << " id='"
240  << "word_" << page_id << "_" << wcnt << "'";
241  int left, top, right, bottom;
242  bool bold, italic, underlined, monospace, serif, smallcaps;
243  int pointsize, font_id;
244  const char* font_name;
245  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
246  font_name =
247  res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
248  &serif, &smallcaps, &pointsize, &font_id);
249  hocr_str << " title='bbox " << left << " " << top << " " << right << " "
250  << bottom << "; x_wconf "
251  << static_cast<int>(res_it->Confidence(RIL_WORD));
252  if (font_info) {
253  if (font_name) {
254  hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
255  }
256  hocr_str << "; x_fsize " << pointsize;
257  }
258  hocr_str << "'";
259  const char* lang = res_it->WordRecognitionLanguage();
260  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
261  hocr_str << " lang='" << lang << "'";
262  }
263  switch (res_it->WordDirection()) {
264  // Only emit direction if different from current paragraph direction
265  case DIR_LEFT_TO_RIGHT:
266  if (!para_is_ltr) hocr_str << " dir='ltr'";
267  break;
268  case DIR_RIGHT_TO_LEFT:
269  if (para_is_ltr) hocr_str << " dir='rtl'";
270  break;
271  case DIR_MIX:
272  case DIR_NEUTRAL:
273  default: // Do nothing.
274  break;
275  }
276  hocr_str << ">";
277  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
278  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
279  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
280  if (bold) hocr_str << "<strong>";
281  if (italic) hocr_str << "<em>";
282  do {
283  const std::unique_ptr<const char[]> grapheme(
284  res_it->GetUTF8Text(RIL_SYMBOL));
285  if (grapheme && grapheme[0] != 0) {
286  if (hocr_boxes) {
287  res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
288  hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
289  << left << " " << top << " " << right << " " << bottom
290  << "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
291  }
292  hocr_str << HOcrEscape(grapheme.get()).c_str();
293  if (hocr_boxes) {
294  hocr_str << "</span>";
295  }
296  }
297  res_it->Next(RIL_SYMBOL);
298  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
299  if (italic) hocr_str << "</em>";
300  if (bold) hocr_str << "</strong>";
301  // If the lstm choice mode is required it is added here
302  if (tesseract_->lstm_choice_mode == 1 && choiceMap != nullptr) {
303  for (auto timestep : *choiceMap) {
304  hocr_str << "\n <span class='ocrx_cinfo'"
305  << " id='"
306  << "timestep_" << page_id << "_" << wcnt << "_" << tcnt << "'"
307  << ">";
308  for (std::pair<const char*, float> conf : timestep) {
309  hocr_str << "<span class='ocr_glyph'"
310  << " id='"
311  << "choice_" << page_id << "_" << wcnt << "_" << gcnt << "'"
312  << " title='x_confs " << int(conf.second * 100) << "'>"
313  << conf.first << "</span>";
314  gcnt++;
315  }
316  hocr_str << "</span>";
317  tcnt++;
318  }
319  } else if (tesseract_->lstm_choice_mode == 2 && choiceMap != nullptr) {
320  for (auto timestep : *choiceMap) {
321  if (timestep.size() > 0) {
322  hocr_str << "\n <span class='ocrx_cinfo'"
323  << " id='"
324  << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
325  << "'>";
326  for (auto & j : timestep) {
327  hocr_str << "<span class='ocr_glyph'"
328  << " id='"
329  << "choice_" << page_id << "_" << wcnt << "_" << gcnt
330  << "'"
331  << " title='x_confs " << int(j.second * 100)
332  << "'>" << j.first << "</span>";
333  gcnt++;
334  }
335  hocr_str << "</span>";
336  tcnt++;
337  }
338  }
339  }
340  // Close ocrx_word.
341  if (hocr_boxes || tesseract_->lstm_choice_mode > 0) {
342  hocr_str << "\n ";
343  }
344  hocr_str << "</span>";
345  tcnt = 1;
346  gcnt = 1;
347  wcnt++;
348  // Close any ending block/paragraph/textline.
349  if (last_word_in_line) {
350  hocr_str << "\n </span>";
351  lcnt++;
352  }
353  if (last_word_in_para) {
354  hocr_str << "\n </p>\n";
355  pcnt++;
356  para_is_ltr = true; // back to default direction
357  }
358  if (last_word_in_block) {
359  hocr_str << " </div>\n";
360  bcnt++;
361  }
362  }
363  hocr_str << " </div>\n";
364 
365  const std::string& text = hocr_str.str();
366  char* result = new char[text.length() + 1];
367  strcpy(result, text.c_str());
368  return result;
369 }
@ PT_PULLOUT_TEXT
Definition: capi.h:132
@ PT_HEADING_TEXT
Definition: capi.h:131
@ PT_CAPTION_TEXT
Definition: capi.h:137
@ DIR_MIX
Definition: unichar.h:45
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:44
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:43
@ DIR_NEUTRAL
Definition: unichar.h:42
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:306

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 119 of file hocrrenderer.cpp.

119  {
120  return GetHOCRText(nullptr, page_number);
121 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)

◆ GetInitLanguagesAsString()

const char * tesseract::TessBaseAPI::GetInitLanguagesAsString ( ) const

Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.

Definition at line 432 of file baseapi.cpp.

432  {
433  return (language_ == nullptr || language_->string() == nullptr) ?
434  "" : language_->string();
435 }

◆ GetInputImage()

Pix * tesseract::TessBaseAPI::GetInputImage ( )

Definition at line 958 of file baseapi.cpp.

958 { return tesseract_->pix_original(); }
Pix * pix_original() const

◆ GetInputName()

const char * tesseract::TessBaseAPI::GetInputName ( )

These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.

Definition at line 960 of file baseapi.cpp.

960  {
961  if (input_file_)
962  return input_file_->c_str();
963  return nullptr;
964 }

◆ GetIntVariable()

bool tesseract::TessBaseAPI::GetIntVariable ( const char *  name,
int *  value 
) const

Returns true if the parameter was found among Tesseract parameters. Fills in value with the value of the parameter.

Definition at line 298 of file baseapi.cpp.

298  {
299  auto *p = ParamUtils::FindParam<IntParam>(
300  name, GlobalParams()->int_params, tesseract_->params()->int_params);
301  if (p == nullptr) return false;
302  *value = (int32_t)(*p);
303  return true;
304 }
GenericVector< IntParam * > int_params
Definition: params.h:43

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1324 of file baseapi.cpp.

1324  {
1325  if (tesseract_ == nullptr || page_res_ == nullptr)
1326  return nullptr;
1327  return ResultIterator::StartOfParagraph(LTRResultIterator(
1331 }
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)

◆ GetLoadedLanguagesAsVector()

void tesseract::TessBaseAPI::GetLoadedLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the loaded languages in the vector of STRINGs. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.

Definition at line 442 of file baseapi.cpp.

443  {
444  langs->clear();
445  if (tesseract_ != nullptr) {
446  langs->push_back(tesseract_->lang);
447  int num_subs = tesseract_->num_sub_langs();
448  for (int i = 0; i < num_subs; ++i)
449  langs->push_back(tesseract_->get_sub_lang(i)->lang);
450  }
451 }
int push_back(T object)
int num_sub_langs() const
Tesseract * get_sub_lang(int index) const
STRING lang
Definition: ccutil.h:71

◆ GetLSTMBoxText()

char * tesseract::TessBaseAPI::GetLSTMBoxText ( int  page_number = 0)

Make a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 38 of file lstmboxrenderer.cpp.

38  {
39  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
40  return nullptr;
41 
42  STRING lstm_box_str("");
43  bool first_word = true;
44  int left = 0, top = 0, right = 0, bottom = 0;
45 
46  LTRResultIterator* res_it = GetLTRIterator();
47  while (!res_it->Empty(RIL_BLOCK)) {
48  if (res_it->Empty(RIL_SYMBOL)) {
49  res_it->Next(RIL_SYMBOL);
50  continue;
51  }
52  if (!first_word) {
53  if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
54  if (res_it->IsAtBeginningOf(RIL_WORD)) {
55  lstm_box_str.add_str_int(" ", left);
56  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
57  &lstm_box_str);
58  lstm_box_str += "\n"; // end of row for word
59  } // word
60  } else {
61  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62  lstm_box_str.add_str_int("\t ", left);
63  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
64  &lstm_box_str);
65  lstm_box_str += "\n"; // end of row for line
66  } // line
67  }
68  } // not first word
69  first_word = false;
70  // Use bounding box for whole line for everything
71  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
72  do {
73  lstm_box_str +=
74  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
75  res_it->Next(RIL_SYMBOL);
76  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
77  lstm_box_str.add_str_int(" ", left);
78  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
79  lstm_box_str += "\n"; // end of row for symbol
80  }
81  if (!first_word) { // if first_word is true => empty page
82  lstm_box_str.add_str_int("\t ", left);
83  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
84  lstm_box_str += "\n"; // end of PAGE
85  }
86  char* ret = new char[lstm_box_str.length() + 1];
87  strcpy(ret, lstm_box_str.string());
88  delete res_it;
89  return ret;
90 }

◆ GetLTRIterator()

LTRResultIterator * tesseract::TessBaseAPI::GetLTRIterator ( )
protected

Return an LTR Result Iterator – used only for training, as we really want to ignore all BiDi smarts at that point. Delete it once you're done with it.

Get a left-to-right iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use.

Definition at line 1307 of file baseapi.cpp.

1307  {
1308  if (tesseract_ == nullptr || page_res_ == nullptr)
1309  return nullptr;
1310  return new LTRResultIterator(
1314 }

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1341 of file baseapi.cpp.

1341  {
1342  if (tesseract_ == nullptr || page_res_ == nullptr)
1343  return nullptr;
1344  return new MutableIterator(page_res_, tesseract_,
1348 }

◆ getOpenCLDevice()

size_t tesseract::TessBaseAPI::getOpenCLDevice ( void **  data)
static

If compiled with OpenCL AND an available OpenCL device is deemed faster than serial code, then *data is populated with a newly allocated cl_device_id and sizeof(cl_device_id) is returned; otherwise *data is set to nullptr and 0 is returned.

Definition at line 244 of file baseapi.cpp.

244  {
245 #ifdef USE_OPENCL
246  ds_device device = OpenclDevice::getDeviceSelection();
247  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
248  *data = new cl_device_id;
249  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
250  return sizeof(cl_device_id);
251  }
252 #endif
253 
254  *data = nullptr;
255  return 0;
256 }

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1717 of file baseapi.cpp.

1717  {
1718  int orient_deg;
1719  float orient_conf;
1720  const char* script_name;
1721  float script_conf;
1722 
1723  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
1724  &script_conf))
1725  return nullptr;
1726 
1727  // clockwise rotation needed to make the page upright
1728  int rotate = OrientationIdToValue(orient_deg / 90);
1729 
1730  std::stringstream stream;
1731  // Use "C" locale (needed for float values orient_conf and script_conf).
1732  stream.imbue(std::locale::classic());
1733  // Use fixed notation with 2 digits after the decimal point for float values.
1734  stream.precision(2);
1735  stream
1736  << std::fixed
1737  << "Page number: " << page_number << "\n"
1738  << "Orientation in degrees: " << orient_deg << "\n"
1739  << "Rotate: " << rotate << "\n"
1740  << "Orientation confidence: " << orient_conf << "\n"
1741  << "Script: " << script_name << "\n"
1742  << "Script confidence: " << script_conf << "\n";
1743  const std::string& text = stream.str();
1744  char* result = new char[text.length() + 1];
1745  strcpy(result, text.c_str());
1746  return result;
1747 }
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:566
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1686

◆ GetPageRes()

TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const
inlineprotected

Definition at line 883 of file baseapi.h.

883 { return page_res_; }

◆ GetPageSegMode()

PageSegMode tesseract::TessBaseAPI::GetPageSegMode ( ) const

Return the current page segmentation mode.

Definition at line 522 of file baseapi.cpp.

522  {
523  if (tesseract_ == nullptr)
524  return PSM_SINGLE_BLOCK;
525  return static_cast<PageSegMode>(
526  static_cast<int>(tesseract_->tessedit_pageseg_mode));
527 }
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:172

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 649 of file baseapi.cpp.

649  {
650  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
651 }

◆ GetSourceYResolution()

int tesseract::TessBaseAPI::GetSourceYResolution ( )

Definition at line 970 of file baseapi.cpp.

970  {
972 }

◆ GetStringVariable()

const char * tesseract::TessBaseAPI::GetStringVariable ( const char *  name) const

Returns the pointer to the string that represents the value of the parameter if it was found among Tesseract parameters.

Definition at line 314 of file baseapi.cpp.

314  {
315  auto *p = ParamUtils::FindParam<StringParam>(
316  name, GlobalParams()->string_params, tesseract_->params()->string_params);
317  return (p != nullptr) ? p->string() : nullptr;
318 }
GenericVector< StringParam * > string_params
Definition: params.h:45

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line, which the caller must delete [] after use.

Definition at line 675 of file baseapi.cpp.

675  {
676  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
677 }

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 1929 of file baseapi.cpp.

1929  {
1930  PageIterator* it = AnalyseLayout();
1931  if (it == nullptr) {
1932  return false;
1933  }
1934  int x1, x2, y1, y2;
1935  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
1936  // Calculate offset and slope (NOTE: Kind of ugly)
1937  if (x2 <= x1) x2 = x1 + 1;
1938  // Convert the point pair to slope/offset of the baseline (in image coords.)
1939  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
1940  *out_offset = static_cast<int>(y1 - *out_slope * x1);
1941  // Get the y-coord of the baseline at the left and right edges of the
1942  // textline's bounding box.
1943  int left, top, right, bottom;
1944  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1945  delete it;
1946  return false;
1947  }
1948  int left_y = IntCastRounded(*out_slope * left + *out_offset);
1949  int right_y = IntCastRounded(*out_slope * right + *out_offset);
1950  // Shift the baseline down so it passes through the nearest bottom-corner
1951  // of the textline's bounding box. This is the difference between the y
1952  // at the lowest (max) edge of the box and the actual box bottom.
1953  *out_offset += bottom - std::max(left_y, right_y);
1954  // Switch back to bottom-up tesseract coordinates. Requires negation of
1955  // the slope and height - offset for the offset.
1956  *out_slope = -*out_slope;
1957  *out_offset = rect_height_ - *out_offset;
1958  delete it;
1959 
1960  return true;
1961 }
int IntCastRounded(double x)
Definition: helpers.h:175

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 661 of file baseapi.cpp.

662  {
663  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
664  pixa, blockids, paraids);
665 }

◆ GetTextlines() [2/2]

Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 401 of file baseapi.h.

401  {
402  return GetTextlines(false, 0, pixa, blockids, nullptr);
403  }
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:661

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 635 of file baseapi.cpp.

635  {
636  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
637  if (tesseract_->pix_binary() == nullptr &&
639  return nullptr;
640  }
641  return pixClone(tesseract_->pix_binary());
642 }

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 788 of file baseapi.cpp.

788  {
789  if (thresholder_ == nullptr) {
790  return 0;
791  }
792  return thresholder_->GetScaleFactor();
793 }

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1383 of file baseapi.cpp.

1383  {
1384  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
1385  return nullptr;
1386 
1387  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1388  int page_id = page_number + 1; // we use 1-based page numbers.
1389 
1390  STRING tsv_str("");
1391 
1392  int page_num = page_id;
1393  int block_num = 0;
1394  int par_num = 0;
1395  int line_num = 0;
1396  int word_num = 0;
1397 
1398  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1399  tsv_str.add_str_int("\t", block_num);
1400  tsv_str.add_str_int("\t", par_num);
1401  tsv_str.add_str_int("\t", line_num);
1402  tsv_str.add_str_int("\t", word_num);
1403  tsv_str.add_str_int("\t", rect_left_);
1404  tsv_str.add_str_int("\t", rect_top_);
1405  tsv_str.add_str_int("\t", rect_width_);
1406  tsv_str.add_str_int("\t", rect_height_);
1407  tsv_str += "\t-1\t\n";
1408 
1409  ResultIterator* res_it = GetIterator();
1410  while (!res_it->Empty(RIL_BLOCK)) {
1411  if (res_it->Empty(RIL_WORD)) {
1412  res_it->Next(RIL_WORD);
1413  continue;
1414  }
1415 
1416  // Add rows for any new block/paragraph/textline.
1417  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1418  block_num++;
1419  par_num = 0;
1420  line_num = 0;
1421  word_num = 0;
1422  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1423  tsv_str.add_str_int("\t", block_num);
1424  tsv_str.add_str_int("\t", par_num);
1425  tsv_str.add_str_int("\t", line_num);
1426  tsv_str.add_str_int("\t", word_num);
1427  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1428  tsv_str += "\t-1\t\n"; // end of row for block
1429  }
1430  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1431  par_num++;
1432  line_num = 0;
1433  word_num = 0;
1434  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1435  tsv_str.add_str_int("\t", block_num);
1436  tsv_str.add_str_int("\t", par_num);
1437  tsv_str.add_str_int("\t", line_num);
1438  tsv_str.add_str_int("\t", word_num);
1439  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1440  tsv_str += "\t-1\t\n"; // end of row for para
1441  }
1442  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1443  line_num++;
1444  word_num = 0;
1445  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1446  tsv_str.add_str_int("\t", block_num);
1447  tsv_str.add_str_int("\t", par_num);
1448  tsv_str.add_str_int("\t", line_num);
1449  tsv_str.add_str_int("\t", word_num);
1450  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1451  tsv_str += "\t-1\t\n"; // end of row for line
1452  }
1453 
1454  // Now, process the word...
1455  int left, top, right, bottom;
1456  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1457  word_num++;
1458  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1459  tsv_str.add_str_int("\t", block_num);
1460  tsv_str.add_str_int("\t", par_num);
1461  tsv_str.add_str_int("\t", line_num);
1462  tsv_str.add_str_int("\t", word_num);
1463  tsv_str.add_str_int("\t", left);
1464  tsv_str.add_str_int("\t", top);
1465  tsv_str.add_str_int("\t", right - left);
1466  tsv_str.add_str_int("\t", bottom - top);
1467  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1468  tsv_str += "\t";
1469 
1470  // Increment counts if at end of block/paragraph/textline.
1471  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1472  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1473  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1474 
1475  do {
1476  tsv_str +=
1477  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1478  res_it->Next(RIL_SYMBOL);
1479  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1480  tsv_str += "\n"; // end of row
1481  wcnt++;
1482  }
1483 
1484  char* ret = new char[tsv_str.length() + 1];
1485  strcpy(ret, tsv_str.string());
1486  delete res_it;
1487  return ret;
1488 }

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2294 of file baseapi.cpp.

2294  {
2295  return tesseract_->unicharset.id_to_unichar(unichar_id);
2296 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes. Returned string must be freed with the delete [] operator.

Definition at line 1574 of file baseapi.cpp.

1574  {
1575  if (tesseract_ == nullptr ||
1576  (!recognition_done_ && Recognize(nullptr) < 0))
1577  return nullptr;
1578  bool tilde_crunch_written = false;
1579  bool last_char_was_newline = true;
1580  bool last_char_was_tilde = false;
1581 
1582  int total_length = TextLength(nullptr);
1583  PAGE_RES_IT page_res_it(page_res_);
1584  char* result = new char[total_length];
1585  char* ptr = result;
1586  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
1587  page_res_it.forward()) {
1588  WERD_RES *word = page_res_it.word();
1589  // Process the current word.
1590  if (word->unlv_crunch_mode != CR_NONE) {
1591  if (word->unlv_crunch_mode != CR_DELETE &&
1592  (!tilde_crunch_written ||
1593  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1594  word->word->space() > 0 &&
1595  !word->word->flag(W_FUZZY_NON) &&
1596  !word->word->flag(W_FUZZY_SP)))) {
1597  if (!word->word->flag(W_BOL) &&
1598  word->word->space() > 0 &&
1599  !word->word->flag(W_FUZZY_NON) &&
1600  !word->word->flag(W_FUZZY_SP)) {
1601  /* Write a space to separate from preceding good text */
1602  *ptr++ = ' ';
1603  last_char_was_tilde = false;
1604  }
1605  if (!last_char_was_tilde) {
1606  // Write a reject char.
1607  last_char_was_tilde = true;
1608  *ptr++ = kUNLVReject;
1609  tilde_crunch_written = true;
1610  last_char_was_newline = false;
1611  }
1612  }
1613  } else {
1614  // NORMAL PROCESSING of non tilde crunched words.
1615  tilde_crunch_written = false;
1617  const char* wordstr = word->best_choice->unichar_string().string();
1618  const STRING& lengths = word->best_choice->unichar_lengths();
1619  int length = lengths.length();
1620  int i = 0;
1621  int offset = 0;
1622 
1623  if (last_char_was_tilde &&
1624  word->word->space() == 0 && wordstr[offset] == ' ') {
1625  // Prevent adjacent tilde across words - we know that adjacent tildes
1626  // within words have been removed.
1627  // Skip the first character.
1628  offset = lengths[i++];
1629  }
1630  if (i < length && wordstr[offset] != 0) {
1631  if (!last_char_was_newline)
1632  *ptr++ = ' ';
1633  else
1634  last_char_was_newline = false;
1635  for (; i < length; offset += lengths[i++]) {
1636  if (wordstr[offset] == ' ' ||
1637  wordstr[offset] == kTesseractReject) {
1638  *ptr++ = kUNLVReject;
1639  last_char_was_tilde = true;
1640  } else {
1641  if (word->reject_map[i].rejected())
1642  *ptr++ = kUNLVSuspect;
1643  UNICHAR ch(wordstr + offset, lengths[i]);
1644  int uni_ch = ch.first_uni();
1645  for (int j = 0; kUniChs[j] != 0; ++j) {
1646  if (kUniChs[j] == uni_ch) {
1647  uni_ch = kLatinChs[j];
1648  break;
1649  }
1650  }
1651  if (uni_ch <= 0xff) {
1652  *ptr++ = static_cast<char>(uni_ch);
1653  last_char_was_tilde = false;
1654  } else {
1655  *ptr++ = kUNLVReject;
1656  last_char_was_tilde = true;
1657  }
1658  }
1659  }
1660  }
1661  }
1662  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1663  /* Add a new line output */
1664  *ptr++ = '\n';
1665  tilde_crunch_written = false;
1666  last_char_was_newline = true;
1667  last_char_was_tilde = false;
1668  }
1669  }
1670  *ptr++ = '\n';
1671  *ptr = '\0';
1672  return result;
1673 }
@ CR_DELETE
Definition: pageres.h:161
@ CR_NONE
Definition: pageres.h:158
@ CR_KEEP_SPACE
Definition: pageres.h:159
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:39
@ W_EOL
end of line
Definition: werd.h:33
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:40
@ W_BOL
start of line
Definition: werd.h:32
const int kLatinChs[]
Definition: baseapi.cpp:1565
const char kUNLVReject
Definition: baseapi.cpp:108
const char kUNLVSuspect
Definition: baseapi.cpp:110
const int kUniChs[]
Definition: baseapi.cpp:1561
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:273
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:315
REJMAP reject_map
Definition: pageres.h:294
const STRING & unichar_lengths() const
Definition: ratngs.h:538
const STRING & unichar_string() const
Definition: ratngs.h:531
uint8_t space()
Definition: werd.h:99
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
int32_t length() const
Definition: strngs.cpp:189

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1351 of file baseapi.cpp.

1351  {
1352  if (tesseract_ == nullptr ||
1353  (!recognition_done_ && Recognize(nullptr) < 0))
1354  return nullptr;
1355  STRING text("");
1356  ResultIterator *it = GetIterator();
1357  do {
1358  if (it->Empty(RIL_PARA)) continue;
1359  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1360  text += para_text.get();
1361  } while (it->Next(RIL_PARA));
1362  char* result = new char[text.length() + 1];
1363  strncpy(result, text.string(), text.length() + 1);
1364  delete it;
1365  return result;
1366 }

◆ GetVariableAsString()

bool tesseract::TessBaseAPI::GetVariableAsString ( const char *  name,
STRING *  val 
)

Get value of named variable as a string, if it exists.

Definition at line 329 of file baseapi.cpp.

329  {
330  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
331 }
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:129

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 684 of file baseapi.cpp.

684  {
685  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
686 }

◆ GetWordStrBoxText()

char * tesseract::TessBaseAPI::GetWordStrBoxText ( int  page_number = 0)

The recognized text is returned as a char* which is coded in the same format as a WordStr box file used in training. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Create a UTF8 box file with WordStr strings from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 31 of file wordstrboxrenderer.cpp.

31  {
32  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
33  return nullptr;
34 
35  STRING wordstr_box_str("");
36  int left = 0, top = 0, right = 0, bottom = 0;
37 
38  bool first_line = true;
39 
40  LTRResultIterator* res_it = GetLTRIterator();
41  while (!res_it->Empty(RIL_BLOCK)) {
42  if (res_it->Empty(RIL_WORD)) {
43  res_it->Next(RIL_WORD);
44  continue;
45  }
46 
47  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
48  if (!first_line) {
49  wordstr_box_str.add_str_int("\n\t ", right + 1);
50  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
51  wordstr_box_str.add_str_int(" ", right + 5);
52  wordstr_box_str.add_str_int(" ", image_height_ - top);
53  wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
54  wordstr_box_str += "\n";
55  } else {
56  first_line = false;
57  }
58  // Use bounding box for whole line for WordStr
59  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
60  wordstr_box_str.add_str_int("WordStr ", left);
61  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
62  wordstr_box_str.add_str_int(" ", right);
63  wordstr_box_str.add_str_int(" ", image_height_ - top);
64  wordstr_box_str.add_str_int(" ", page_number); // word
65  wordstr_box_str += " #";
66  }
67  do {
68  wordstr_box_str +=
69  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
70  wordstr_box_str += " ";
71  res_it->Next(RIL_WORD);
72  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
73  }
74 
75  if (left != 0 && top != 0 && right != 0 && bottom != 0) {
76  wordstr_box_str.add_str_int("\n\t ", right + 1);
77  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
78  wordstr_box_str.add_str_int(" ", right + 5);
79  wordstr_box_str.add_str_int(" ", image_height_ - top);
80  wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
81  wordstr_box_str += "\n";
82  }
83  char* ret = new char[wordstr_box_str.length() + 1];
84  strcpy(ret, wordstr_box_str.string());
85  delete res_it;
86  return ret;
87 }

◆ Init() [1/4]

int tesseract::TessBaseAPI::Init ( const char *  data,
int  data_size,
const char *  language,
OcrEngineMode  mode,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_non_debug_params,
FileReader  reader 
)

Definition at line 358 of file baseapi.cpp.

362  {
363  // Default language is "eng".
364  if (language == nullptr) language = "eng";
365  STRING datapath = data_size == 0 ? data : language;
366  // If the datapath, OcrEngineMode or the language have changed - start again.
367  // Note that the language_ field stores the last requested language that was
368  // initialized successfully, while tesseract_->lang stores the language
369  // actually used. They differ only if the requested language was nullptr, in
370  // which case tesseract_->lang is set to the Tesseract default ("eng").
371  if (tesseract_ != nullptr &&
372  (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
374  (*language_ != language && tesseract_->lang != language))) {
375  delete tesseract_;
376  tesseract_ = nullptr;
377  }
378 #ifdef USE_OPENCL
379  OpenclDevice od;
380  od.InitEnv();
381 #endif
382  bool reset_classifier = true;
383  if (tesseract_ == nullptr) {
384  reset_classifier = false;
385  tesseract_ = new Tesseract;
386  if (reader != nullptr) reader_ = reader;
387  TessdataManager mgr(reader_);
388  if (data_size != 0) {
389  mgr.LoadMemBuffer(language, data, data_size);
390  }
392  datapath.string(),
393  output_file_ != nullptr ? output_file_->string() : nullptr,
394  language, oem, configs, configs_size, vars_vec, vars_values,
395  set_only_non_debug_params, &mgr) != 0) {
396  return -1;
397  }
398  }
399 
400  // Update datapath and language requested for the last valid initialization.
401  if (datapath_ == nullptr)
402  datapath_ = new STRING(datapath);
403  else
404  *datapath_ = datapath;
405  if ((strcmp(datapath_->string(), "") == 0) &&
406  (strcmp(tesseract_->datadir.string(), "") != 0))
408 
409  if (language_ == nullptr)
410  language_ = new STRING(language);
411  else
412  *language_ = language;
414 
415 #ifndef DISABLED_LEGACY_ENGINE
416  // For same language and datapath, just reset the adaptive classifier.
417  if (reset_classifier) {
419  }
420 #endif // ndef DISABLED_LEGACY_ENGINE
421  return 0;
422 }
OcrEngineMode oem() const
Definition: baseapi.h:803

◆ Init() [2/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language 
)
inline

Definition at line 223 of file baseapi.h.

223  {
224  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
225  }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:346

◆ Init() [3/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_non_debug_params 
)

Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS: you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

The datapath must be the name of the tessdata directory. The language is (usually) an ISO 639-3 string or nullptr will default to eng. It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating that multiple languages are to be loaded. Eg hin+eng will load Hindi and English. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. Eg if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init. This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.

The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].) The language is (usually) an ISO 639-3 string or nullptr will default to eng. If numeric_mode is true, then only digits and Roman numerals will be returned.

Returns
0 on success and -1 on initialization failure.

Definition at line 346 of file baseapi.cpp.

350  {
351  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
352  vars_values, set_only_non_debug_params, nullptr);
353 }

◆ Init() [4/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
)
inline

Definition at line 220 of file baseapi.h.

220  {
221  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
222  }

◆ InitForAnalysePage()

void tesseract::TessBaseAPI::InitForAnalysePage ( )

Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.

Definition at line 487 of file baseapi.cpp.

487  {
488  if (tesseract_ == nullptr) {
489  tesseract_ = new Tesseract;
490  #ifndef DISABLED_LEGACY_ENGINE
492  #endif
493  }
494 }

◆ InitLangMod()

int tesseract::TessBaseAPI::InitLangMod ( const char *  datapath,
const char *  language 
)

Init only the lang model component of Tesseract. The only functions that work after this init are SetVariable and IsValidWord. WARNING: temporary! This function will be removed from here and placed in a separate API at some future time.

Definition at line 473 of file baseapi.cpp.

473  {
474  if (tesseract_ == nullptr)
475  tesseract_ = new Tesseract;
476  else
478  TessdataManager mgr;
479  return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
480 }
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
Definition: tessedit.cpp:452
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:199

◆ InitTruthCallback()

void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback cb)
inline

Definition at line 805 of file baseapi.h.

805 { truth_cb_ = cb; }

◆ InternalSetImage()

bool tesseract::TessBaseAPI::InternalSetImage ( )
protected

Common code for setting the image. Returns true if Init has been called.

Common code for setting the image.

Definition at line 1997 of file baseapi.cpp.

1997  {
1998  if (tesseract_ == nullptr) {
1999  tprintf("Please call Init before attempting to set an image.\n");
2000  return false;
2001  }
2002  if (thresholder_ == nullptr)
2003  thresholder_ = new ImageThresholder;
2004  ClearResults();
2005  return true;
2006 }

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 1922 of file baseapi.cpp.

1922  {
1923  return tesseract_->unicharset.contains_unichar(utf8_character);
1924 }
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1918 of file baseapi.cpp.

1918  {
1919  return tesseract_->getDict().valid_word(word);
1920 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:778

◆ MakeTBLOB()

TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2368 of file baseapi.cpp.

2368  {
2369  int width = pixGetWidth(pix);
2370  int height = pixGetHeight(pix);
2371  BLOCK block("a character", true, 0, 0, 0, 0, width, height);
2372 
2373  // Create C_BLOBs from the page
2374  extract_edges(pix, &block);
2375 
2376  // Merge all C_BLOBs
2377  C_BLOB_LIST *list = block.blob_list();
2378  C_BLOB_IT c_blob_it(list);
2379  if (c_blob_it.empty())
2380  return nullptr;
2381  // Move all the outlines to the first blob.
2382  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2383  for (c_blob_it.forward();
2384  !c_blob_it.at_first();
2385  c_blob_it.forward()) {
2386  C_BLOB *c_blob = c_blob_it.data();
2387  ol_it.add_list_after(c_blob->out_list());
2388  }
2389  // Convert the first blob to the output TBLOB.
2390  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2391 }
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:329
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:327
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70

◆ MakeTessOCRRow()

ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2351 of file baseapi.cpp.

2354  {
2355  int32_t xstarts[] = {-32000};
2356  double quad_coeffs[] = {0, 0, baseline};
2357  return new ROW(1,
2358  xstarts,
2359  quad_coeffs,
2360  xheight,
2361  ascender - (baseline + xheight),
2362  descender - baseline,
2363  0,
2364  0);
2365 }

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1752 of file baseapi.cpp.

1752  {
1753  int* conf = AllWordConfidences();
1754  if (!conf) return 0;
1755  int sum = 0;
1756  int *pt = conf;
1757  while (*pt >= 0) sum += *pt++;
1758  if (pt != conf) sum /= pt - conf;
1759  delete [] conf;
1760  return sum;
1761 }

◆ NormalizeTBLOB()

void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB tblob,
ROW row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2398 of file baseapi.cpp.

2398  {
2399  TBOX box = tblob->bounding_box();
2400  float x_center = (box.left() + box.right()) / 2.0f;
2401  float baseline = row->base_line(x_center);
2402  float scale = kBlnXHeight / row->x_height();
2403  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
2404  0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
2405 }
const int kBlnBaselineOffset
Definition: normalis.h:25
const int kBlnXHeight
Definition: normalis.h:24
TBOX bounding_box() const
Definition: blobs.cpp:468
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:397
float base_line(float xpos) const
Definition: ocrrow.h:59
float x_height() const
Definition: ocrrow.h:64
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2305 of file baseapi.cpp.

2305  {
2306  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2307 }
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:430

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 803 of file baseapi.h.

803 { return last_oem_requested_; }

◆ PrintVariables()

void tesseract::TessBaseAPI::PrintVariables ( FILE *  fp) const

Print Tesseract parameters to the given file.

Definition at line 334 of file baseapi.cpp.

334  {
336 }
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:168

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1240 of file baseapi.cpp.

1242  {
1243  SetInputName(filename);
1244  SetImage(pix);
1245  bool failed = false;
1246 
1248  // Disabled character recognition
1249  PageIterator* it = AnalyseLayout();
1250 
1251  if (it == nullptr) {
1252  failed = true;
1253  } else {
1254  delete it;
1255  }
1257  failed = FindLines() != 0;
1258  } else if (timeout_millisec > 0) {
1259  // Running with a timeout.
1260  ETEXT_DESC monitor;
1261  monitor.cancel = nullptr;
1262  monitor.cancel_this = nullptr;
1263  monitor.set_deadline_msecs(timeout_millisec);
1264 
1265  // Now run the main recognition.
1266  failed = Recognize(&monitor) < 0;
1267  } else {
1268  // Normal layout and character recognition with no timeout.
1269  failed = Recognize(nullptr) < 0;
1270  }
1271 
1273 #ifndef ANDROID_BUILD
1274  Pix* page_pix = GetThresholdedImage();
1275  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1276 #endif // ANDROID_BUILD
1277  }
1278 
1279  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1280  // Save current config variables before switching modes.
1281  FILE* fp = fopen(kOldVarsFile, "wb");
1282  if (fp == nullptr) {
1283  tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1284  } else {
1285  PrintVariables(fp);
1286  fclose(fp);
1287  }
1288  // Switch to alternate mode for retry.
1289  ReadConfigFile(retry_config);
1290  SetImage(pix);
1291  Recognize(nullptr);
1292  // Restore saved config variables.
1293  ReadConfigFile(kOldVarsFile);
1294  }
1295 
1296  if (renderer && !failed) {
1297  failed = !renderer->AddImage(this);
1298  }
1299 
1300  return !failed;
1301 }
@ PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:164
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:334
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:580
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:501
Pix * GetThresholdedImage()
Definition: baseapi.cpp:635
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:129
void * cancel_this
monitor-aware progress callback
Definition: ocrclass.h:116
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:112

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not nullptr, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1076 of file baseapi.cpp.

1078  {
1079  bool result =
1080  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1081  #ifndef DISABLED_LEGACY_ENGINE
1082  if (result) {
1085  tprintf("Write of TR file failed: %s\n", output_file_->string());
1086  return false;
1087  }
1088  }
1089  #endif // ndef DISABLED_LEGACY_ENGINE
1090  return result;
1091 }
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1113
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:98

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Definition at line 1113 of file baseapi.cpp.

1116  {
1117  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1118  if (stdInput) {
1119 #ifdef WIN32
1120  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121  tprintf("ERROR: cin to binary: %s", strerror(errno));
1122 #endif // WIN32
1123  }
1124 
1125  if (stream_filelist) {
1126  return ProcessPagesFileList(stdin, nullptr, retry_config,
1127  timeout_millisec, renderer,
1129  }
1130 
1131  // At this point we are officially in autodection territory.
1132  // That means any data in stdin must be buffered, to make it
1133  // seekable.
1134  std::string buf;
1135  const l_uint8 *data = nullptr;
1136  if (stdInput) {
1137  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138  (std::istreambuf_iterator<char>()));
1139  data = reinterpret_cast<const l_uint8 *>(buf.data());
1140  } else if (strncmp(filename, "http:", 5) == 0 ||
1141  strncmp(filename, "https:", 6) == 0 ) {
1142  // Get image or image list by URL.
1143 #ifdef HAVE_LIBCURL
1144  CURL* curl = curl_easy_init();
1145  if (curl == nullptr) {
1146  fprintf(stderr, "Error, curl_easy_init failed\n");
1147  return false;
1148  } else {
1149  CURLcode curlcode;
1150  curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1151  ASSERT_HOST(curlcode == CURLE_OK);
1152  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1153  ASSERT_HOST(curlcode == CURLE_OK);
1154  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1155  ASSERT_HOST(curlcode == CURLE_OK);
1156  curlcode = curl_easy_perform(curl);
1157  ASSERT_HOST(curlcode == CURLE_OK);
1158  curl_easy_cleanup(curl);
1159  data = reinterpret_cast<const l_uint8 *>(buf.data());
1160  }
1161 #else
1162  fprintf(stderr, "Error, this tesseract has no URL support\n");
1163  return false;
1164 #endif
1165  } else {
1166  // Check whether the input file can be read.
1167  if (FILE* file = fopen(filename, "rb")) {
1168  fclose(file);
1169  } else {
1170  fprintf(stderr, "Error, cannot read input file %s: %s\n",
1171  filename, strerror(errno));
1172  return false;
1173  }
1174  }
1175 
1176  // Here is our autodetection
1177  int format;
1178  int r = (data != nullptr) ?
1179  findFileFormatBuffer(data, &format) :
1180  findFileFormat(filename, &format);
1181 
1182  // Maybe we have a filelist
1183  if (r != 0 || format == IFF_UNKNOWN) {
1184  STRING s;
1185  if (data != nullptr) {
1186  s = buf.c_str();
1187  } else {
1188  std::ifstream t(filename);
1189  std::string u((std::istreambuf_iterator<char>(t)),
1190  std::istreambuf_iterator<char>());
1191  s = u.c_str();
1192  }
1193  return ProcessPagesFileList(nullptr, &s, retry_config,
1194  timeout_millisec, renderer,
1196  }
1197 
1198  // Maybe we have a TIFF which is potentially multipage
1199  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1200  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1201  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1202 #if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1203  format == IFF_TIFF_JPEG ||
1204 #endif
1205  format == IFF_TIFF_ZIP);
1206 
1207  // Fail early if we can, before producing any output
1208  Pix *pix = nullptr;
1209  if (!tiff) {
1210  pix = (data != nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1211  if (pix == nullptr) {
1212  return false;
1213  }
1214  }
1215 
1216  // Begin the output
1217  if (renderer && !renderer->BeginDocument(document_title.c_str())) {
1218  pixDestroy(&pix);
1219  return false;
1220  }
1221 
1222  // Produce output
1223  r = (tiff) ?
1224  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1225  timeout_millisec, renderer,
1227  ProcessPage(pix, 0, filename, retry_config,
1228  timeout_millisec, renderer);
1229 
1230  // Clean up memory as needed
1231  pixDestroy(&pix);
1232 
1233  // End the output
1234  if (!r || (renderer && !renderer->EndDocument())) {
1235  return false;
1236  }
1237  return true;
1238 }
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1240

◆ ReadConfigFile()

void tesseract::TessBaseAPI::ReadConfigFile ( const char *  filename)

Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).

Read a "config" file containing a set of parameter name, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name.

Definition at line 501 of file baseapi.cpp.

501  {
503 }
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
Definition: params.h:39
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:48

◆ ReadDebugConfigFile()

void tesseract::TessBaseAPI::ReadDebugConfigFile ( const char *  filename)

Same as above, but only set debug params from the given config file.

Definition at line 506 of file baseapi.cpp.

506  {
508 }
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
Definition: params.h:37

◆ RecognitionPass1()

PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 ( BLOCK_LIST *  block_list)
protected

Recognize text doing one pass only, using settings for a given pass.

Definition at line 2464 of file baseapi.cpp.

2464  {
2465  auto *page_res = new PAGE_RES(false, block_list,
2467  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
2468  return page_res;
2469 }
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:302
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:476

◆ RecognitionPass2()

PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 ( BLOCK_LIST *  block_list,
PAGE_RES pass1_result 
)
protected

Definition at line 2471 of file baseapi.cpp.

2472  {
2473  if (!pass1_result)
2474  pass1_result = new PAGE_RES(false, block_list,
2476  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
2477  return pass1_result;
2478 }

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 830 of file baseapi.cpp.

830  {
831  if (tesseract_ == nullptr)
832  return -1;
833  if (FindLines() != 0)
834  return -1;
835  delete page_res_;
836  if (block_list_->empty()) {
837  page_res_ = new PAGE_RES(false, block_list_,
839  return 0; // Empty page.
840  }
841 
843  recognition_done_ = true;
844 #ifndef DISABLED_LEGACY_ENGINE
849  } else
850 #endif // ndef DISABLED_LEGACY_ENGINE
851  {
854  }
855 
856  if (page_res_ == nullptr) {
857  return -1;
858  }
859 
862  return -1;
863  }
865  return 0;
866  }
867 #ifndef DISABLED_LEGACY_ENGINE
870  return 0;
871  }
872 #endif // ndef DISABLED_LEGACY_ENGINE
873 
874  if (truth_cb_ != nullptr) {
875  tesseract_->wordrec_run_blamer.set_value(true);
876  auto *page_it = new PageIterator(
881  image_height_, page_it, this->tesseract()->pix_grey());
882  delete page_it;
883  }
884 
885  int result = 0;
887  #ifndef GRAPHICS_DISABLED
889  #endif // GRAPHICS_DISABLED
890  // The page_res is invalid after an interactive session, so cleanup
891  // in a way that lets us continue to the next page without crashing.
892  delete page_res_;
893  page_res_ = nullptr;
894  return -1;
895  #ifndef DISABLED_LEGACY_ENGINE
897  STRING fontname;
898  ExtractFontName(*output_file_, &fontname);
900  } else if (tesseract_->tessedit_ambigs_training) {
901  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
902  // OCR the page segmented into words by tesseract.
904  *input_file_, page_res_, monitor, training_output_file);
905  fclose(training_output_file);
906  #endif // ndef DISABLED_LEGACY_ENGINE
907  } else {
908  // Now run the main recognition.
909  bool wait_for_text = true;
910  GetBoolVariable("paragraph_text_based", &wait_for_text);
911  if (!wait_for_text) DetectParagraphs(false);
912  if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
913  if (wait_for_text) DetectParagraphs(true);
914  } else {
915  result = -1;
916  }
917  }
918  return result;
919 }
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:45
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
FILE * init_recog_training(const STRING &fname)
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
bool tessedit_resegment_from_line_boxes
void CorrectClassifyWords(PAGE_RES *page_res)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:378
bool TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:44
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:109
bool AnyLSTMLang() const
virtual void Run(A1, A2, A3, A4)=0
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
bool wordrec_run_blamer
Definition: wordrec.h:232

◆ RecognizeForChopTest()

int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 923 of file baseapi.cpp.

923  {
924  if (tesseract_ == nullptr)
925  return -1;
926  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
927  tprintf("Please call SetImage before attempting recognition.\n");
928  return -1;
929  }
930  if (page_res_ != nullptr)
931  ClearResults();
932  if (FindLines() != 0)
933  return -1;
934  // Additional conditions under which chopper test cannot be run
935  if (tesseract_->interactive_display_mode) return -1;
936 
937  recognition_done_ = true;
938 
939  page_res_ = new PAGE_RES(false, block_list_,
941 
942  PAGE_RES_IT page_res_it(page_res_);
943 
944  while (page_res_it.word() != nullptr) {
945  WERD_RES *word_res = page_res_it.word();
946  GenericVector<TBOX> boxes;
947  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
948  page_res_it.row()->row, word_res);
949  page_res_it.forward();
950  }
951  return 0;
952 }
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:243

◆ RunAdaptiveClassifier()

void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2660 of file baseapi.cpp.

2664  {
2665  auto* choices = new BLOB_CHOICE_LIST;
2666  tesseract_->AdaptiveClassifier(blob, choices);
2667  BLOB_CHOICE_IT choices_it(choices);
2668  int& index = *num_matches_returned;
2669  index = 0;
2670  for (choices_it.mark_cycle_pt();
2671  !choices_it.cycled_list() && index < num_max_matches;
2672  choices_it.forward()) {
2673  BLOB_CHOICE* choice = choices_it.data();
2674  unichar_ids[index] = choice->unichar_id();
2675  ratings[index] = choice->rating();
2676  ++index;
2677  }
2678  *num_matches_returned = index;
2679  delete choices;
2680 }
float rating() const
Definition: ratngs.h:80
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2215 of file baseapi.cpp.

2215  {
2216  tesseract_->min_orientation_margin.set_value(margin);
2217 }

◆ SetDebugVariable()

bool tesseract::TessBaseAPI::SetDebugVariable ( const char *  name,
const char *  value 
)

Definition at line 292 of file baseapi.cpp.

292  {
293  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
295  tesseract_->params());
296 }
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:79

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 1964 of file baseapi.cpp.

1964  {
1965  if (tesseract_ != nullptr) {
1967  }
1968 }
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:372

◆ SetFillLatticeFunc()

void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 1991 of file baseapi.cpp.

1991  {
1992  if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
1993 }
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:480

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 580 of file baseapi.cpp.

582  {
583  if (InternalSetImage()) {
584  thresholder_->SetImage(imagedata, width, height,
585  bytes_per_pixel, bytes_per_line);
587  }
588 }
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:1997
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 605 of file baseapi.cpp.

605  {
606  if (InternalSetImage()) {
607  if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
608  // remove alpha channel from png
609  Pix* p1 = pixRemoveAlpha(pix);
610  pixSetSpp(p1, 3);
611  (void)pixCopy(pix, p1);
612  pixDestroy(&p1);
613  }
614  thresholder_->SetImage(pix);
616  }
617 }

◆ SetInputImage()

void tesseract::TessBaseAPI::SetInputImage ( Pix *  pix)

Definition at line 956 of file baseapi.cpp.

956 { tesseract_->set_pix_original(pix); }
void set_pix_original(Pix *original_pix)

◆ SetInputName()

void tesseract::TessBaseAPI::SetInputName ( const char *  name)

Set the name of the input file. Needed for training and reading a UNLV zone file, and for searchable PDF output.

Set the name of the input file. Needed only for training and loading a UNLV zone file.

Definition at line 271 of file baseapi.cpp.

271  {
272  if (input_file_ == nullptr)
273  input_file_ = new STRING(name);
274  else
275  *input_file_ = name;
276 }

◆ SetOutputName()

void tesseract::TessBaseAPI::SetOutputName ( const char *  name)

Set the name of the bonus output files. Needed only for debugging.

Set the name of the output files. Needed only for debugging.

Definition at line 279 of file baseapi.cpp.

279  {
280  if (output_file_ == nullptr)
281  output_file_ = new STRING(name);
282  else
283  *output_file_ = name;
284 }

◆ SetPageSegMode()

void tesseract::TessBaseAPI::SetPageSegMode ( PageSegMode  mode)

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Set the current page segmentation mode. Defaults to PSM_AUTO. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Definition at line 515 of file baseapi.cpp.

515  {
516  if (tesseract_ == nullptr)
517  tesseract_ = new Tesseract;
518  tesseract_->tessedit_pageseg_mode.set_value(mode);
519 }

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 1978 of file baseapi.cpp.

1978  {
1979  if (tesseract_ != nullptr) {
1981  // Set it for the sublangs too.
1982  int num_subs = tesseract_->num_sub_langs();
1983  for (int i = 0; i < num_subs; ++i) {
1985  }
1986  }
1987 }
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:384

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 624 of file baseapi.cpp.

624  {
625  if (thresholder_ == nullptr)
626  return;
627  thresholder_->SetRectangle(left, top, width, height);
628  ClearResults();
629 }
void SetRectangle(int left, int top, int width, int height)

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 590 of file baseapi.cpp.

590  {
591  if (thresholder_)
593  else
594  tprintf("Please call SetImage before SetSourceResolution.\n");
595 }
void SetSourceYResolution(int ppi)
Definition: thresholder.h:85

◆ SetThresholder()

void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 365 of file baseapi.h.

365  {
366  delete thresholder_;
367  thresholder_ = thresholder;
368  ClearResults();
369  }

◆ SetVariable()

bool tesseract::TessBaseAPI::SetVariable ( const char *  name,
const char *  value 
)

Set the value of an internal "parameter." Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).

Definition at line 286 of file baseapi.cpp.

286  {
287  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
289  tesseract_->params());
290 }

◆ tesseract()

Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 801 of file baseapi.h.

801 { return tesseract_; }

◆ TesseractExtractResult()

int tesseract::TessBaseAPI::TesseractExtractResult ( char **  text,
int **  lengths,
float **  costs,
int **  x0,
int **  y0,
int **  x1,
int **  y1,
PAGE_RES *  page_res 
)
staticprotected

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

Definition at line 2555 of file baseapi.cpp.

2562  {
2563  TESS_CHAR_LIST tess_chars;
2564  TESS_CHAR_IT tess_chars_it(&tess_chars);
2565  extract_result(&tess_chars_it, page_res);
2566  tess_chars_it.move_to_first();
2567  int n = tess_chars.length();
2568  int text_len = 0;
2569  *lengths = new int[n];
2570  *costs = new float[n];
2571  *x0 = new int[n];
2572  *y0 = new int[n];
2573  *x1 = new int[n];
2574  *y1 = new int[n];
2575  int i = 0;
2576  for (tess_chars_it.mark_cycle_pt();
2577  !tess_chars_it.cycled_list();
2578  tess_chars_it.forward(), i++) {
2579  TESS_CHAR *tc = tess_chars_it.data();
2580  text_len += (*lengths)[i] = tc->length;
2581  (*costs)[i] = tc->cost;
2582  (*x0)[i] = tc->box.left();
2583  (*y0)[i] = tc->box.bottom();
2584  (*x1)[i] = tc->box.right();
2585  (*y1)[i] = tc->box.top();
2586  }
2587  char *p = *text = new char[text_len];
2588 
2589  tess_chars_it.move_to_first();
2590  for (tess_chars_it.mark_cycle_pt();
2591  !tess_chars_it.cycled_list();
2592  tess_chars_it.forward()) {
2593  TESS_CHAR *tc = tess_chars_it.data();
2594  strncpy(p, tc->unicode_repr, tc->length);
2595  p += tc->length;
2596  }
2597  return n;
2598 }

◆ TesseractRect()

char * tesseract::TessBaseAPI::TesseractRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
)

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Definition at line 542 of file baseapi.cpp.

546  {
547  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
548  return nullptr; // Nothing worth doing.
549 
550  // Since this original api didn't give the exact size of the image,
551  // we have to invent a reasonable value.
552  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
553  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
554  bytes_per_pixel, bytes_per_line);
555  SetRectangle(left, top, width, height);
556 
557  return GetUTF8Text();
558 }
const int kMinRectSize
Definition: baseapi.cpp:104
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:624

◆ TextLength()

int tesseract::TessBaseAPI::TextLength ( int *  blob_count)
protected

Return the length of the output text string, as UTF8, assuming one newline per line and one per block, with a terminator, and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Return the length of the output text string, as UTF8, assuming liberally two spacing marks after each word (as paragraphs end with two newlines), and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Definition at line 2169 of file baseapi.cpp.

2169  {
2170  if (tesseract_ == nullptr || page_res_ == nullptr)
2171  return 0;
2172 
2173  PAGE_RES_IT page_res_it(page_res_);
2174  int total_length = 2;
2175  int total_blobs = 0;
2176  // Iterate over the data structures to extract the recognition result.
2177  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
2178  page_res_it.forward()) {
2179  WERD_RES *word = page_res_it.word();
2180  WERD_CHOICE* choice = word->best_choice;
2181  if (choice != nullptr) {
2182  total_blobs += choice->length() + 2;
2183  total_length += choice->unichar_string().length() + 2;
2184  for (int i = 0; i < word->reject_map.length(); ++i) {
2185  if (word->reject_map[i].rejected())
2186  ++total_length;
2187  }
2188  }
2189  }
2190  if (blob_count != nullptr)
2191  *blob_count = total_blobs;
2192  return total_length;
2193 }
int length() const
Definition: ratngs.h:293
int32_t length() const
Definition: rejctmap.h:223

◆ Threshold()

bool tesseract::TessBaseAPI::Threshold ( Pix **  pix)
protectedvirtual

Run the thresholder to make the thresholded image. If pix is not nullptr, the source is thresholded to pix instead of the internal IMAGE.

Run the thresholder to make the thresholded image, returned in pix, which must not be nullptr. *pix must be initialized to nullptr, or point to an existing pixDestroyable Pix. The usual argument to Threshold is Tesseract::mutable_pix_binary().

Definition at line 2014 of file baseapi.cpp.

2014  {
2015  ASSERT_HOST(pix != nullptr);
2016  if (*pix != nullptr)
2017  pixDestroy(pix);
2018  // Zero resolution messes up the algorithms, so make sure it is credible.
2019  int user_dpi = 0;
2020  GetIntVariable("user_defined_dpi", &user_dpi);
2021  int y_res = thresholder_->GetScaledYResolution();
2022  if (user_dpi && (user_dpi < kMinCredibleResolution ||
2023  user_dpi > kMaxCredibleResolution)) {
2024  tprintf("Warning: User defined image dpi is outside of expected range "
2025  "(%d - %d)!\n",
2027  }
2028  // Always use user defined dpi
2029  if (user_dpi) {
2031  } else if (y_res < kMinCredibleResolution ||
2032  y_res > kMaxCredibleResolution) {
2033  tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
2034  y_res, kMinCredibleResolution);
2036  }
2037  auto pageseg_mode =
2038  static_cast<PageSegMode>(
2039  static_cast<int>(tesseract_->tessedit_pageseg_mode));
2040  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
2044  if (!thresholder_->IsBinary()) {
2047  } else {
2048  tesseract_->set_pix_thresholds(nullptr);
2049  tesseract_->set_pix_grey(nullptr);
2050  }
2051  // Set the internal resolution that is used for layout parameters from the
2052  // estimated resolution, rather than the image resolution, which may be
2053  // fabricated, but we will use the image resolution, if there is one, to
2054  // report output point sizes.
2055  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2058  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2059  tprintf("Estimated internal resolution %d out of range! "
2060  "Corrected to %d.\n",
2061  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2062  }
2063  tesseract_->set_source_resolution(estimated_res);
2064  return true;
2065 }
constexpr int kMinCredibleResolution
Definition: publictypes.h:38
constexpr int kMaxCredibleResolution
Definition: publictypes.h:40
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
void set_pix_grey(Pix *grey_pix)
void set_pix_thresholds(Pix *thresholds)
int GetScaledEstimatedResolution() const
Definition: thresholder.h:105
virtual Pix * GetPixRectGrey()
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
virtual Pix * GetPixRectThresholds()
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:74

◆ Version()

const char * tesseract::TessBaseAPI::Version ( )
static

Returns the version identifier as a static string. Do not delete.

Definition at line 233 of file baseapi.cpp.

233  {
234  return PACKAGE_VERSION;
235 }

Member Data Documentation

◆ block_list_

BLOCK_LIST* tesseract::TessBaseAPI::block_list_
protected

The page layout.

Definition at line 894 of file baseapi.h.

◆ datapath_

STRING* tesseract::TessBaseAPI::datapath_
protected

Current location of tessdata.

Definition at line 898 of file baseapi.h.

◆ equ_detect_

EquationDetect* tesseract::TessBaseAPI::equ_detect_
protected

The equation detector.

Definition at line 890 of file baseapi.h.

◆ image_height_

int tesseract::TessBaseAPI::image_height_
protected

Definition at line 914 of file baseapi.h.

◆ image_width_

int tesseract::TessBaseAPI::image_width_
protected

Definition at line 913 of file baseapi.h.

◆ input_file_

STRING* tesseract::TessBaseAPI::input_file_
protected

Name used by training code.

Definition at line 896 of file baseapi.h.

◆ language_

STRING* tesseract::TessBaseAPI::language_
protected

Last initialized language.

Definition at line 899 of file baseapi.h.

◆ last_oem_requested_

OcrEngineMode tesseract::TessBaseAPI::last_oem_requested_
protected

Last OCR engine mode requested.

Definition at line 900 of file baseapi.h.

◆ osd_tesseract_

Tesseract* tesseract::TessBaseAPI::osd_tesseract_
protected

For orientation & script detection.

Definition at line 889 of file baseapi.h.

◆ output_file_

STRING* tesseract::TessBaseAPI::output_file_
protected

Name used by debug code.

Definition at line 897 of file baseapi.h.

◆ page_res_

PAGE_RES* tesseract::TessBaseAPI::page_res_
protected

The page-level data.

Definition at line 895 of file baseapi.h.

◆ paragraph_models_

GenericVector<ParagraphModel *>* tesseract::TessBaseAPI::paragraph_models_
protected

Definition at line 893 of file baseapi.h.

◆ reader_

FileReader tesseract::TessBaseAPI::reader_
protected

Reads files from any filesystem.

Definition at line 891 of file baseapi.h.

◆ recognition_done_

bool tesseract::TessBaseAPI::recognition_done_
protected

page_res_ contains recognition data.

Definition at line 901 of file baseapi.h.

◆ rect_height_

int tesseract::TessBaseAPI::rect_height_
protected

Definition at line 912 of file baseapi.h.

◆ rect_left_

int tesseract::TessBaseAPI::rect_left_
protected

Definition at line 909 of file baseapi.h.

◆ rect_top_

int tesseract::TessBaseAPI::rect_top_
protected

Definition at line 910 of file baseapi.h.

◆ rect_width_

int tesseract::TessBaseAPI::rect_width_
protected

Definition at line 911 of file baseapi.h.

◆ tesseract_

Tesseract* tesseract::TessBaseAPI::tesseract_
protected

The underlying data object.

Definition at line 888 of file baseapi.h.

◆ thresholder_

ImageThresholder* tesseract::TessBaseAPI::thresholder_
protected

Image thresholding module.

Definition at line 892 of file baseapi.h.

◆ truth_cb_

TruthCallback* tesseract::TessBaseAPI::truth_cb_
protected

Definition at line 902 of file baseapi.h.


The documentation for this class was generated from the following files: