tesseract  4.1.1
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentData (const STRING &name)
 
 ~DocumentData ()
 
bool LoadDocument (const char *filename, int start_page, int64_t max_memory, FileReader reader)
 
void SetDocument (const char *filename, int64_t max_memory, FileReader reader)
 
bool SaveDocument (const char *filename, FileWriter writer)
 
bool SaveToBuffer (GenericVector< char > *buffer)
 
void AddPageToDocument (ImageData *page)
 
const STRING & document_name () const
 
int NumPages () const
 
size_t PagesSize () const
 
int64_t memory_used () const
 
void LoadPageInBackground (int index)
 
const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
int64_t UnCache ()
 
void Shuffle ()
 

Friends

void * ReCachePagesFunc (void *data)
 

Detailed Description

Definition at line 209 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const STRING & name)
explicit

Definition at line 383 of file imagedata.cpp.

384  : document_name_(name),
385  pages_offset_(-1),
386  total_pages_(-1),
387  memory_used_(0),
388  max_memory_(0),
389  reader_(nullptr) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 391 of file imagedata.cpp.

391  {
392  SVAutoLock lock_p(&pages_mutex_);
393  SVAutoLock lock_g(&general_mutex_);
394 }

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData * page)

Definition at line 435 of file imagedata.cpp.

435  {
436  SVAutoLock lock(&pages_mutex_);
437  pages_.push_back(page);
438  set_memory_used(memory_used() + page->MemoryUsed());
439 }

◆ document_name()

const STRING& tesseract::DocumentData::document_name ( ) const
inline

Definition at line 229 of file imagedata.h.

229  {
230  SVAutoLock lock(&general_mutex_);
231  return document_name_;
232  }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 455 of file imagedata.cpp.

455  {
456  ImageData* page = nullptr;
457  while (!IsPageAvailable(index, &page)) {
458  // If there is no background load scheduled, schedule one now.
459  pages_mutex_.Lock();
460  bool needs_loading = pages_offset_ != index;
461  pages_mutex_.Unlock();
462  if (needs_loading) LoadPageInBackground(index);
463  // We can't directly load the page, or the background load will delete it
464  // while the caller is using it, so give it a chance to work.
465 #if defined(__MINGW32__)
466  sleep(1);
467 #else
468  std::this_thread::sleep_for(std::chrono::seconds(1));
469 #endif
470  }
471  return page;
472 }

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 272 of file imagedata.h.

272 { return NumPages() >= 0; }

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 477 of file imagedata.cpp.

477  {
478  SVAutoLock lock(&pages_mutex_);
479  int num_pages = NumPages();
480  if (num_pages == 0 || index < 0) {
481  *page = nullptr; // Empty Document.
482  return true;
483  }
484  if (num_pages > 0) {
485  index = Modulo(index, num_pages);
486  if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
487  *page = pages_[index - pages_offset_]; // Page is available already.
488  return true;
489  }
490  }
491  return false;
492 }

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 398 of file imagedata.cpp.

399  {
400  SetDocument(filename, max_memory, reader);
401  pages_offset_ = start_page;
402  return ReCachePages();
403 }

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 443 of file imagedata.cpp.

443  {
444  ImageData* page = nullptr;
445  if (IsPageAvailable(index, &page)) return;
446  SVAutoLock lock(&pages_mutex_);
447  if (pages_offset_ == index) return;
448  pages_offset_ = index;
449  pages_.clear();
450  SVSync::StartThread(ReCachePagesFunc, this);
451 }

◆ memory_used()

int64_t tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 240 of file imagedata.h.

240  {
241  SVAutoLock lock(&general_mutex_);
242  return memory_used_;
243  }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 233 of file imagedata.h.

233  {
234  SVAutoLock lock(&general_mutex_);
235  return total_pages_;
236  }

◆ PagesSize()

size_t tesseract::DocumentData::PagesSize ( ) const
inline

Definition at line 237 of file imagedata.h.

237  {
238  return pages_.size();
239  }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 417 of file imagedata.cpp.

417  {
418  SVAutoLock lock(&pages_mutex_);
419  TFile fp;
420  fp.OpenWrite(nullptr);
421  if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
422  tprintf("Serialize failed: %s\n", filename);
423  return false;
424  }
425  return true;
426 }

◆ SaveToBuffer()

bool tesseract::DocumentData::SaveToBuffer ( GenericVector< char > *  buffer)

Definition at line 427 of file imagedata.cpp.

427  {
428  SVAutoLock lock(&pages_mutex_);
429  TFile fp;
430  fp.OpenWrite(buffer);
431  return pages_.Serialize(&fp);
432 }

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 406 of file imagedata.cpp.

407  {
408  SVAutoLock lock_p(&pages_mutex_);
409  SVAutoLock lock(&general_mutex_);
410  document_name_ = filename;
411  pages_offset_ = -1;
412  max_memory_ = max_memory;
413  reader_ = reader;
414 }

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 509 of file imagedata.cpp.

509  {
510  TRand random;
511  // Different documents get shuffled differently, but the same for the same
512  // name.
513  random.set_seed(document_name_.string());
514  int num_pages = pages_.size();
515  // Execute one random swap for each page in the document.
516  for (int i = 0; i < num_pages; ++i) {
517  int src = random.IntRand() % num_pages;
518  int dest = random.IntRand() % num_pages;
519  std::swap(pages_[src], pages_[dest]);
520  }
521 }

◆ TakePage()

ImageData* tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 264 of file imagedata.h.

264  {
265  SVAutoLock lock(&pages_mutex_);
266  ImageData* page = pages_[index];
267  pages_[index] = nullptr;
268  return page;
269  }

◆ UnCache()

int64_t tesseract::DocumentData::UnCache ( )

Definition at line 496 of file imagedata.cpp.

496  {
497  SVAutoLock lock(&pages_mutex_);
498  int64_t memory_saved = memory_used();
499  pages_.clear();
500  pages_offset_ = -1;
501  set_total_pages(-1);
502  set_memory_used(0);
503  tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
504  document_name_.string(), memory_saved);
505  return memory_saved;
506 }

Friends And Related Function Documentation

◆ ReCachePagesFunc

void* ReCachePagesFunc ( void *  data)
friend

Definition at line 377 of file imagedata.cpp.

377  {
378  auto* document_data = static_cast<DocumentData*>(data);
379  document_data->ReCachePages();
380  return nullptr;
381 }

The documentation for this class was generated from the following files: imagedata.h and imagedata.cpp.
SVMutex::Unlock
void Unlock()
Unlocks on a mutex.
Definition: svutil.cpp:72
STRING::string
const char * string() const
Definition: strngs.cpp:194
tesstrain_utils.dest
dest
Definition: tesstrain_utils.py:139
tesseract::DocumentData::LoadPageInBackground
void LoadPageInBackground(int index)
Definition: imagedata.cpp:443
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
tesseract::DocumentData::ReCachePagesFunc
friend void * ReCachePagesFunc(void *data)
Definition: imagedata.cpp:377
tesseract::DocumentData::SetDocument
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
Definition: imagedata.cpp:406
tesseract::DocumentData::memory_used
int64_t memory_used() const
Definition: imagedata.h:240
tesseract::DocumentData::DocumentData
DocumentData(const STRING &name)
Definition: imagedata.cpp:383
SVSync::StartThread
static void StartThread(void *(*func)(void *), void *arg)
Create new thread.
Definition: svutil.cpp:81
tesseract::DocumentData::IsPageAvailable
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:477
Modulo
int Modulo(int a, int b)
Definition: helpers.h:158
SVMutex::Lock
void Lock()
Locks on a mutex.
Definition: svutil.cpp:64
tesseract::DocumentData::NumPages
int NumPages() const
Definition: imagedata.h:233
SVAutoLock
Definition: svutil.h:86