183 std::deque<std::tuple<int, int>> best_choices;
184 ExtractBestPaths(&best_nodes, &second_nodes);
186 DebugPath(unicharset, best_nodes);
187 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
189 tprintf(
"\nSecond choice path:\n");
190 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
196 if (lstm_choice_mode == 2) {
197 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198 &xcoords, &best_choices);
199 if (best_choices.size() > 0) {
200 timestepEnd = std::get<1>(best_choices.front());
201 best_choices.pop_front();
204 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
207 int num_ids = unichar_ids.
size();
209 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
214 float prev_space_cert = 0.0f;
215 for (
int word_start = 0; word_start < num_ids; word_start = word_end) {
216 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
221 int index = xcoords[word_end];
222 if (best_nodes[index]->start_of_word)
break;
228 float space_cert = 0.0f;
229 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
230 space_cert = certs[word_end];
232 word_start > 0 && unichar_ids[word_start - 1] ==
UNICHAR_SPACE;
234 WERD_RES* word_res = InitializeWord(
235 leading_space, line_box, word_start, word_end,
236 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237 if (lstm_choice_mode == 1) {
238 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
241 timestepEnd = xcoords[word_end];
242 }
else if (lstm_choice_mode == 2){
245 std::vector<std::pair<const char*, float>> choice_pairs;
246 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247 for (std::pair<const char*, float> choice :
timesteps[i]) {
248 if (std::strcmp(choice.first,
"")) {
249 sum += choice.second;
250 choice_pairs.push_back(choice);
253 if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254 || i == xcoords[word_end]-1) {
255 std::map<const char*, float> summed_propabilities;
256 for (
auto & choice_pair : choice_pairs) {
257 summed_propabilities[choice_pair.first] += choice_pair.second;
259 std::vector<std::pair<const char*, float>> accumulated_timestep;
260 for (
auto& summed_propability : summed_propabilities) {
262 summed_propability.second/=sum;
264 while (accumulated_timestep.size() > pos
265 && accumulated_timestep[pos].second > summed_propability.second) {
268 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269 std::pair<const char*,float>(summed_propability.first,
270 summed_propability.second));
272 if (best_choices.size() > 0) {
273 best_choices.pop_front();
275 choice_pairs.clear();
276 word_res->
timesteps.push_back(accumulated_timestep);
280 timestepEnd = xcoords[word_end];
282 for (
int i = word_start; i < word_end; ++i) {
283 auto* choices =
new BLOB_CHOICE_LIST;
284 BLOB_CHOICE_IT bc_it(choices);
286 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
288 int col = i - word_start;
289 choice->set_matrix_cell(col, col);
290 bc_it.add_after_then_move(choice);
293 int index = xcoords[word_end - 1];
296 prev_space_cert = space_cert;
297 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
void put(ICOORD pos, const T &thing)
std::vector< std::vector< std::pair< const char *, float > > > timesteps
void FakeWordFromRatings(PermuterType permuter)
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const