Branch Coverage

morphodita/morphodita.cpp
Criterion Covered Total %
branch 1065 5220 20.4


line true false branch
106 0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
376 0 0 format_tagged_lemma(result);
381 0 0 for (auto&& lemma : lemmas)
384 0 0 if (lemmas.size() > 1)
392 0 0 if (converter) converter->convert(lemma);
396 0 0 if (converter) converter->convert_analyzed(lemmas);
409 0 0 for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); )
0 0 for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); )
411 0 0 if (converter) converter->convert(lemma);
419 0 0 return derinet ? new root_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new root_derivation_formatter(derinet) : nullptr;
428 0 0 if (converter) converter->convert(lemma);
0 0 if (converter) converter->convert(lemma);
429 0 0 for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) {
0 0 for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) {
430 0 0 tagged_lemma parrent_lemma(parent.lemma, current.tag);
431 0 0 if (converter) converter->convert(parrent_lemma);
0 0 if (converter) converter->convert(parrent_lemma);
432 0 0 lemma.lemma.append(" ").append(parrent_lemma.lemma);
441 0 0 return derinet ? new path_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new path_derivation_formatter(derinet) : nullptr;
450 0 0 if (converter) converter->convert(lemma);
0 0 if (converter) converter->convert(lemma);
451 0 0 for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {}
0 0 for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {}
452 0 0 format_tree(root, tag, lemma, converter);
458 0 0 if (converter) {
459 0 0 tagged_lemma current(root, tag);
460 0 0 converter->convert(current);
461 0 0 tree.lemma.append(" ").append(current.lemma);
463 0 0 tree.lemma.append(" ").append(root);
466 0 0 if (derinet->children(root, children))
0 0 if (derinet->children(root, children))
467 0 0 for (auto&& child : children)
468 0 0 format_tree(child.lemma, tag, tree, converter);
469 0 0 tree.lemma.push_back(' ');
477 0 0 return derinet ? new tree_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new tree_derivation_formatter(derinet) : nullptr;
481 0 0 if (name == "none") return new_none_derivation_formatter();
482 0 0 if (name == "root") return new_root_derivation_formatter(derinet);
483 0 0 if (name == "path") return new_path_derivation_formatter(derinet);
484 0 0 if (name == "tree") return new_tree_derivation_formatter(derinet);
510 0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
127 16 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
53 12 while (len--)
146 30 while (len--)
24 4 while (len--)
68 6 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
511 0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
47 80 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
39 14 if (*a++ != *b++)
137 9 if (*a++ != *b++)
24 0 if (*a++ != *b++)
54 14 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
520 0 0 while (len--)
346 158 while (len--)
0 0 while (len--)
0 0 while (len--)
8 48 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
112 18 while (len--)
545 0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
548 0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
573 4 0 buffer.resize(len);
581 0 463 if (data + 1 > data_end) throw binary_decoder_error("No more data in binary_decoder");
586 0 30 if (data + sizeof(uint16_t) > data_end) throw binary_decoder_error("No more data in binary_decoder");
594 0 472 if (data + sizeof(uint32_t) > data_end) throw binary_decoder_error("No more data in binary_decoder");
608 0 140 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
0 79 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
623 0 2 if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder");
857 11 6 while (size) {
0 0 while (size) {
0 0 while (size) {
859 6 5 if (unaligned_load(first + step) < val) {
0 0 if (unaligned_load(first + step) < val) {
0 0 if (unaligned_load(first + step) < val) {
948 104 32 while (mask < num)
950 32 0 hash.resize(mask + 1);
954 140 0 uint32_t size = data.next_4B();
956 140 0 hash.resize(size);
957 140 0 memcpy(hash.data(), data.next(size), size * sizeof(uint32_t));
959 140 0 size = data.next_4B();
960 140 0 this->data.resize(size);
961 71 69 if (size) memcpy(this->data.data(), data.next(size), size);
71 0 if (size) memcpy(this->data.data(), data.next(size), size);
965 0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
4 0 if (len <= 0) return 0;
21 0 if (len <= 0) return 0;
21 15 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
158 0 if (len <= 0) return 0;
49 0 if (len <= 0) return 0;
18 0 if (len <= 0) return 0;
18 0 if (len <= 0) return 0;
966 0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 4 if (len == 1) return unaligned_load(data);
3 18 if (len == 1) return unaligned_load(data);
15 6 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 158 if (len == 1) return unaligned_load(data);
9 40 if (len == 1) return unaligned_load(data);
0 18 if (len == 1) return unaligned_load(data);
0 18 if (len == 1) return unaligned_load(data);
967 0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
4 0 if (len == 2) return unaligned_load(data);
17 1 if (len == 2) return unaligned_load(data);
0 6 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
30 128 if (len == 2) return unaligned_load(data);
34 6 if (len == 2) return unaligned_load(data);
18 0 if (len == 2) return unaligned_load(data);
18 0 if (len == 2) return unaligned_load(data);
970 0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
24 4 while (len--)
93 17 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
90 30 while (len--)
144 34 while (len--)
112 18 while (len--)
112 18 while (len--)
984 0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
36 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
990 0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 36 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
991 0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
21 15 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
993 0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
994 0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
1005 158 0 if (unsigned(len) >= hashes.size()) return nullptr;
49 71 if (unsigned(len) >= hashes.size()) return nullptr;
1011 30 128 if (len <= 2)
34 15 if (len <= 2)
1012 88 40 return data != end ? (const T*)(data + len) : nullptr;
15 0 return data != end ? (const T*)(data + len) : nullptr;
1014 26 18 while (data < end) {
39 4 while (data < end) {
1015 12 14 if (small_memeq(str, data, len)) return (const T*)(data + len);
30 9 if (small_memeq(str, data, len)) return (const T*)(data + len);
1024 4 0 if (unsigned(len) >= hashes.size()) return;
21 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
1030 4 4 while (data < end) {
20 21 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
1040 6 2 for (unsigned len = 0; len < hashes.size(); len++) {
0 0 for (unsigned len = 0; len < hashes.size(); len++) {
0 0 for (unsigned len = 0; len < hashes.size(); len++) {
1044 26 6 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
1058 14 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
1062 4 28 if (hashes.size() == 0) hashes.emplace_back(1);
1063 4 24 else if (hashes.size() == 1) hashes.emplace_back(1<<8);
1064 4 20 else if (hashes.size() == 2) hashes.emplace_back(1<<16);
1069 18 0 if (unsigned(str_len) < hashes.size())
1074 32 4 for (auto&& hash : hashes) {
1076 263232 32 for (auto&& len : hash.hash) total += len, len = total - len;
1082 18 0 if (unsigned(str_len) < hashes.size()) {
1093 32 4 for (auto&& hash : hashes)
1094 263232 32 for (int i = hash.hash.size() - 1; i >= 0; i--)
1095 263200 32 hash.hash[i] = i > 0 ? hash.hash[i-1] : 0;
1102 140 49 for (unsigned i = 0; i < sizes; i++)
1172 0 0 if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);
1179 0 0 if (lemma_data) {
1181 0 0 if (parent_encoded) {
1185 0 0 if (parent_data[parent_len])
1195 0 0 if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);
1202 0 0 if (lemma_data) {
1205 0 0 if (children_len) {
1207 0 0 for (unsigned i = 0; i < children_len; i++) {
1211 0 0 if (child_data[child_len])
1223 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
1226 0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
1227 0 0 derinet.resize(data.next_4B());
0 0 derinet.resize(data.next_4B());
1231 0 0 for (int pass = 1; pass <= 3; pass++) {
1232 0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
1235 0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
1236 0 0 lemma.resize(lemma.size() - data.next_1B());
0 0 lemma.resize(lemma.size() - data.next_1B());
1237 0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
1238 0 0 lemma.push_back(data.next_1B());
1240 0 0 unsigned char lemma_comment_len = data.next_1B();
1241 0 0 const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr;
0 0 const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr;
1243 0 0 unsigned children = data.next_2B();
1245 0 0 if (pass == 3) parent.clear();
1247 0 0 int operations = data.next_1B();
1248 0 0 if (operations) {
1249 0 0 int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
0 0 int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
1250 0 0 int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
0 0 int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
1251 0 0 if (operations & ADD_START) {
1252 0 0 int add_start = data.next_1B();
1253 0 0 const char* str = data.next(add_start);
1254 0 0 if (pass == 3) parent.assign(str, str + add_start);
1256 0 0 if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
0 0 if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
1257 0 0 if (operations & ADD_END) {
1258 0 0 int add_end = data.next_1B();
1259 0 0 const char* str = data.next(add_end);
1260 0 0 if (pass == 3) parent.insert(parent.end(), str, str + add_end);
1264 0 0 if (pass == 1) {
1266 0 0 } else if (pass == 2) {
1269 0 0 while (lemma_comment_len--) *lemma_data++ = *lemma_comment++;
1272 0 0 if (children) unaligned_store(((uint32_t*)lemma_data) + children - 1, 0);
1273 0 0 } else if (pass == 3 && !parent.empty()) {
0 0 } else if (pass == 3 && !parent.empty()) {
0 0 } else if (pass == 3 && !parent.empty()) {
1284 0 0 assert(lemma_data && parent_data);
1287 0 0 assert(parent.size() < (1<<8) && parent_offset < (1<<24));
0 0 assert(parent.size() < (1<<8) && parent_offset < (1<<24));
1291 0 0 assert(lemma.size() < (1<<8) && lemma_offset < (1<<24));
0 0 assert(lemma.size() < (1<<8) && lemma_offset < (1<<24));
1296 0 0 if (child_index+1 < children_len)
1301 0 0 if (pass == 1)
1302 0 0 derinet.done_adding();
1303 0 0 if (pass == 2)
1305 0 0 }
1374 0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
14 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
3 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
15 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
3 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
115 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
8 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
8 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
3 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
6 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
6 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
13 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
34 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
1378 13 0 if (chr < CHARS) {
1380 3 10 if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8;
1381 0 10 if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8;
1382 0 10 if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8;
1487 0 0 if (((unsigned char)*str) < 0x80) return (unsigned char)*str++;
1488 0 0 else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR;
1489 0 0 else if (((unsigned char)*str) < 0xE0) {
1491 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1493 0 0 } else if (((unsigned char)*str) < 0xF0) {
1495 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1497 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1499 0 0 } else if (((unsigned char)*str) < 0xF8) {
1501 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1503 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1505 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1511 218 1 if (!len) return 0;
1513 193 25 if (((unsigned char)*str) < 0x80) return (unsigned char)*str++;
1514 0 25 else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR;
1515 22 3 else if (((unsigned char)*str) < 0xE0) {
1517 22 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
22 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
22 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1519 3 0 } else if (((unsigned char)*str) < 0xF0) {
1521 3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1523 3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
3 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1525 0 0 } else if (((unsigned char)*str) < 0xF8) {
1527 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1529 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1531 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1622 13 0 if (chr < 0x80) str += chr;
1623 0 0 else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); }
1624 0 0 else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); }
1625 0 0 else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); }
1639 0 0 while (len)
1671 34 13 while (form_tmp.len && !rest_has_Lut)
34 0 while (form_tmp.len && !rest_has_Lut)
1680 2 11 if (first_Lut && !rest_has_Lut) { // common case allowing fast execution
1685 0 11 } else if (!first_Lut && rest_has_Lut) {
1688 0 11 } else if (first_Lut && rest_has_Lut) {
1695 0 0 while (form_tmp.len) {
1732 0 0 for (unsigned len = 1; len < lemma.len; len++)
1733 0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_' ||
0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_' ||
1734 0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
1741 0 0 for (unsigned len = 1; len < lemma.len; len++) {
1742 0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_')
1744 0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
1746 0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
1756 0 0 if (addinfo_len) {
1757 0 0 res.reserve(addinfo_len + 4);
1758 0 0 if (addinfo[0] != 255) {
1763 0 0 for (int i = 1; i < addinfo_len; i++)
1771 0 0 for (int i = 1; i + 2 < addinfo_len; i++)
1772 0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
1782 0 0 if (lemma_info < lemma.str + lemma.len) {
1786 0 0 if (*lemma_info == '-') {
1789 0 0 lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
0 0 lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
1793 0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
1794 0 0 if (die_on_failure)
1801 0 0 while (lemma_additional_info < lemma.str + lemma.len)
1804 0 0 if (data.size() > 255) {
1805 0 0 if (die_on_failure)
1816 0 0 if (data.empty()) return true;
1817 0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
1857 20 20 if (filters.empty()) return true;
1860 36 3 for (auto&& filter : filters) {
1862 30 36 while (tag_pos < filter.pos)
1863 30 0 if (!tag[tag_pos++])
1865 36 0 if (!tag[tag_pos])
1870 7 35 for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++)
6 1 for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++)
1872 19 17 if (!matched) return false;
1908 14 2 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
1910 18 2 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
1916 2 0 vector root(max(lemmas.max_length(), roots.max_length()));
0 0 vector root(max(lemmas.max_length(), roots.max_length()));
0 0 vector root(max(lemmas.max_length(), roots.max_length()));
1918 4 2 for (int pass = 1; pass <= 2; pass++) {
0 0 for (int pass = 1; pass <= 2; pass++) {
0 0 for (int pass = 1; pass <= 2; pass++) {
1919 2 2 if (pass > 1) data.seek(data_position);
2 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
1924 4 0 for (int i = data.next_4B(); i > 0; i--) {
8 4 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
1925 8 0 lemma_len -= data.next_1B();
0 0 lemma_len -= data.next_1B();
0 0 lemma_len -= data.next_1B();
1926 8 0 for (int i = data.next_1B(); i > 0; i--)
48 8 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
1927 48 0 lemma[lemma_len++] = data.next_1B();
0 0 lemma[lemma_len++] = data.next_1B();
0 0 lemma[lemma_len++] = data.next_1B();
1928 8 0 unsigned char lemma_info_len = data.next_1B();
0 0 unsigned char lemma_info_len = data.next_1B();
0 0 unsigned char lemma_info_len = data.next_1B();
1929 0 8 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
1930 8 0 unsigned lemma_roots = data.next_1B();
0 0 unsigned lemma_roots = data.next_1B();
0 0 unsigned lemma_roots = data.next_1B();
1935 4 4 if (pass == 1) {
0 0 if (pass == 1) {
0 0 if (pass == 1) {
1942 0 4 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
0 0 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
0 0 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
1947 28 8 for (unsigned i = 0; i < lemma_roots; i++) {
0 0 for (unsigned i = 0; i < lemma_roots; i++) {
0 0 for (unsigned i = 0; i < lemma_roots; i++) {
1949 28 0 int operations = data.next_1B();
0 0 int operations = data.next_1B();
0 0 int operations = data.next_1B();
1950 0 28 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
1951 24 4 if (operations & REMOVE_END) root_len -= data.next_1B();
24 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
1952 0 28 if (operations & ADD_START) {
0 0 if (operations & ADD_START) {
0 0 if (operations & ADD_START) {
1953 0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
1954 0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
1956 28 0 if (operations & ADD_END)
0 0 if (operations & ADD_END)
0 0 if (operations & ADD_END)
1957 28 0 for (int len = data.next_1B(); len > 0; len--)
56 28 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
1958 56 0 root[root_len++] = data.next_1B();
0 0 root[root_len++] = data.next_1B();
0 0 root[root_len++] = data.next_1B();
1959 28 0 uint16_t clas = data.next_2B();
0 0 uint16_t clas = data.next_2B();
0 0 uint16_t clas = data.next_2B();
1961 14 14 if (pass == 1) { // for each root
0 0 if (pass == 1) { // for each root
0 0 if (pass == 1) { // for each root
1970 0 14 assert(uint8_t(lemma_len) == lemma_len);
0 0 assert(uint8_t(lemma_len) == lemma_len);
0 0 assert(uint8_t(lemma_len) == lemma_len);
1975 0 14 assert(uint8_t(root_len) == root_len);
0 0 assert(uint8_t(root_len) == root_len);
0 0 assert(uint8_t(root_len) == root_len);
1980 2 2 if (pass == 1) { // after the whole pass
0 0 if (pass == 1) { // after the whole pass
0 0 if (pass == 1) { // after the whole pass
1981 2 0 lemmas.done_adding();
0 0 lemmas.done_adding();
0 0 lemmas.done_adding();
1982 2 0 roots.done_adding();
0 0 roots.done_adding();
0 0 roots.done_adding();
1990 2 0 tags.resize(data.next_2B());
2 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
1991 40 2 for (auto&& tag : tags) {
0 0 for (auto&& tag : tags) {
0 0 for (auto&& tag : tags) {
1992 40 0 tag.resize(data.next_1B());
0 0 tag.resize(data.next_1B());
0 0 tag.resize(data.next_1B());
1993 120 40 for (unsigned i = 0; i < tag.size(); i++)
0 0 for (unsigned i = 0; i < tag.size(); i++)
0 0 for (unsigned i = 0; i < tag.size(); i++)
1994 120 0 tag[i] = data.next_1B();
0 0 tag[i] = data.next_1B();
0 0 tag[i] = data.next_1B();
1998 2 0 suffixes.load(data);
0 0 suffixes.load(data);
0 0 suffixes.load(data);
2001 2 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
0 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
0 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
2008 30 26 for (unsigned i = 0; i < classes_len; i++) {
0 0 for (unsigned i = 0; i < classes_len; i++) {
0 0 for (unsigned i = 0; i < classes_len; i++) {
2010 10 20 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
10 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
2011 30 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
0 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
0 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
2012 40 30 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
0 0 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
0 0 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
2015 40 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
0 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
0 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
2025 0 15 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
2027 36 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
0 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
0 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
2035 21 15 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 21 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
21 15 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
2036 21 0 if (unaligned_load(suff[suff_len])) {
0 0 if (unaligned_load(suff[suff_len])) {
0 0 if (unaligned_load(suff[suff_len])) {
2040 21 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
0 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
0 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
2045 6 14 if (small_memeq(form.str, root, root_len)) {
0 0 if (small_memeq(form.str, root, root_len)) {
0 0 if (small_memeq(form.str, root, root_len)) {
2047 6 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 6 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
6 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
2050 0 6 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
2054 17 6 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
0 0 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
0 0 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
2056 17 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
0 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
0 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
2066 0 0 int raw_lemma_len = addinfo.parse(lemma);
0 0 int raw_lemma_len = addinfo.parse(lemma);
2069 4 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
0 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
0 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
2075 4 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
4 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
2080 14 4 for (unsigned i = 0; i < lemma_roots_len; i++) {
0 0 for (unsigned i = 0; i < lemma_roots_len; i++) {
0 0 for (unsigned i = 0; i < lemma_roots_len; i++) {
2086 30 14 for (auto&& suffix : classes[clas]) {
0 0 for (auto&& suffix : classes[clas]) {
0 0 for (auto&& suffix : classes[clas]) {
2088 40 30 for (auto&& tag : suffix.second)
0 0 for (auto&& tag : suffix.second)
0 0 for (auto&& tag : suffix.second)
2089 23 17 if (filter.matches(tags[tag].c_str())) {
0 0 if (filter.matches(tags[tag].c_str())) {
0 0 if (filter.matches(tags[tag].c_str())) {
2090 4 19 if (!forms) {
0 0 if (!forms) {
0 0 if (!forms) {
2091 4 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
4 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
2095 18 5 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 18 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
18 5 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
2096 18 0 root_with_suffix.reserve(root_len + suffix.first.size());
0 0 root_with_suffix.reserve(root_len + suffix.first.size());
0 0 root_with_suffix.reserve(root_len + suffix.first.size());
2101 23 0 forms->emplace_back(root_with_suffix, tags[tag]);
0 0 forms->emplace_back(root_with_suffix, tags[tag]);
0 0 forms->emplace_back(root_with_suffix, tags[tag]);
2144 0 0 for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) {
2148 0 0 tag_filters.emplace_back(tag_filter.c_str());
2159 0 0 if (!form.len) return;
2163 0 0 middle_masks.reserve(form.len);
2165 0 0 for (unsigned initial = 0; initial < form.len; initial++) {
2168 0 0 if (initial) {
2170 0 0 if (!found) break;
2175 0 0 if (initial_mask) {
2176 0 0 middle_masks.resize(initial);
2177 0 0 middle_masks.emplace_back(initial_mask);
2178 0 0 for (unsigned middle = initial; middle < middle_masks.size(); middle++) {
2179 0 0 if (!middle_masks[middle]) continue;
2181 0 0 for (unsigned i = middle + 1; i < form.len; i++) {
2183 0 0 if (!found) break;
2184 0 0 if (unaligned_load(found)) {
2185 0 0 if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1);
0 0 if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1);
2191 0 0 if (middle > initial && middle < form.len ) {
0 0 if (middle > initial && middle < form.len ) {
2192 0 0 if (initial) {
2193 0 0 if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len);
2197 0 0 dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas);
0 0 dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas);
2199 0 0 for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) {
2200 0 0 for (unsigned filter = 0; filter < tag_filters.size(); filter++)
2201 0 0 if ((middle_masks[middle] & (1<
0 0 if ((middle_masks[middle] & (1<
0 0 if ((middle_masks[middle] & (1<
2202 0 0 if (i == lemmas_new_size) {
2205 0 0 lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial);
2214 0 0 if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end());
2328 138 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
8 130 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
29 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
4 25 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
154 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
7 147 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
1 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 1 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
2380 0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
2419 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
2423 0 0 unsigned tag_length = data.next_1B();
2424 0 0 if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length);
0 0 if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length);
2425 0 0 if (tag_length < number_tag.size()) number_tag.erase(tag_length);
0 0 if (tag_length < number_tag.size()) number_tag.erase(tag_length);
2426 0 0 if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length);
0 0 if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length);
2429 0 0 dictionary.load(data);
2433 0 0 if (data.next_1B()) {
0 0 if (data.next_1B()) {
2434 0 0 prefix_guesser.reset(new morpho_prefix_guesser(dictionary));
2435 0 0 prefix_guesser->load(data);
2440 0 0 if (data.next_1B()) {
0 0 if (data.next_1B()) {
2441 0 0 statistical_guesser.reset(new morpho_statistical_guesser());
2442 0 0 statistical_guesser->load(data);
2443 0 0 }
2454 0 0 if (form.len) {
2458 0 0 generate_casing_variants(form, form_uclc, form_lc);
2461 0 0 dictionary.analyze(form, lemmas);
2462 0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
2463 0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
2464 0 0 if (!lemmas.empty()) return NO_GUESSER;
2467 0 0 analyze_special(form, lemmas);
2468 0 0 if (!lemmas.empty()) return NO_GUESSER;
2471 0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
2472 0 0 prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas);
0 0 prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas);
2476 0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
2477 0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
2478 0 0 statistical_guesser->analyze(form, lemmas, nullptr);
2480 0 0 morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3);
2481 0 0 statistical_guesser->analyze(form, lemmas, &used_rules);
2482 0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
2483 0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
2489 0 0 if (prefix_guesser_guesses) {
2492 0 0 return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag);
2495 0 0 return a.lemma == b.lemma && a.tag == b.tag;
0 0 return a.lemma == b.lemma && a.tag == b.tag;
2497 0 0 if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end());
2500 0 0 if (!lemmas.empty()) return GUESSER;
2503 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
2512 0 0 if (lemma.len) {
2513 0 0 if (dictionary.generate(lemma, filter, forms))
0 0 if (dictionary.generate(lemma, filter, forms))
2516 0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
2537 0 0 return new czech_tokenizer(language, version, this);
2568 0 0 if (!form.len) return;
2576 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len);
2577 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
2578 0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
2579 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
2580 0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
2582 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len);
2584 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
2587 0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
2588 0 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag);
2589 0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
2590 0 0 ((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first])))
0 0 ((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first])))
2591 0 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag);
2625 0 0 for (unsigned len = 1; len < lemma.len; len++) {
2626 0 0 if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+'))
0 0 if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+'))
2628 0 0 if (len + 1 < lemma.len && lemma.str[len] == '^') {
0 0 if (len + 1 < lemma.len && lemma.str[len] == '^') {
2630 0 0 for (unsigned i = len + 1; ok && i < lemma.len; i++)
0 0 for (unsigned i = len + 1; ok && i < lemma.len; i++)
2631 0 0 ok &= (lemma.str[i] >= 'A' && lemma.str[i] <= 'Z') ||
2632 0 0 (lemma.str[i] >= 'a' && lemma.str[i] <= 'z') ||
0 0 (lemma.str[i] >= 'a' && lemma.str[i] <= 'z') ||
2633 0 0 (i > len + 1 && lemma.str[i] == '-');
2634 0 0 if (ok) return len;
2657 0 0 for (size_t i = len; i < lemma.len; i++)
2664 0 0 if (data.empty()) return true;
2665 0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
2666 0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
2667 0 0 return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len);
0 0 return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len);
2683 0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
2727 0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
2793 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
2796 0 0 dictionary.load(data);
2797 0 0 morpho_guesser.load(data);
0 0 morpho_guesser.load(data);
2808 0 0 if (form.len) {
2812 0 0 generate_casing_variants(form, form_uclc, form_lc);
2815 0 0 dictionary.analyze(form, lemmas);
2816 0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
2817 0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
2818 0 0 if (!lemmas.empty())
2819 0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
2822 0 0 analyze_special(form, lemmas);
2823 0 0 if (!lemmas.empty()) return NO_GUESSER;
2826 0 0 if (guesser == GUESSER)
2827 0 0 morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas);
0 0 morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas);
2828 0 0 if (!lemmas.empty()) return GUESSER;
2831 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
2840 0 0 if (lemma.len) {
2841 0 0 if (dictionary.generate(lemma, filter, forms))
0 0 if (dictionary.generate(lemma, filter, forms))
2861 0 0 return new english_tokenizer(version <= 2 ? 1 : 2);
2868 0 0 if (!form.len) return;
2871 0 0 if (form.len == 1)
2875 0 0 case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return;
2876 0 0 case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return;
2877 0 0 case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return;
2878 0 0 case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return;
2879 0 0 case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
2880 0 0 case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
2881 0 0 case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag);
2882 0 0 lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
2883 0 0 case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag);
2884 0 0 lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
2885 0 0 case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag);
2886 0 0 lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
2887 0 0 case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag);
2888 0 0 lemmas.emplace_back(string(form.str, form.len), in_tag); return;
2889 0 0 case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag);
2890 0 0 lemmas.emplace_back(string(form.str, form.len), pos_tag); return;
2897 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
2898 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
2899 0 0 while (codepoint == ',') {
2901 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
2902 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
2903 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
2908 0 0 if (codepoint == '.' && number.len) {
0 0 if (codepoint == '.' && number.len) {
2910 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
2912 0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
2913 0 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
2914 0 0 lemmas.emplace_back(string(form.str, form.len - 1), nns_tag);
2917 0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
2919 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
2921 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
2923 0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
2924 0 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
2925 0 0 lemmas.emplace_back(string(form.str, form.len), nnp_tag);
2926 0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
2927 0 0 lemmas.emplace_back(string(form.str, form.len), ls_tag);
2934 0 0 while ((symbol || any_punctuation) && punctuation.len) {
0 0 while ((symbol || any_punctuation) && punctuation.len) {
2936 0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
2937 0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
2938 0 0 if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps;
2939 0 0 if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe;
2940 0 0 if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P;
2941 0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
2943 0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
2944 0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
2945 0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
2946 0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
2947 0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
2948 0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
2978 0 0 while (tags--) {
2980 0 0 exceptions_tags.emplace_back(string(data.next(len), len));
3116 0 0 for (unsigned len = data.next_1B(); len; len--) {
3122 0 0 if (exception) {
3125 0 0 for (unsigned len = data.next_1B(); len; len--) {
3128 0 0 for (unsigned tags = data.next_1B(); tags; tags--)
3129 0 0 lemmas.emplace_back(lemma, exceptions_tags[data.next_2B()]);
3136 0 0 for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) {
3138 0 0 if (!found) break;
3139 0 0 if (found[NEGATION_LEN]) {
3140 0 0 if (form_lc.len - prefix >= found[TO_FOLLOW]) negation_len = found[NEGATION_LEN];
3146 0 0 add(JJ, lemma_lc, negation_len, lemmas);
3147 0 0 add(RB, lemma_lc, negation_len, lemmas);
3148 0 0 add(NN, lemma_lc, negation_len, lemmas);
3149 0 0 add_NNS(lemma_lc, negation_len, lemmas);
3166 0 0 if ( p == ( (form_lc.str + form_lc.len)) )
3173 0 0 if ( _klen > 0 ) {
3178 0 0 if ( _upper < _lower )
3182 0 0 if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < *_mid )
3184 0 0 else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > *_mid )
3196 0 0 if ( _klen > 0 ) {
3201 0 0 if ( _upper < _lower )
3205 0 0 if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < _mid[0] )
3207 0 0 else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > _mid[1] )
3221 0 0 if ( _tag_guesser_trans_actions[_trans] == 0 )
3226 0 0 while ( _nacts-- > 0 )
3231 0 0 { if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); }
0 0 { if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); }
3234 0 0 { if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); }
0 0 { if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); }
3237 0 0 { add_VBG(lemma_lc, lemmas); }
3240 0 0 { add_VBD_VBN(lemma_lc, lemmas); }
3243 0 0 { add_VBZ(lemma_lc, lemmas); }
3249 0 0 { if (!added_SYM) added_SYM = true, add(SYM, lemma_lc, lemmas); }
3252 0 0 { if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
3258 0 0 if ( ++p != ( (form_lc.str + form_lc.len)) )
3261 0 0 if ( p == ( (form_lc.str + form_lc.len)) )
3265 0 0 while ( __nacts-- > 0 ) {
3266 0 0 switch ( *__acts++ ) {
3268 0 0 { if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
3284 0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
3286 0 0 if (!is_NNP && !is_NNPS) return false;
3289 0 0 for (auto&& lemma : lemmas) {
3293 0 0 if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false;
0 0 if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false;
3296 0 0 if (is_NNP && !was_NNP) add(NNP, lemma, lemmas);
3297 0 0 if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas);
0 0 if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas);
3302 0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
3311 0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
3413 0 0 if ( p == ( (form.c_str() + form.size())) )
3422 0 0 if ( _klen > 0 ) {
3427 0 0 if ( _upper < _lower )
3431 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
3433 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
3445 0 0 if ( _klen > 0 ) {
3450 0 0 if ( _upper < _lower )
3454 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
3456 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
3470 0 0 if ( _NNS_trans_actions[_trans] == 0 )
3475 0 0 while ( _nacts-- > 0 )
3480 0 0 { if (best > 'a') best = 'a', remove = 2, append = "an"; }
3483 0 0 { if (best > 'b') best = 'b', remove = 1, append = nullptr; }
3486 0 0 { if (best > 'c') best = 'c', remove = 3, append = "fe"; }
3489 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
3492 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
3495 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
3498 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
3501 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
3504 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
3507 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
3510 0 0 { if (best > 'k') best = 'k', remove = 2, append = nullptr; }
3513 0 0 { if (best > 'l') best = 'l', remove = 3, append = "y"; }
3516 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
3519 0 0 { if (best > 'n') best = 'n', remove = 1, append = nullptr; }
3525 0 0 if ( cs == 0 )
3527 0 0 if ( ++p != ( (form.c_str() + form.size())) )
3533 0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
3659 0 0 if ( p == ( (form.c_str() + form.size())) )
3668 0 0 if ( _klen > 0 ) {
3673 0 0 if ( _upper < _lower )
3677 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
3679 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
3691 0 0 if ( _klen > 0 ) {
3696 0 0 if ( _upper < _lower )
3700 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
3702 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
3716 0 0 if ( _NNPS_trans_actions[_trans] == 0 )
3721 0 0 while ( _nacts-- > 0 )
3726 0 0 { if (best > 'a') best = 'a', remove = 2, append = "AN"; }
3729 0 0 { if (best > 'b') best = 'b', remove = 2, append = "an"; }
3732 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
3735 0 0 { if (best > 'd') best = 'd', remove = 3, append = "FE"; }
3738 0 0 { if (best > 'e') best = 'e', remove = 3, append = "fe"; }
3741 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
3744 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
3747 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
3750 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
3753 0 0 { if (best > 'j') best = 'j', remove = 2, append = nullptr; }
3756 0 0 { if (best > 'k') best = 'k', remove = 1, append = nullptr; }
3759 0 0 { if (best > 'l') best = 'l', remove = 1, append = nullptr; }
3762 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
3765 0 0 { if (best > 'n') best = 'n', remove = 3, append = "Y"; }
3768 0 0 { if (best > 'o') best = 'o', remove = 3, append = "y"; }
3771 0 0 { if (best > 'p') best = 'p', remove = 2, append = nullptr; }
3774 0 0 { if (best > 'q') best = 'q', remove = 1, append = nullptr; }
3780 0 0 if ( cs == 0 )
3782 0 0 if ( ++p != ( (form.c_str() + form.size())) )
3788 0 0 add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
4088 0 0 if ( p == ( (form.c_str() + form.size())) )
4097 0 0 if ( _klen > 0 ) {
4102 0 0 if ( _upper < _lower )
4106 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
4108 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
4120 0 0 if ( _klen > 0 ) {
4125 0 0 if ( _upper < _lower )
4129 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
4131 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
4145 0 0 if ( _VBG_trans_actions[_trans] == 0 )
4150 0 0 while ( _nacts-- > 0 )
4155 0 0 { if (best > 'a') best = 'a', remove = 3, append = nullptr; }
4158 0 0 { if (best > 'b') best = 'b', remove = 3, append = "e"; }
4161 0 0 { if (best > 'c') best = 'c', remove = 3, append = nullptr; }
4164 0 0 { if (best > 'd') best = 'd', remove = 3, append = "e"; }
4167 0 0 { if (best > 'e') best = 'e', remove = 3, append = nullptr; }
4170 0 0 { if (best > 'f') best = 'f', remove = 3, append = "e"; }
4173 0 0 { if (best > 'g') best = 'g', remove = 3, append = nullptr; }
4176 0 0 { if (best > 'h') best = 'h', remove = 3, append = "e"; }
4179 0 0 { if (best > 'i') best = 'i', remove = 3, append = nullptr; }
4182 0 0 { if (best > 'j') best = 'j', remove = 3, append = "e"; }
4185 0 0 { if (best > 'k') best = 'k', remove = 3, append = nullptr; }
4188 0 0 { if (best > 'l') best = 'l', remove = 3, append = "e"; }
4191 0 0 { if (best > 'm') best = 'm', remove = 3, append = nullptr; }
4194 0 0 { if (best > 'n') best = 'n', remove = 3, append = "e"; }
4197 0 0 { if (best > 'o') best = 'o', remove = 3, append = nullptr; }
4200 0 0 { if (best > 'p') best = 'p', remove = 3, append = "e"; }
4203 0 0 { if (best > 'q') best = 'q', remove = 3, append = nullptr; }
4206 0 0 { if (best > 'r') best = 'r', remove = 3, append = "e"; }
4212 0 0 if ( cs == 0 )
4214 0 0 if ( ++p != ( (form.c_str() + form.size())) )
4217 0 0 if ( p == ( (form.c_str() + form.size())) )
4221 0 0 while ( __nacts-- > 0 ) {
4224 0 0 { if (best > 'c') best = 'c', remove = 3, append = nullptr; }
4227 0 0 { if (best > 'f') best = 'f', remove = 3, append = "e"; }
4230 0 0 { if (best > 'p') best = 'p', remove = 3, append = "e"; }
4239 0 0 add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
4542 0 0 if ( p == ( (form.c_str() + form.size())) )
4551 0 0 if ( _klen > 0 ) {
4556 0 0 if ( _upper < _lower )
4560 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
4562 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
4574 0 0 if ( _klen > 0 ) {
4579 0 0 if ( _upper < _lower )
4583 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
4585 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
4599 0 0 if ( _VBD_VBN_trans_actions[_trans] == 0 )
4604 0 0 while ( _nacts-- > 0 )
4609 0 0 { if (best > 'a') best = 'a', remove = 1, append = nullptr; }
4612 0 0 { if (best > 'b') best = 'b', remove = 2, append = nullptr; }
4615 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
4618 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
4621 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
4624 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
4627 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
4630 0 0 { if (best > 'i') best = 'i', remove = 3, append = "y"; }
4633 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
4636 0 0 { if (best > 'k') best = 'k', remove = 2, append = nullptr; }
4639 0 0 { if (best > 'l') best = 'l', remove = 1, append = nullptr; }
4642 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
4645 0 0 { if (best > 'n') best = 'n', remove = 1, append = nullptr; }
4648 0 0 { if (best > 'o') best = 'o', remove = 2, append = nullptr; }
4651 0 0 { if (best > 'p') best = 'p', remove = 1, append = nullptr; }
4654 0 0 { if (best > 'q') best = 'q', remove = 2, append = nullptr; }
4657 0 0 { if (best > 'r') best = 'r', remove = 1, append = nullptr; }
4663 0 0 if ( cs == 0 )
4665 0 0 if ( ++p != ( (form.c_str() + form.size())) )
4668 0 0 if ( p == ( (form.c_str() + form.size())) )
4672 0 0 while ( __nacts-- > 0 ) {
4675 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
4678 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
4681 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
4690 0 0 add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
4769 0 0 if ( p == ( (form.c_str() + form.size())) )
4778 0 0 if ( _klen > 0 ) {
4783 0 0 if ( _upper < _lower )
4787 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
4789 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
4801 0 0 if ( _klen > 0 ) {
4806 0 0 if ( _upper < _lower )
4810 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
4812 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
4826 0 0 if ( _VBZ_trans_actions[_trans] == 0 )
4831 0 0 while ( _nacts-- > 0 )
4836 0 0 { if (best > 'a') best = 'a', remove = 1, append = nullptr; }
4839 0 0 { if (best > 'b') best = 'b', remove = 2, append = nullptr; }
4842 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
4845 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
4848 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
4851 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
4854 0 0 { if (best > 'g') best = 'g', remove = 3, append = "y"; }
4857 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
4860 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
4866 0 0 if ( cs == 0 )
4868 0 0 if ( ++p != ( (form.c_str() + form.size())) )
4874 0 0 add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
5000 0 0 if ( p == ( (form.c_str() + form.size())) )
5009 0 0 if ( _klen > 0 ) {
5014 0 0 if ( _upper < _lower )
5018 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
5020 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
5032 0 0 if ( _klen > 0 ) {
5037 0 0 if ( _upper < _lower )
5041 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
5043 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
5057 0 0 if ( _JJR_RBR_trans_actions[_trans] == 0 )
5062 0 0 while ( _nacts-- > 0 )
5067 0 0 { if (best > 'a') best = 'a', remove = 2, append = nullptr; }
5070 0 0 { if (best > 'b') best = 'b', remove = 3, append = nullptr; }
5073 0 0 { if (best > 'c') best = 'c', remove = 3, append = "y"; }
5076 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
5079 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
5082 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
5088 0 0 if ( cs == 0 )
5090 0 0 if ( ++p != ( (form.c_str() + form.size())) )
5096 0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
5226 0 0 if ( p == ( (form.c_str() + form.size())) )
5235 0 0 if ( _klen > 0 ) {
5240 0 0 if ( _upper < _lower )
5244 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
5246 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
5258 0 0 if ( _klen > 0 ) {
5263 0 0 if ( _upper < _lower )
5267 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
5269 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
5283 0 0 if ( _JJS_RBS_trans_actions[_trans] == 0 )
5288 0 0 while ( _nacts-- > 0 )
5293 0 0 { if (best > 'a') best = 'a', remove = 3, append = nullptr; }
5296 0 0 { if (best > 'b') best = 'b', remove = 4, append = nullptr; }
5299 0 0 { if (best > 'c') best = 'c', remove = 4, append = "y"; }
5302 0 0 { if (best > 'd') best = 'd', remove = 3, append = nullptr; }
5305 0 0 { if (best > 'e') best = 'e', remove = 2, append = nullptr; }
5308 0 0 { if (best > 'f') best = 'f', remove = 3, append = nullptr; }
5314 0 0 if ( cs == 0 )
5316 0 0 if ( ++p != ( (form.c_str() + form.size())) )
5322 0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
5393 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
5397 0 0 unsigned length = data.next_1B();
5398 0 0 unknown_tag.assign(data.next(length), length);
0 0 unknown_tag.assign(data.next(length), length);
5409 0 0 if (form.len) {
5412 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
5413 0 0 if (lemmatags.len) lemmatags.len--, lemmatags.str++;
5416 0 0 while (lemmatags.len) {
5418 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
5419 0 0 if (!lemmatags.len) break;
5424 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
5426 0 0 if (lemmatags.len) lemmatags.len--, lemmatags.str++;
5428 0 0 lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len));
5431 0 0 if (!lemmas.empty()) return NO_GUESSER;
5434 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
5443 0 0 if (lemma.len) {
5446 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
5448 0 0 if (formtags.len) formtags.len--, formtags.str++;
5452 0 0 while (formtags.len) {
5454 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
5455 0 0 if (!formtags.len) break;
5460 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
5462 0 0 if (formtags.len) formtags.len--, formtags.str++;
5466 0 0 if (filter.matches(tag.c_str())) {
5467 0 0 if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len));
0 0 if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len));
5468 0 0 forms.back().forms.emplace_back(string(form_start, form_len), tag);
5472 0 0 if (any_result) return NO_GUESSER;
5480 0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
5486 0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
5492 0 0 while (form_len < form.len && form.str[form_len] != ' ') form_len++;
0 0 while (form_len < form.len && form.str[form_len] != ' ') form_len++;
5601 2 0 if (!compressor::load(is, data)) return false;
2 0 if (!compressor::load(is, data)) return false;
5605 2 0 unsigned length = data.next_1B();
5606 2 0 unknown_tag.assign(data.next(length), length);
5607 2 0 length = data.next_1B();
5608 2 0 number_tag.assign(data.next(length), length);
5609 2 0 length = data.next_1B();
5610 2 0 punctuation_tag.assign(data.next(length), length);
5611 2 0 length = data.next_1B();
5612 2 0 symbol_tag.assign(data.next(length), length);
5615 2 0 dictionary.load(data);
5619 2 0 if (data.next_1B()) {
0 2 if (data.next_1B()) {
5620 0 0 statistical_guesser.reset(new morpho_statistical_guesser());
5621 0 0 statistical_guesser->load(data);
5622 0 0 }
5633 13 0 if (form.len) {
5637 13 0 generate_casing_variants(form, form_uclc, form_lc);
5640 13 0 dictionary.analyze(form, lemmas);
5641 0 13 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
5642 2 11 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
2 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
5643 7 6 if (!lemmas.empty()) return NO_GUESSER;
5646 7 0 analyze_special(form, lemmas);
5647 1 6 if (!lemmas.empty()) return NO_GUESSER;
5650 0 1 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
0 1 if (guesser == GUESSER && statistical_guesser) {
5651 0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
5652 0 0 statistical_guesser->analyze(form, lemmas, nullptr);
5654 0 0 morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3);
5655 0 0 statistical_guesser->analyze(form, lemmas, &used_rules);
5656 0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
5657 0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
5660 1 0 if (!lemmas.empty()) return GUESSER;
5663 1 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
5672 4 0 if (lemma.len) {
5673 4 0 if (dictionary.generate(lemma, filter, forms))
0 4 if (dictionary.generate(lemma, filter, forms))
5703 7 0 if (!form.len) return;
5711 0 7 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
5712 1 7 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5713 3 4 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
2 1 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
0 6 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
5714 1 7 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5715 1 6 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
1 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
5717 1 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
5719 2 1 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5722 1 6 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
1 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
1 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
5723 1 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
5730 12 6 while (form.len) {
5732 6 6 punctuation = punctuation && unicode::category(codepoint) & unicode::P;
2 4 punctuation = punctuation && unicode::category(codepoint) & unicode::P;
5733 6 6 symbol = symbol && unicode::category(codepoint) & unicode::S;
1 5 symbol = symbol && unicode::category(codepoint) & unicode::S;
5735 4 2 if (punctuation)
5736 4 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag);
5737 1 1 else if (symbol)
5738 1 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag);
5796 0 0 return unique_ptr(new T(std::forward(args)...));
1 0 return unique_ptr(new T(std::forward(args)...));
0 0 return unique_ptr(new T(std::forward(args)...));
5918 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
5927 0 0 3);
0 0 3);
5928 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
5934 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
5940 2 0 if (res->load(is)) return res.release();
2 0 if (res->load(is)) return res.release();
5946 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
5952 0 0 if (!derinet->load(is)) return nullptr;
0 0 if (!derinet->load(is)) return nullptr;
5954 0 0 unique_ptr dictionary(load(is));
5955 0 0 if (!dictionary) return nullptr;
5966 1 0 ifstream f(path_from_utf8(fname).c_str(), ifstream::binary);
5967 1 0 if (!f) return nullptr;
5969 1 0 return load(f);
5992 0 0 for (auto&& tag : tags) {
5994 0 0 for (unsigned i = 0; i < tag.size(); i++)
6005 0 0 if (!used) return false;
6007 0 0 for (auto&& used_rule : *used)
6008 0 0 if (used_rule == rule)
6020 0 0 string rule_label; rule_label.reserve(12);
6022 0 0 for (; suffix_len < form.len; suffix_len++) {
6023 0 0 rule_label.push_back(form.str[form.len - (suffix_len + 1)]);
6024 0 0 if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); }))
6028 0 0 for (suffix_len++; suffix_len--; ) {
6030 0 0 rule_label.push_back(' ');
6034 0 0 for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) {
6035 0 0 if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]);
0 0 if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]);
6037 0 0 if (!found) break;
6038 0 0 if (*(found += sizeof(uint16_t))) {
6044 0 0 if (rule) {
6046 0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
6047 0 0 if (used) used->push_back(rule_label);
0 0 if (used) used->push_back(rule_label);
6048 0 0 for (int rules_len = *rule++; rules_len; rules_len--) {
6055 0 0 if (pref_del_len + suff_del_len > form.len ||
0 0 if (pref_del_len + suff_del_len > form.len ||
6056 0 0 (pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) ||
0 0 (pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) ||
6057 0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
6062 0 0 lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len);
6063 0 0 if (pref_add_len) lemma.append(pref_add, pref_add_len);
0 0 if (pref_add_len) lemma.append(pref_add, pref_add_len);
6064 0 0 if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len);
0 0 if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len);
6065 0 0 if (suff_add_len) lemma.append(suff_add, suff_add_len);
0 0 if (suff_add_len) lemma.append(suff_add, suff_add_len);
6066 0 0 while (tags_len--)
6067 0 0 lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]);
6075 0 0 if (lemmas.size() == lemmas_initial_size)
6076 0 0 if (!contains(used, string())) {
6077 0 0 if (used) used->push_back(string());
6078 0 0 lemmas.emplace_back(string(form.str, form.len), tags[default_tag]);
6096 4 0 if (!filter) return;
6098 4 0 wildcard.assign(filter);
6101 6 4 for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) {
6102 5 1 if (filter[filter_pos] == '?') continue;
6103 3 2 if (filter[filter_pos] == '[') {
6107 1 2 if (filter[filter_pos] == '^') negate = true, filter_pos++;
6110 7 0 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
4 3 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
1 3 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
6113 3 0 filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start);
6114 3 0 if (!filter[filter_pos]) break;
6116 2 0 filters.emplace_back(tag_pos, false, filter_pos, 1);
6201 0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
9 4 return it ? unaligned_load(it) : elementary_feature_unknown;
0 13 return it ? unaligned_load(it) : elementary_feature_unknown;
0 13 return it ? unaligned_load(it) : elementary_feature_unknown;
0 13 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
6 0 return it ? unaligned_load(it) : elementary_feature_unknown;
6 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
3 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
6209 1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
6212 1 0 maps.resize(data.next_1B());
1 0 maps.resize(data.next_1B());
6213 26 1 for (auto&& map : maps)
6214 26 0 map.load(data);
0 0 map.load(data);
6252 426 0 if (value < 0x80) *where++ = value;
6253 0 0 else if (value < 0x4000) *where++ = (value >> 7) | 0x80u, *where++ = value & 0x7Fu;
6254 0 0 else if (value < 0x200000) *where++ = (value >> 14) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu;
6255 0 0 else if (value < 0x10000000) *where++ = (value >> 21) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu;
6299 0 0 class feature_sequences {
1 0 class feature_sequences {
0 0 class feature_sequences {
6328 0 0 return it ? unaligned_load(it) : 0;
100 58 return it ? unaligned_load(it) : 0;
0 0 return it ? unaligned_load(it) : 0;
6337 0 0 if (!elementary.load(is)) return false;
1 0 if (!elementary.load(is)) return false;
0 0 if (!elementary.load(is)) return false;
6340 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
6343 0 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
1 0 sequences.resize(data.next_1B());
1 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
6344 0 0 for (auto&& sequence : sequences) {
21 1 for (auto&& sequence : sequences) {
0 0 for (auto&& sequence : sequences) {
6345 0 0 sequence.dependant_range = data.next_4B();
21 0 sequence.dependant_range = data.next_4B();
0 0 sequence.dependant_range = data.next_4B();
6346 0 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
21 0 sequence.elements.resize(data.next_1B());
21 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
6347 0 0 for (auto&& element : sequence.elements) {
45 21 for (auto&& element : sequence.elements) {
0 0 for (auto&& element : sequence.elements) {
6348 0 0 element.type = elementary_feature_type(data.next_4B());
45 0 element.type = elementary_feature_type(data.next_4B());
0 0 element.type = elementary_feature_type(data.next_4B());
6349 0 0 element.elementary_index = data.next_4B();
45 0 element.elementary_index = data.next_4B();
0 0 element.elementary_index = data.next_4B();
6350 0 0 element.sequence_index = data.next_4B();
45 0 element.sequence_index = data.next_4B();
0 0 element.sequence_index = data.next_4B();
6354 0 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
1 0 scores.resize(data.next_1B());
1 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
6355 0 0 for (auto&& score : scores)
21 1 for (auto&& score : scores)
0 0 for (auto&& score : scores)
6356 0 0 score.load(data);
21 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
6376 0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
6384 0 0 caches.reserve(self.sequences.size());
1 0 caches.reserve(self.sequences.size());
0 0 caches.reserve(self.sequences.size());
6386 0 0 for (auto&& sequence : self.sequences) {
21 1 for (auto&& sequence : self.sequences) {
0 0 for (auto&& sequence : self.sequences) {
6387 0 0 caches.emplace_back(int(sequence.elements.size()));
21 0 caches.emplace_back(int(sequence.elements.size()));
0 0 caches.emplace_back(int(sequence.elements.size()));
6388 0 0 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
1 20 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
0 0 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
6389 0 0 for (auto&& element : sequence.elements)
45 21 for (auto&& element : sequence.elements)
0 0 for (auto&& element : sequence.elements)
6390 0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
26 19 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
2 24 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
6393 0 0 key.resize(max_sequence_elements * vli::max_length());
1 0 key.resize(max_sequence_elements * vli::max_length());
0 0 key.resize(max_sequence_elements * vli::max_length());
6394 0 0 window.resize(max_window_size);
1 0 window.resize(max_window_size);
0 0 window.resize(max_window_size);
6405 0 0 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
1 1 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
0 0 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
6406 0 0 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
1 1 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
0 0 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
6407 0 0 for (unsigned i = 0; i < forms.size(); i++)
6 2 for (unsigned i = 0; i < forms.size(); i++)
0 0 for (unsigned i = 0; i < forms.size(); i++)
6408 0 0 if (analyses[i].size() > c.elementary_per_tag[i].size())
4 2 if (analyses[i].size() > c.elementary_per_tag[i].size())
0 0 if (analyses[i].size() > c.elementary_per_tag[i].size())
6416 0 0 for (auto&& cache : c.caches)
42 2 for (auto&& cache : c.caches)
0 0 for (auto&& cache : c.caches)
6422 0 0 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
24 5 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
0 0 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
6428 0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
80 12 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
16 64 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
64 28 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
6433 0 0 for (unsigned i = 0; i < sequences.size(); i++) {
314 12 for (unsigned i = 0; i < sequences.size(); i++) {
0 0 for (unsigned i = 0; i < sequences.size(); i++) {
6434 0 0 if (tags_unchanged >= sequences[i].dependant_range)
298 16 if (tags_unchanged >= sequences[i].dependant_range)
0 0 if (tags_unchanged >= sequences[i].dependant_range)
6438 0 0 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
550 174 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
0 0 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
6444 0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
136 16 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
126 10 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
6447 0 0 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
380 18 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
6454 0 0 if (value == elementary_feature_unknown) {
124 426 if (value == elementary_feature_unknown) {
0 0 if (value == elementary_feature_unknown) {
6463 0 0 if (!key_size) {
124 174 if (!key_size) {
0 0 if (!key_size) {
6466 0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
96 78 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
80 16 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
158 16 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
6523 0 0 cache(const viterbi& self) : features_cache(self.features) {}
1 0 cache(const viterbi& self) : features_cache(self.features) {}
0 0 cache(const viterbi& self) : features_cache(self.features) {}
6536 0 0 if (!forms.size()) return;
2 0 if (!forms.size()) return;
0 0 if (!forms.size()) return;
6540 0 0 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
6 2 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
0 0 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
6541 0 0 if (analyses[i].empty()) return;
6 0 if (analyses[i].empty()) return;
0 0 if (analyses[i].empty()) return;
6542 0 0 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
2 4 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
0 0 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
6545 0 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
2 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
0 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
6551 0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 2 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
6557 0 0 for (unsigned i = 0; i < forms.size(); i++) {
6 2 for (unsigned i = 0; i < forms.size(); i++) {
0 0 for (unsigned i = 0; i < forms.size(); i++) {
6560 0 0 for (int j = 0; j < window_size; j++) window[j] = -1;
6 18 for (int j = 0; j < window_size; j++) window[j] = -1;
0 0 for (int j = 0; j < window_size; j++) window[j] = -1;
6561 0 0 for (int tag = 0; tag < int(analyses[i].size()); tag++)
13 6 for (int tag = 0; tag < int(analyses[i].size()); tag++)
0 0 for (int tag = 0; tag < int(analyses[i].size()); tag++)
6562 0 0 for (int prev = nodes_prev; prev < nodes_now; prev++) {
29 13 for (int prev = nodes_prev; prev < nodes_now; prev++) {
0 0 for (int prev = nodes_prev; prev < nodes_now; prev++) {
6566 0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
36 29 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
36 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
6567 0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
22 14 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
16 6 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
6572 0 0 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
24 5 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
0 0 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
6573 0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
9 20 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
8 1 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
24 5 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
6577 0 0 if (same_tags >= decoding_order-1) {
6 23 if (same_tags >= decoding_order-1) {
0 0 if (same_tags >= decoding_order-1) {
6578 0 0 if (score <= c.nodes[nodes_next-1].score) continue;
0 6 if (score <= c.nodes[nodes_next-1].score) continue;
0 0 if (score <= c.nodes[nodes_next-1].score) continue;
6593 0 0 for (int node = nodes_prev + 1; node < nodes_now; node++)
4 2 for (int node = nodes_prev + 1; node < nodes_now; node++)
0 0 for (int node = nodes_prev + 1; node < nodes_now; node++)
6594 0 0 if (c.nodes[node].score > c.nodes[best].score)
1 3 if (c.nodes[node].score > c.nodes[best].score)
0 0 if (c.nodes[node].score > c.nodes[best].score)
6597 0 0 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
6 2 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
0 0 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
6642 0 0 maps.resize(MAP_TOTAL);
6691 0 0 for (unsigned i = forms.size(); i--;) {
6695 0 0 for (unsigned j = 0; j < analyses[i].size(); j++) {
6706 0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] :
0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] :
6711 0 0 if (index == string::npos) index = tag.size();
6712 0 0 per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0));
0 0 per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0));
6714 0 0 if (index < tag.size()) index++;
6715 0 0 if (index < tag.size()) index = tag.find(separator, index);
6716 0 0 if (index < tag.size()) index++;
6717 0 0 for (size_t length; index < tag.size(); index += length + 1) {
6719 0 0 length = (length == string::npos ? tag.size() : length) - index;
6721 0 0 for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++)
6722 0 0 if (tag[index + equal_sign] == '=') {
6726 0 0 if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE;
6729 0 0 if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER;
6730 0 0 if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER;
6731 0 0 if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON;
6734 0 0 if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE;
6738 0 0 if (value >= 0)
6744 0 0 if (tag.size() >= 2 && tag[1] == 'V') {
0 0 if (tag.size() >= 2 && tag[1] == 'V') {
0 0 if (tag.size() >= 2 && tag[1] == 'V') {
6746 0 0 verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
0 0 verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
6756 0 0 if (verb_candidate >= 0) {
6762 0 0 if (analyses[i].size() == 1) {
6770 0 0 } else if (forms[i].len <= 0) {
6785 0 0 while (form.len) {
6789 0 0 num = num || cat & unicode::N;
0 0 num = num || cat & unicode::N;
6790 0 0 cap = cap || cat & unicode::Lut;
0 0 cap = cap || cat & unicode::Lut;
6791 0 0 dash = dash || cat & unicode::Pd;
0 0 dash = dash || cat & unicode::Pd;
6793 0 0 if (index == 10 || (!form.len && index < 10)) {
0 0 if (index == 10 || (!form.len && index < 10)) {
0 0 if (index == 10 || (!form.len && index < 10)) {
6823 0 0 if (prev_dynamic) {
6831 0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
6881 0 0 maps.resize(MAP_TOTAL);
6917 0 0 for (unsigned i = forms.size(); i--;) {
6921 0 0 for (unsigned j = 0; j < analyses[i].size(); j++) {
6924 0 0 per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty;
6925 0 0 per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty;
6926 0 0 per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty;
6927 0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
6930 0 0 if (analyses[i][j].tag[0] == 'V') {
6932 0 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
0 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
6942 0 0 if (verb_candidate >= 0) {
6948 0 0 if (analyses[i].size() == 1) {
6952 0 0 } else if (forms[i].len <= 0) {
6963 0 0 while (form.len) {
6967 0 0 num = num || cat & unicode::N;
0 0 num = num || cat & unicode::N;
6968 0 0 cap = cap || cat & unicode::Lut;
0 0 cap = cap || cat & unicode::Lut;
6969 0 0 dash = dash || cat & unicode::Pd;
0 0 dash = dash || cat & unicode::Pd;
6971 0 0 if (index == 5 || (!form.len && index < 5)) {
0 0 if (index == 5 || (!form.len && index < 5)) {
0 0 if (index == 5 || (!form.len && index < 5)) {
6991 0 0 if (prev_dynamic) {
6999 0 0 if (tag.tag[0] == 'V') {
7049 1 0 maps.resize(MAP_TOTAL);
7097 6 2 for (unsigned i = forms.size(); i--;) {
7101 13 6 for (unsigned j = 0; j < analyses[i].size(); j++) {
7103 13 0 per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty;
7104 13 0 per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty;
7105 13 0 per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty;
7106 2 11 per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty;
7107 0 13 per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty;
7108 7 6 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
7 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
7111 3 10 if (analyses[i][j].tag[0] == 'V') {
7113 1 2 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
1 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
7123 2 4 if (verb_candidate >= 0) {
7129 3 3 if (analyses[i].size() == 1) {
7137 0 3 } else if (forms[i].len <= 0) {
7152 14 3 while (form.len) {
7156 14 0 num = num || cat & unicode::N;
14 0 num = num || cat & unicode::N;
7157 10 4 cap = cap || cat & unicode::Lut;
9 1 cap = cap || cat & unicode::Lut;
7158 14 0 dash = dash || cat & unicode::Pd;
14 0 dash = dash || cat & unicode::Pd;
7160 14 0 if (index == 10 || (!form.len && index < 10)) {
11 3 if (index == 10 || (!form.len && index < 10)) {
0 3 if (index == 10 || (!form.len && index < 10)) {
7190 24 5 if (prev_dynamic) {
7198 9 20 if (tag.tag[0] == 'V') {
7243 0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 0 while (lock.test_and_set(memory_order_acquire)) {}
7252 0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 0 while (lock.test_and_set(memory_order_acquire)) {}
7253 0 0 if (!stack.empty()) {
1 1 if (!stack.empty()) {
0 0 if (!stack.empty()) {
7302 0 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
1 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
0 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
7316 0 0 if (dict.reset(morpho::load(is)), !dict) return false;
1 0 if (dict.reset(morpho::load(is)), !dict) return false;
0 0 if (dict.reset(morpho::load(is)), !dict) return false;
7318 0 0 if (!features.load(is)) return false;
1 0 if (!features.load(is)) return false;
0 0 if (!features.load(is)) return false;
7330 0 0 if (!dict) return;
2 0 if (!dict) return;
0 0 if (!dict) return;
7333 0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
1 1 if (!c) c = new cache(*this);
1 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
7336 0 0 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
1 1 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
0 0 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
7337 0 0 for (unsigned i = 0; i < forms.size(); i++) {
6 2 for (unsigned i = 0; i < forms.size(); i++) {
0 0 for (unsigned i = 0; i < forms.size(); i++) {
7340 0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
6 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
6 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
7343 0 0 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
1 1 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
0 0 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
7346 0 0 for (unsigned i = 0; i < forms.size(); i++)
6 2 for (unsigned i = 0; i < forms.size(); i++)
0 0 for (unsigned i = 0; i < forms.size(); i++)
7357 0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
7444 1 0 tagger_id id = tagger_id(is.get());
0 1 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
7450 0 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
7451 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
7459 1 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
7460 1 0 if (res->load(is)) return res.release();
1 0 if (res->load(is)) return res.release();
7467 0 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
7468 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
7477 1 0 ifstream f(path_from_utf8(fname).c_str(), ifstream::binary);
7478 1 0 if (!f) return nullptr;
7480 1 0 return load(f);
7485 2 0 return morpho ? morpho->new_tokenizer() : nullptr;
7574 0 0 for (int i = 0; i < 15 && pdt_tag[i]; i++)
0 0 for (int i = 0; i < 15 && pdt_tag[i]; i++)
7575 0 0 if (pdt_tag[i] != '-') {
7576 0 0 if (!tag.empty()) tag.push_back('|');
7583 0 0 for (unsigned i = 0; i + 2 < lemma.size(); i++)
7584 0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
7585 0 0 if (!tag.empty()) tag.push_back('|');
7594 0 0 return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false;
7605 0 0 for (auto&& tagged_lemma : tagged_lemmas) {
7611 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
7619 0 0 for (auto&& tagged_lemma_forms : forms) {
7620 0 0 for (auto&& tagged_form : tagged_lemma_forms.forms)
7626 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
7672 0 0 return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false;
7682 0 0 for (auto&& tagged_lemma : tagged_lemmas)
7686 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
7694 0 0 for (auto&& tagged_lemma_forms : forms)
7698 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
7744 0 0 return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false;
7754 0 0 for (auto&& tagged_lemma : tagged_lemmas)
7758 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
7766 0 0 for (auto&& tagged_lemma_forms : forms)
7770 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
7805 0 0 if (name == "pdt_to_conll2009") return tagset_converter::new_pdt_to_conll2009_converter();
7806 0 0 if (name == "strip_lemma_comment") return tagset_converter::new_strip_lemma_comment_converter(dictionary);
7807 0 0 if (name == "strip_lemma_id") return tagset_converter::new_strip_lemma_id_converter(dictionary);
7814 0 0 inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; }
0 0 inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; }
7815 0 0 inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); }
7824 0 0 for (unsigned i = 0; i < forms.size(); i++) {
7826 0 0 for (unsigned j = forms.size() - 1; j > i; j--)
7827 0 0 if (forms[j].lemma == forms[i].lemma) {
7829 0 0 for (auto&& tagged_form : forms[j].forms)
7833 0 0 if (j < forms.size() - 1) {
7841 0 0 if (any_merged && forms[i].forms.size() > 1) {
0 0 if (any_merged && forms[i].forms.size() > 1) {
0 0 if (any_merged && forms[i].forms.size() > 1) {
7844 0 0 inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; }
0 0 inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; }
7845 0 0 inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); }
7999 428 4 const unordered_set czech_tokenizer::abbreviations_czech = {
0 0 const unordered_set czech_tokenizer::abbreviations_czech = {
8015 412 4 const unordered_set czech_tokenizer::abbreviations_slovak = {
0 0 const unordered_set czech_tokenizer::abbreviations_slovak = {
8032 1 0 : ragel_tokenizer(version <= 1 ? 1 : 2), m(m) {
0 0 : ragel_tokenizer(version <= 1 ? 1 : 2), m(m) {
8046 0 22 if (!m) return;
8047 0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
8050 0 0 for (unsigned hyphens = 1; hyphens <= 2; hyphens++) {
8052 0 0 if (tokens.size() < 2*hyphens + 1) break;
8054 0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
8055 0 0 tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start ||
8056 0 0 tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start ||
0 0 tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start ||
8060 0 0 if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0)
8064 0 0 if (matched_hyphens) {
8078 0 2 while (tokenize_url_email(tokens))
8079 0 0 if (emergency_sentence_split(tokens))
8095 2 0 if ( ( current) == ( (chars.size() - 1)) )
8100 43 111 switch ( _czech_tokenizer_from_state_actions[cs] ) {
8109 43 111 if ( _klen > 0 ) {
8114 69 42 if ( _upper < _lower )
8118 17 52 if ( _widec < _mid[0] )
8120 51 1 else if ( _widec > _mid[1] )
8126 0 1 if (
8127 1 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 1 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
8132 0 0 if (
8133 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
8146 154 0 if ( _klen > 0 ) {
8151 486 118 if ( _upper < _lower )
8155 270 216 if ( _widec < *_mid )
8157 180 36 else if ( _widec > *_mid )
8169 101 17 if ( _klen > 0 ) {
8174 133 20 if ( _upper < _lower )
8178 38 95 if ( _widec < _mid[0] )
8180 14 81 else if ( _widec > _mid[1] )
8195 70 85 if ( _czech_tokenizer_trans_actions[_trans] == 0 )
8209 0 1 do
8210 0 1 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8218 4 4 for (current = ts; current < whitespace; current++)
8221 1 3 if (eos) {( current)++; goto _out; }
8226 0 0 if (!tokens.empty()) {( current)++; goto _out; }
8228 0 0 do
8229 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8238 0 19 do
8239 0 19 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8247 0 17 do
8248 0 17 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8255 0 0 if (!tokens.empty()) {( current)++; goto _out; }
8257 0 0 do
8258 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8267 0 2 do
8268 0 2 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8276 42 112 switch ( _czech_tokenizer_to_state_actions[cs] ) {
8282 154 0 if ( cs == 0 )
8284 152 2 if ( ++( current) != ( (chars.size() - 1)) )
8287 2 0 if ( ( current) == ( (chars.size() - 1)) )
8289 1 1 if ( _czech_tokenizer_eof_trans[cs] > 0 ) {
8317 456 4 const unordered_set english_tokenizer::abbreviations = {
0 0 const unordered_set english_tokenizer::abbreviations = {
8416 0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
8431 0 0 if ( ( index) == ( end) )
8440 0 0 if ( _klen > 0 ) {
8445 0 0 if ( _upper < _lower )
8449 0 0 if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid )
8451 0 0 else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid )
8463 0 0 if ( _klen > 0 ) {
8468 0 0 if ( _upper < _lower )
8472 0 0 if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] )
8474 0 0 else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] )
8488 0 0 if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 )
8502 0 0 if ( cs == 0 )
8504 0 0 if ( ++( index) != ( end) )
8507 0 0 if ( ( index) == ( end) )
8509 0 0 switch ( _english_tokenizer_split_token_eof_actions[cs] ) {
8519 0 0 if (split_len && split_len < end) {
8673 0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
8682 0 0 while (tokenize_url_email(tokens))
8683 0 0 if (emergency_sentence_split(tokens))
8699 0 0 if ( ( current) == ( (chars.size() - 1)) )
8704 0 0 switch ( _english_tokenizer_from_state_actions[cs] ) {
8713 0 0 if ( _klen > 0 ) {
8718 0 0 if ( _upper < _lower )
8722 0 0 if ( _widec < _mid[0] )
8724 0 0 else if ( _widec > _mid[1] )
8730 0 0 if (
8731 0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
8736 0 0 if (
8737 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
8750 0 0 if ( _klen > 0 ) {
8755 0 0 if ( _upper < _lower )
8759 0 0 if ( _widec < *_mid )
8761 0 0 else if ( _widec > *_mid )
8773 0 0 if ( _klen > 0 ) {
8778 0 0 if ( _upper < _lower )
8782 0 0 if ( _widec < _mid[0] )
8784 0 0 else if ( _widec > _mid[1] )
8799 0 0 if ( _english_tokenizer_trans_actions[_trans] == 0 )
8813 0 0 do
8814 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8822 0 0 for (current = ts; current < whitespace; current++)
8825 0 0 if (eos) {( current)++; goto _out; }
8830 0 0 if (!tokens.empty()) {( current)++; goto _out; }
8832 0 0 do
8833 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8842 0 0 do
8843 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8851 0 0 do
8852 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8859 0 0 if (!tokens.empty()) {( current)++; goto _out; }
8861 0 0 do
8862 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8871 0 0 do
8872 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
8880 0 0 switch ( _english_tokenizer_to_state_actions[cs] ) {
8886 0 0 if ( cs == 0 )
8888 0 0 if ( ++( current) != ( (chars.size() - 1)) )
8891 0 0 if ( ( current) == ( (chars.size() - 1)) )
8893 0 0 if ( _english_tokenizer_eof_trans[cs] > 0 ) {
9046 0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 2 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
9055 0 2 while (tokenize_url_email(tokens))
9056 0 0 if (emergency_sentence_split(tokens))
9072 2 0 if ( ( current) == ( (chars.size() - 1)) )
9077 8 21 switch ( _generic_tokenizer_from_state_actions[cs] ) {
9086 8 21 if ( _klen > 0 ) {
9091 14 8 if ( _upper < _lower )
9095 2 12 if ( _widec < _mid[0] )
9097 12 0 else if ( _widec > _mid[1] )
9103 0 0 if (
9104 0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
9109 0 0 if (
9110 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
9123 29 0 if ( _klen > 0 ) {
9128 88 21 if ( _upper < _lower )
9132 52 36 if ( _widec < *_mid )
9134 28 8 else if ( _widec > *_mid )
9146 19 2 if ( _klen > 0 ) {
9151 25 4 if ( _upper < _lower )
9155 8 17 if ( _widec < _mid[0] )
9157 2 15 else if ( _widec > _mid[1] )
9172 10 21 if ( _generic_tokenizer_trans_actions[_trans] == 0 )
9185 0 0 do
9186 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9194 0 0 for (current = ts; current < whitespace; current++)
9197 0 0 if (eos) {( current)++; goto _out; }
9202 0 0 if (!tokens.empty()) {( current)++; goto _out; }
9204 0 0 do
9205 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9213 0 6 do
9214 0 6 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9222 0 2 do
9223 0 2 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9230 0 0 if (!tokens.empty()) {( current)++; goto _out; }
9232 0 0 do
9233 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9241 0 0 do
9242 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
9250 8 23 switch ( _generic_tokenizer_to_state_actions[cs] ) {
9256 31 0 if ( cs == 0 )
9258 27 4 if ( ++( current) != ( (chars.size() - 1)) )
9261 4 0 if ( ( current) == ( (chars.size() - 1)) )
9263 2 2 if ( _generic_tokenizer_eof_trans[cs] > 0 ) {
9641 3 0 initialize_ragel_map();
9645 0 6 while (ragel_map_flag.test_and_set()) {}
9646 2 4 if (ragel_map.empty()) {
9647 256 2 for (uint8_t ascii = 0; ascii < 128; ascii++)
9659 2 6 if (chr >= ragel_map.size())
9679 48 0 if ( ( current) == ( (chars.size() - 1)) )
9687 0 138 if ( _klen > 0 ) {
9692 0 0 if ( _upper < _lower )
9696 0 0 if ( _widec < _mid[0] )
9698 0 0 else if ( _widec > _mid[1] )
9704 0 0 if (
9710 0 0 if (
9724 138 0 if ( _klen > 0 ) {
9729 355 127 if ( _upper < _lower )
9733 133 222 if ( _widec < *_mid )
9735 211 11 else if ( _widec > *_mid )
9747 127 0 if ( _klen > 0 ) {
9752 298 44 if ( _upper < _lower )
9756 81 217 if ( _widec < _mid[0] )
9758 134 83 else if ( _widec > _mid[1] )
9772 0 138 if ( _ragel_url_email_trans_actions[_trans] == 0 )
9793 94 44 if ( cs == 0 )
9795 90 4 if ( ++( current) != ( (chars.size() - 1)) )
9801 0 48 if (end > start) {
9826 0 0 vertical_tokenizer() : unicode_tokenizer(0) {}
9874 3 0 ragel_tokenizer::initialize_ragel_map();
9876 3 0 set_text(string_piece(nullptr, 0));
9882 3 3 if (make_copy && text.str) {
3 0 if (make_copy && text.str) {
9889 130 6 for (const char* curr_str = text.str; text.len; curr_str = text.str)
9895 7 0 vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer;
9897 7 0 if (forms) forms->clear();
9898 4 3 if (current >= chars.size() - 1) return false;
9901 4 0 if (forms)
9902 32 4 for (auto&& token : tokens)
9909 48 3 if (current >= chars.size() - 1) return false;
9911 48 0 return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false;
9918 0 47 return tokens.size() >= 500 ||
9919 47 0 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
0 0 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
0 47 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
9920 0 0 (tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po);
9926 4 0 if (eos_chr == '.' && !tokens.empty()) {
0 4 if (eos_chr == '.' && !tokens.empty()) {
4 0 if (eos_chr == '.' && !tokens.empty()) {
9928 2 2 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
0 2 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
2 2 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
9932 2 0 if (abbreviations) {
9934 11 2 for (size_t i = 0; i < tokens.back().length; i++)
9936 1 1 if (abbreviations->count(eos_buffer))
9957 0 0 if (current >= chars.size() - 1) return false;
9961 0 0 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
0 0 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
0 0 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
0 0 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
9964 0 0 if (current < chars.size() - 1) {
9966 0 0 if (current < chars.size() - 1 &&
0 0 if (current < chars.size() - 1 &&
0 0 if (current < chars.size() - 1 &&
9967 0 0 ((chars[current-1].chr == '\r' && chars[current].chr == '\n') ||
0 0 ((chars[current-1].chr == '\r' && chars[current].chr == '\n') ||
9968 0 0 (chars[current-1].chr == '\n' && chars[current].chr == '\r')))
9972 0 0 if (line_start < line_end)
10225 0 0 for (; *str; str++)
10226 0 0 if (((unsigned char)*str) >= 0x80) {
10227 0 0 if (((unsigned char)*str) < 0xC0) return false;
10228 0 0 else if (((unsigned char)*str) < 0xE0) {
10229 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10230 0 0 } else if (((unsigned char)*str) < 0xF0) {
10231 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10232 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10233 0 0 } else if (((unsigned char)*str) < 0xF8) {
10234 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10235 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10236 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10243 0 0 for (; len > 0; str++, len--)
10244 0 0 if (((unsigned char)*str) >= 0x80) {
10245 0 0 if (((unsigned char)*str) < 0xC0) return false;
10246 0 0 else if (((unsigned char)*str) < 0xE0) {
10247 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10248 0 0 } else if (((unsigned char)*str) < 0xF0) {
10249 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10250 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10251 0 0 } else if (((unsigned char)*str) < 0xF8) {
10252 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10253 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10254 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
10263 0 0 for (char32_t chr; (chr = decode(str)); )
10270 0 0 while (len)
10277 0 0 for (auto&& chr : str)
10335 0 0 return {3, 2, 0, ""};
10865 214 32574 IF_BIT_0(prob)
563 32225 IF_BIT_0(prob)
10870 559 4 if (checkDicSize != 0 || processedPos != 0)
10872 0 559 (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
10874 237 326 if (state < kNumLitStates)
10878 203 1693 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
1297 599 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
1659 237 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
10882 0 326 unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
10884 88 238 state -= (state < 10) ? 3 : 6;
10893 287 2321 GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
1780 828 GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
10895 2282 326 while (symbol < 0x100);
10905 210 32015 IF_BIT_0(prob)
206 32019 IF_BIT_0(prob)
10914 32019 0 if (checkDicSize == 0 && processedPos == 0)
10917 116 31903 IF_BIT_0(prob)
31929 90 IF_BIT_0(prob)
10921 132 31797 IF_BIT_0(prob)
78 31851 IF_BIT_0(prob)
10924 0 78 dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
10927 3 75 state = state < kNumLitStates ? 9 : 11;
10937 21 69 IF_BIT_0(prob)
51 39 IF_BIT_0(prob)
10946 5 34 IF_BIT_0(prob)
25 14 IF_BIT_0(prob)
10962 31809 132 state = state < kNumLitStates ? 8 : 11;
10968 203 31944 IF_BIT_0(probLen)
211 31936 IF_BIT_0(probLen)
10979 110 31826 IF_BIT_0(probLen)
29 31907 IF_BIT_0(probLen)
10994 1131 254845 TREE_DECODE(probLen, limit, len);
972 255004 TREE_DECODE(probLen, limit, len);
223829 32147 TREE_DECODE(probLen, limit, len);
10998 206 31941 if (state >= kNumStates)
11003 18 188 TREE_6_DECODE(prob, distance);
132 74 TREE_6_DECODE(prob, distance);
39 167 TREE_6_DECODE(prob, distance);
189 17 TREE_6_DECODE(prob, distance);
22 184 TREE_6_DECODE(prob, distance);
152 54 TREE_6_DECODE(prob, distance);
20 186 TREE_6_DECODE(prob, distance);
83 123 TREE_6_DECODE(prob, distance);
12 194 TREE_6_DECODE(prob, distance);
115 91 TREE_6_DECODE(prob, distance);
29 177 TREE_6_DECODE(prob, distance);
133 73 TREE_6_DECODE(prob, distance);
11004 173 33 if (distance >= kStartPosModelIndex)
11009 80 93 if (posSlot < kEndPosModelIndex)
11016 112 80 do
11018 24 168 GET_BIT2(prob + i, i, ; , distance |= mask);
89 103 GET_BIT2(prob + i, i, ; , distance |= mask);
11027 1017 93 do
11029 134 976 NORMALIZE
11053 12 81 GET_BIT2(prob + i, i, ; , distance |= 1);
55 38 GET_BIT2(prob + i, i, ; , distance |= 1);
11054 16 77 GET_BIT2(prob + i, i, ; , distance |= 2);
52 41 GET_BIT2(prob + i, i, ; , distance |= 2);
11055 9 84 GET_BIT2(prob + i, i, ; , distance |= 4);
53 40 GET_BIT2(prob + i, i, ; , distance |= 4);
11056 13 80 GET_BIT2(prob + i, i, ; , distance |= 8);
58 35 GET_BIT2(prob + i, i, ; , distance |= 8);
11058 0 93 if (distance == (uint32_t)0xFFFFFFFF)
11070 206 0 if (checkDicSize == 0)
11072 206 0 if (distance >= processedPos)
11075 0 0 else if (distance >= checkDicSize)
11077 85 121 state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
11082 32147 0 if (limit == dicPos)
11086 0 32147 unsigned curLen = ((rem < len) ? (unsigned)rem : len);
11087 0 32147 size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
11092 32147 0 if (pos + curLen <= dicBufSize)
11098 8657310 32147 do
11104 0 0 do
11107 0 0 if (++pos == dicBufSize)
11115 32712 76 while (dicPos < limit && buf < bufLimit);
11116 18 58 NORMALIZE;
11134 0 80 if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
11141 0 0 if (limit - dicPos < len)
11144 0 0 if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
0 0 if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
11149 0 0 while (len-- != 0)
11151 0 0 dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
11163 76 0 if (p->checkDicSize == 0)
11166 0 76 if (limit - p->dicPos > rem)
11169 76 0 RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
11170 0 76 if (p->processedPos >= p->prop.dicSize)
11174 72 4 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
0 72 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
0 0 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
11176 0 76 if (p->remainLen > kMatchSpecLenStart)
11207 0 71 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
42 29 IF_BIT_0_CHECK(prob)
11214 42 0 if (p->checkDicSize != 0 || p->processedPos != 0)
11217 0 42 (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
11219 15 27 if (state < kNumLitStates)
11222 7 113 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
7 0 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
85 35 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
105 15 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
11227 0 27 ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
11237 21 195 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
21 0 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
154 62 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
11239 189 27 while (symbol < 0x100);
11249 1 28 IF_BIT_0_CHECK(prob)
1 0 IF_BIT_0_CHECK(prob)
11 18 IF_BIT_0_CHECK(prob)
11261 0 18 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
18 0 IF_BIT_0_CHECK(prob)
11265 2 16 IF_BIT_0_CHECK(prob)
2 0 IF_BIT_0_CHECK(prob)
13 5 IF_BIT_0_CHECK(prob)
11268 6 7 NORMALIZE_CHECK;
6 0 NORMALIZE_CHECK;
11280 0 0 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
11288 0 0 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
11304 2 14 IF_BIT_0_CHECK(probLen)
2 0 IF_BIT_0_CHECK(probLen)
15 1 IF_BIT_0_CHECK(probLen)
11315 0 1 IF_BIT_0_CHECK(probLen)
0 0 IF_BIT_0_CHECK(probLen)
1 0 IF_BIT_0_CHECK(probLen)
11330 8 40 TREE_DECODE_CHECK(probLen, limit, len);
8 0 TREE_DECODE_CHECK(probLen, limit, len);
22 26 TREE_DECODE_CHECK(probLen, limit, len);
32 16 TREE_DECODE_CHECK(probLen, limit, len);
11334 11 5 if (state < 4)
11340 7 59 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
7 0 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
42 24 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
55 11 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
11341 10 1 if (posSlot >= kStartPosModelIndex)
11347 7 3 if (posSlot < kEndPosModelIndex)
11354 40 3 do
11356 5 38 NORMALIZE_CHECK
5 0 NORMALIZE_CHECK
11367 21 10 do
11369 5 26 GET_BIT_CHECK(prob + i, i);
5 0 GET_BIT_CHECK(prob + i, i);
10 21 GET_BIT_CHECK(prob + i, i);
11377 12 46 NORMALIZE_CHECK;
12 0 NORMALIZE_CHECK;
11394 0 0 if (initDic)
11400 0 0 if (initState)
11415 31960 4 for (i = 0; i < numProbs; i++)
11431 80 0 while (p->remainLen != kMatchSpecLenStart)
11435 4 76 if (p->needFlush != 0)
11437 24 0 for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
20 4 for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
11439 0 4 if (p->tempBufSize < RC_INIT_SIZE)
11444 4 0 if (p->tempBuf[0] != 0)
11452 4 76 if (p->dicPos >= dicLimit)
11454 4 0 if (p->remainLen == 0 && p->code == 0)
4 0 if (p->remainLen == 0 && p->code == 0)
11459 0 0 if (finishMode == LZMA_FINISH_ANY)
11464 0 0 if (p->remainLen != 0)
11472 4 72 if (p->needInitState)
11475 0 76 if (p->tempBufSize == 0)
11479 71 5 if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
11482 0 71 if (dummyRes == DUMMY_ERROR)
11490 0 71 if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
11500 76 0 if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
11510 0 0 while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
0 0 while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
11513 0 0 if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
11516 0 0 if (dummyRes == DUMMY_ERROR)
11522 0 0 if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
11529 0 0 if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
11538 0 0 if (p->code == 0)
11553 0 0 if (p->dicPos == p->dicBufSize)
11556 0 0 if (outSize > p->dicBufSize - dicPos)
11576 0 0 if (res != 0)
11578 0 0 if (outSizeCur == 0 || outSize == 0)
11606 4 0 if (size < LZMA_PROPS_SIZE)
11611 0 4 if (dicSize < LZMA_DIC_MIN)
11616 4 0 if (d >= (9 * 5 * 5))
11630 0 4 if (p->probs == 0 || numProbs != p->numProbs)
0 0 if (p->probs == 0 || numProbs != p->numProbs)
11635 4 0 if (p->probs == 0)
11644 4 0 RINOK(LzmaProps_Decode(&propNew, props, propsSize));
11645 4 0 RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
11654 0 0 RINOK(LzmaProps_Decode(&propNew, props, propsSize));
11655 0 0 RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
11657 0 0 if (p->dic == 0 || dicBufSize != p->dicBufSize)
0 0 if (p->dic == 0 || dicBufSize != p->dicBufSize)
11661 0 0 if (p->dic == 0)
11681 4 0 if (inSize < RC_INIT_SIZE)
11686 4 0 if (res != 0)
11696 4 0 if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
0 4 if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
11710 4 4 static void LzmaFree(void* /*p*/, void *address) { delete[] (char*) address; }
11718 4 0 if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false;
11719 4 0 if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false;
11720 4 0 if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false;
11721 4 0 if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false;
11722 4 0 if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false;
11725 4 0 if (!is.read((char *) compressed.data(), compressed_len)) return false;
4 0 if (!is.read((char *) compressed.data(), compressed_len)) return false;
11729 4 0 auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator);
11730 4 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
4 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
4 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
11778 0 0 return {1, 11, 0, ""};
11789 0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
11791 0 0 << (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n"
0 0 << (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n"
11793 0 0 "Mathematics and Physics, Charles University in Prague, Czech Republic.";
11799 4 0 } // namespace ufal
4 0 } // namespace ufal