Pass a document "created" timestamp through the omega worker protocol:
response() sends it after the page count and Worker::extract() reads it back.
Handlers with no creation date to report pass time_t(-1).

diff --git a/xapian-applications/omega/assistant.cc b/xapian-applications/omega/assistant.cc
index b9fa3bb6fe0..58a81d544c7 100644
--- a/xapian-applications/omega/assistant.cc
+++ b/xapian-applications/omega/assistant.cc
@@ -74,7 +74,8 @@ response(const char* dump, size_t dump_len,
          const char* title, size_t title_len,
          const char* keywords, size_t keywords_len,
          const char* author, size_t author_len,
-         int pages)
+         int pages,
+         time_t created)
 {
     if (replied) {
         // Logic error - handler already called response() for this file.
@@ -82,12 +83,14 @@ response(const char* dump, size_t dump_len,
     }
     replied = true;
     unsigned u_pages = pages + 1;
+    unsigned long u_created = static_cast<unsigned long>(created);
     if (!write_string(sockt, nullptr, 0) ||
         !write_string(sockt, dump, dump_len) ||
         !write_string(sockt, title, title_len) ||
         !write_string(sockt, keywords, keywords_len) ||
         !write_string(sockt, author, author_len) ||
-        !write_unsigned(sockt, u_pages)) {
+        !write_unsigned(sockt, u_pages) ||
+        !write_unsigned(sockt, u_created)) {
         exit(1);
     }
 }
diff --git a/xapian-applications/omega/handler.h b/xapian-applications/omega/handler.h
index e5d037e3b37..b44a9c53028 100644
--- a/xapian-applications/omega/handler.h
+++ b/xapian-applications/omega/handler.h
@@ -53,20 +53,23 @@ response(const char* dump, size_t dump_len,
          const char* title, size_t title_len,
          const char* keywords, size_t keywords_len,
          const char* author, size_t author_len,
-         int pages);
+         int pages,
+         time_t created);
 
 inline void
 response(const char* dump,
          const char* title,
          const char* keywords,
          const char* author,
-         int pages)
+         int pages,
+         time_t created)
 {
     response(dump, dump ? std::strlen(dump) : 0,
              title, title ? std::strlen(title) : 0,
              keywords, keywords ? std::strlen(keywords) : 0,
              author, author ? std::strlen(author) : 0,
-             pages);
+             pages,
+             created);
 }
 
 inline void
@@ -74,13 +77,15 @@ response(const std::string& dump,
          const std::string& title,
          const std::string& keywords,
          const std::string& author,
-         int pages)
+         int pages,
+         time_t created)
 {
     response(dump.data(), dump.size(),
              title.data(), title.size(),
              keywords.data(), keywords.size(),
              author.data(), author.size(),
-             pages);
+             pages,
+             created);
 }
 
 void
diff --git a/xapian-applications/omega/handler_gmime.cc b/xapian-applications/omega/handler_gmime.cc
index 4d48afff91b..01d12a53417 100644
--- a/xapian-applications/omega/handler_gmime.cc
+++ b/xapian-applications/omega/handler_gmime.cc
@@ -186,7 +186,8 @@ extract(const string& filename,
              title, strlen(title),
              nullptr, 0,
              author, strlen(author),
-             -1);
+             -1,
+             time_t(-1));
 
 #if GMIME_MAJOR_VERSION >= 3
     free(author);
diff --git a/xapian-applications/omega/handler_libabw.cc b/xapian-applications/omega/handler_libabw.cc
index f0db2a1b7b1..b9ca2d96f9a 100644
--- a/xapian-applications/omega/handler_libabw.cc
+++ b/xapian-applications/omega/handler_libabw.cc
@@ -167,7 +167,8 @@ try {
              title, title_len,
              keywords.data(), keywords.size(),
              author, author_len,
-             -1);
+             -1,
+             time_t(-1));
 } catch (...) {
     fail("Libabw threw an exception");
 }
diff --git a/xapian-applications/omega/handler_libarchive.cc b/xapian-applications/omega/handler_libarchive.cc
index 3fee85e47fb..f7b59e80607 100644
--- a/xapian-applications/omega/handler_libarchive.cc
+++ b/xapian-applications/omega/handler_libarchive.cc
@@ -99,7 +99,7 @@ extract_opendoc(struct archive* archive_obj)
     // in either order in the ZIP container.
     parser.parse(styles);
 
-    response(parser.dump, title, keywords, author, -1);
+    response(parser.dump, title, keywords, author, -1, time_t(-1));
 
     return true;
 }
@@ -147,7 +147,7 @@ extract_xlsx(struct archive* archive_obj)
         }
     }
     parser.parse(sheets);
-    response(parser.dump, title, keywords, author, -1);
+    response(parser.dump, title, keywords, author, -1, time_t(-1));
 
     return true;
 }
@@ -229,7 +229,7 @@ extract_msxml(struct archive* archive_obj,
 
     MSXmlParser parser;
     parser.parse(content);
-    response(parser.dump, title, keywords, author, -1);
+    response(parser.dump, title, keywords, author, -1, time_t(-1));
 
     return true;
 }
@@ -267,7 +267,7 @@ extract_xps(struct archive* archive_obj)
         }
     }
 
-    response(parser.dump, title, keywords, author, -1);
+    response(parser.dump, title, keywords, author, -1, time_t(-1));
 
     return true;
 }
diff --git a/xapian-applications/omega/handler_libcdr.cc b/xapian-applications/omega/handler_libcdr.cc
index 4876b212aa6..d21af737536 100644
--- a/xapian-applications/omega/handler_libcdr.cc
+++ b/xapian-applications/omega/handler_libcdr.cc
@@ -57,7 +57,8 @@ try {
              nullptr, 0,
              nullptr, 0,
              nullptr, 0,
-             pages);
+             pages,
+             time_t(-1));
 } catch (...) {
     fail("Libcdr threw an exception");
 }
diff --git a/xapian-applications/omega/handler_libebook.cc b/xapian-applications/omega/handler_libebook.cc
index 198505faf0a..2f969128b49 100644
--- a/xapian-applications/omega/handler_libebook.cc
+++ b/xapian-applications/omega/handler_libebook.cc
@@ -184,7 +184,8 @@ try {
              title, title_len,
              keywords.data(), keywords.size(),
              author, author_len,
-             -1);
+             -1,
+             time_t(-1));
 } catch (...) {
     fail("Libe-book threw an exception");
 }
diff --git a/xapian-applications/omega/handler_libetonyek.cc b/xapian-applications/omega/handler_libetonyek.cc
index 6f49de3cecc..7ba5cfe9e9a 100644
--- a/xapian-applications/omega/handler_libetonyek.cc
+++ b/xapian-applications/omega/handler_libetonyek.cc
@@ -151,7 +151,8 @@ extract_key(RVNGInputStream* input)
              nullptr, 0,
              nullptr, 0,
              nullptr, 0,
-             -1);
+             -1,
+             time_t(-1));
 }
 
 static void
@@ -174,7 +175,8 @@ extract_numbers(RVNGInputStream* input)
              nullptr, 0,
              nullptr, 0,
              nullptr, 0,
-             -1);
+             -1,
+             time_t(-1));
 }
 
 static void
@@ -209,7 +211,8 @@ extract_pages(RVNGInputStream* input)
              title, title_len,
              keywords.data(), keywords.size(),
              author, author_len,
-             -1);
+             -1,
+             time_t(-1));
 }
 
 void
diff --git a/xapian-applications/omega/handler_libextractor.cc b/xapian-applications/omega/handler_libextractor.cc
index fd120e61a51..2ebfa39f3bb 100644
--- a/xapian-applications/omega/handler_libextractor.cc
+++ b/xapian-applications/omega/handler_libextractor.cc
@@ -153,7 +153,7 @@ try {
     EXTRACTOR_extract(plugins, filename.c_str(), NULL, 0,
                       &process_metadata, &md);
 
-    response(md.dump, md.title, md.keywords, md.author, md.pages);
+    response(md.dump, md.title, md.keywords, md.author, md.pages, time_t(-1));
 } catch (...) {
     fail("Libextractor threw an exception");
 }
diff --git a/xapian-applications/omega/handler_libmwaw.cc b/xapian-applications/omega/handler_libmwaw.cc
index 908eeaa45ef..df73d61d34d 100644
--- a/xapian-applications/omega/handler_libmwaw.cc
+++ b/xapian-applications/omega/handler_libmwaw.cc
@@ -171,7 +171,8 @@ extract_word(RVNGFileStream* input)
              title, title_len,
              keywords.data(), keywords.size(),
              author, author_len,
-             -1);
+             -1,
+             time_t(-1));
 }
 
 static void
@@ -210,7 +211,8 @@ extract_spreadsheet(RVNGFileStream* input)
              title, title_len,
              keywords.data(), keywords.size(),
              author, author_len,
-             pages_content.size());
+             pages_content.size(),
+             time_t(-1));
 }
 
 static void
@@ -229,7 +231,8 @@ extract_presentation(RVNGFileStream* input)
              nullptr, 0,
              nullptr, 0,
              nullptr, 0,
-             pages_content.size());
+             pages_content.size(),
+             time_t(-1));
 }
 
 static void
@@ -248,7 +251,8 @@ extract_drawing(RVNGFileStream* input)
              nullptr, 0,
              nullptr, 0,
              nullptr, 0,
-             pages_content.size());
+             pages_content.size(),
+             time_t(-1));
 }
 
 void
diff --git a/xapian-applications/omega/handler_poppler.cc b/xapian-applications/omega/handler_poppler.cc
index 1151aa11fc3..894f5be7537 100644
--- a/xapian-applications/omega/handler_poppler.cc
+++ b/xapian-applications/omega/handler_poppler.cc
@@ -98,16 +98,14 @@ extract(const string& filename,
     gchar* author = poppler_document_get_author(doc);
     gchar* title = poppler_document_get_title(doc);
     gchar* keywords = poppler_document_get_keywords(doc);
-    time_t datetime = poppler_document_get_creation_date(doc);
-    if (datetime != time_t(-1)) {
-        // FIXME: Add support for this to the worker protocol.
-    }
+    time_t created = poppler_document_get_creation_date(doc);
     response(dump.data(), dump.size(),
              title, strlen_if_nonnull(title),
              nullptr, 0,
              author, strlen_if_nonnull(author),
-             pages);
+             pages,
+             created);
     g_free(author);
     g_free(title);
     g_free(keywords);
 
diff --git a/xapian-applications/omega/handler_tesseract.cc b/xapian-applications/omega/handler_tesseract.cc
index dd6a89f222d..4a39ba137a6 100644
--- a/xapian-applications/omega/handler_tesseract.cc
+++ b/xapian-applications/omega/handler_tesseract.cc
@@ -67,7 +67,7 @@ extract(const string& filename,
 
     // Get OCR result
     const char* dump = ocr->GetUTF8Text();
-    response(dump, nullptr, nullptr, nullptr, -1);
+    response(dump, nullptr, nullptr, nullptr, -1, time_t(-1));
     delete[] dump;
 
     // Release memory.
diff --git a/xapian-applications/omega/index_file.cc b/xapian-applications/omega/index_file.cc
index 5d15e9683da..78859ad5549 100644
--- a/xapian-applications/omega/index_file.cc
+++ b/xapian-applications/omega/index_file.cc
@@ -786,7 +786,7 @@ index_mimetype(const string& file, const string& urlterm, const string& url,
             // Use a worker process to extract the content.
             Worker* wrk = cmd_it->second.worker;
             int r = wrk->extract(file, mimetype, dump, title, keywords, author,
-                                 pages);
+                                 pages, created);
             if (r != 0) {
                 string msg = wrk->get_error();
                 assert(!msg.empty());
diff --git a/xapian-applications/omega/worker.cc b/xapian-applications/omega/worker.cc
index 6015d1fdfa2..cd334c37737 100644
--- a/xapian-applications/omega/worker.cc
+++ b/xapian-applications/omega/worker.cc
@@ -144,7 +144,8 @@ Worker::extract(const std::string& filename,
                 std::string& title,
                 std::string& keywords,
                 std::string& author,
-                int& pages)
+                int& pages,
+                time_t& created)
 {
     if (filter_module.empty()) {
         error = "This helper hard failed earlier in the current run";
@@ -176,12 +177,15 @@ Worker::extract(const std::string& filename,
         }
 
         unsigned u_pages;
+        unsigned long u_created;
         if (read_string(sockt, dump) &&
             read_string(sockt, title) &&
             read_string(sockt, keywords) &&
             read_string(sockt, author) &&
-            read_unsigned(sockt, u_pages)) {
+            read_unsigned(sockt, u_pages) &&
+            read_unsigned(sockt, u_created)) {
             pages = int(u_pages) - 1;
+            created = time_t(long(u_created));
             return 0;
         }
     }
diff --git a/xapian-applications/omega/worker.h b/xapian-applications/omega/worker.h
index 0398804c71d..e8e2b0dd39a 100644
--- a/xapian-applications/omega/worker.h
+++ b/xapian-applications/omega/worker.h
@@ -79,11 +79,12 @@ class Worker {
      *
      * @param filename Path to the file.
      * @param mimetype Mimetype of the file.
-     * @param dump     String where the dump will be saved.
-     * @param title    String which will hold the title of the file.
-     * @param keywords String where the keywords will be stored.
-     * @param author   String where the author will be stored.
-     * @param pages    Here the number of pages will be stored.
+     * @param[out] dump     Any body text.
+     * @param[out] title    The title of the document.
+     * @param[out] keywords Any keywords.
+     * @param[out] author   The author(s).
+     * @param[out] pages    The number of pages (-1 if unknown).
+     * @param[out] created  Created timestamp as time_t (-1 if unknown).
      *
      * @return 0 on success.
      *
@@ -104,7 +105,8 @@ class Worker {
                 std::string& title,
                 std::string& keywords,
                 std::string& author,
-                int& pages);
+                int& pages,
+                time_t& created);
 
     /** Returns an error message if the extraction fails, or an empty string
      *  if everything is okay.
diff --git a/xapian-applications/omega/worker_comms.cc b/xapian-applications/omega/worker_comms.cc
index 937e39b2bca..3004adb61f8 100644
--- a/xapian-applications/omega/worker_comms.cc
+++ b/xapian-applications/omega/worker_comms.cc
@@ -111,6 +111,58 @@ read_unsigned(FILE* f, unsigned& v)
     return true;
 }
 
+/** Read a variable-length encoded unsigned long from @a f.
+ *
+ *  The value is decoded 7 bits per byte, least significant group first,
+ *  with the top bit of each byte set when more bytes follow - this is the
+ *  byte order the unsigned long overload of write_unsigned() emits.
+ *
+ *  @param f       Stream to read from.
+ *  @param[out] v  The decoded value (only meaningful if true is returned).
+ *
+ *  @return true on success; false on EOF/read error or if the encoding has
+ *          more bytes than an unsigned long can hold.
+ */
+bool
+read_unsigned(FILE* f, unsigned long& v)
+{
+    v = 0;
+    unsigned shift = 0;
+    int ch;
+    do {
+        ch = getc(f);
+        if (ch < 0) return false;
+        // Shifting by the width of the type or more is undefined behaviour,
+        // so treat an over-long encoding as a protocol error.
+        if (shift >= sizeof(v) * 8) return false;
+        v |= static_cast<unsigned long>(ch & 0x7f) << shift;
+        shift += 7;
+    } while (ch & 0x80);
+    return true;
+}
+
+/** Write @a v to @a f in a variable-length encoding.
+ *
+ *  Emits 7 bits per byte, least significant group first, setting the top
+ *  bit of every byte except the last.
+ *
+ *  @param f  Stream to write to.
+ *  @param v  Value to encode.
+ *
+ *  @return true on success, false if putc() reported an error.
+ */
+bool
+write_unsigned(FILE* f, unsigned long v)
+{
+    while (v >= 0x80) {
+        // Mask to a single byte explicitly rather than relying on putc()
+        // truncating an out-of-range int.
+        if (putc(static_cast<int>((v & 0x7f) | 0x80), f) < 0) return false;
+        v >>= 7;
+    }
+    return !(putc(static_cast<int>(v), f) < 0);
+}
+
 bool
 write_unsigned(FILE* f, unsigned v)
 {
diff --git a/xapian-applications/omega/worker_comms.h b/xapian-applications/omega/worker_comms.h
index 50bd14cef67..21de8ff8062 100644
--- a/xapian-applications/omega/worker_comms.h
+++ b/xapian-applications/omega/worker_comms.h
@@ -35,4 +35,8 @@ inline bool write_string(std::FILE* f, const std::string& s) {
 
 bool read_unsigned(std::FILE* f, unsigned& v);
 
+bool read_unsigned(std::FILE* f, unsigned long& v);
+
 bool write_unsigned(std::FILE* f, unsigned v);
+
+bool write_unsigned(std::FILE* f, unsigned long v);