Skip to content

Commit

Permalink
Add support for creation time to worker protocol
Browse files Browse the repository at this point in the history
Initially this is hooked up for the poppler worker.
  • Loading branch information
ojwb committed Oct 25, 2022
1 parent 47283ab commit 8b606c7
Show file tree
Hide file tree
Showing 17 changed files with 88 additions and 38 deletions.
7 changes: 5 additions & 2 deletions xapian-applications/omega/assistant.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,23 @@ response(const char* dump, size_t dump_len,
const char* title, size_t title_len,
const char* keywords, size_t keywords_len,
const char* author, size_t author_len,
int pages)
int pages,
time_t created)
{
if (replied) {
// Logic error - handler already called response() for this file.
exit(EX_SOFTWARE);
}
replied = true;
unsigned u_pages = pages + 1;
unsigned long u_created = static_cast<unsigned long>(created);
if (!write_string(sockt, nullptr, 0) ||
!write_string(sockt, dump, dump_len) ||
!write_string(sockt, title, title_len) ||
!write_string(sockt, keywords, keywords_len) ||
!write_string(sockt, author, author_len) ||
!write_unsigned(sockt, u_pages)) {
!write_unsigned(sockt, u_pages) ||
!write_unsigned(sockt, u_created)) {
exit(1);
}
}
Expand Down
15 changes: 10 additions & 5 deletions xapian-applications/omega/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,34 +53,39 @@ response(const char* dump, size_t dump_len,
const char* title, size_t title_len,
const char* keywords, size_t keywords_len,
const char* author, size_t author_len,
int pages);
int pages,
time_t created);

inline void
response(const char* dump,
const char* title,
const char* keywords,
const char* author,
int pages)
int pages,
time_t created)
{
response(dump, dump ? std::strlen(dump) : 0,
title, title ? std::strlen(title) : 0,
keywords, keywords ? std::strlen(keywords) : 0,
author, author ? std::strlen(author) : 0,
pages);
pages,
created);
}

inline void
response(const std::string& dump,
const std::string& title,
const std::string& keywords,
const std::string& author,
int pages)
int pages,
time_t created)
{
response(dump.data(), dump.size(),
title.data(), title.size(),
keywords.data(), keywords.size(),
author.data(), author.size(),
pages);
pages,
created);
}

void
Expand Down
3 changes: 2 additions & 1 deletion xapian-applications/omega/handler_gmime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ extract(const string& filename,
title, strlen(title),
nullptr, 0,
author, strlen(author),
-1);
-1,
time_t(-1));

#if GMIME_MAJOR_VERSION >= 3
free(author);
Expand Down
3 changes: 2 additions & 1 deletion xapian-applications/omega/handler_libabw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ try {
title, title_len,
keywords.data(), keywords.size(),
author, author_len,
-1);
-1,
time_t(-1));
} catch (...) {
fail("Libabw threw an exception");
}
8 changes: 4 additions & 4 deletions xapian-applications/omega/handler_libarchive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ extract_opendoc(struct archive* archive_obj)
// in either order in the ZIP container.
parser.parse(styles);

response(parser.dump, title, keywords, author, -1);
response(parser.dump, title, keywords, author, -1, time_t(-1));
return true;
}

Expand Down Expand Up @@ -147,7 +147,7 @@ extract_xlsx(struct archive* archive_obj)
}
}
parser.parse(sheets);
response(parser.dump, title, keywords, author, -1);
response(parser.dump, title, keywords, author, -1, time_t(-1));
return true;
}

Expand Down Expand Up @@ -229,7 +229,7 @@ extract_msxml(struct archive* archive_obj,

MSXmlParser parser;
parser.parse(content);
response(parser.dump, title, keywords, author, -1);
response(parser.dump, title, keywords, author, -1, time_t(-1));
return true;
}

Expand Down Expand Up @@ -267,7 +267,7 @@ extract_xps(struct archive* archive_obj)
}
}

response(parser.dump, title, keywords, author, -1);
response(parser.dump, title, keywords, author, -1, time_t(-1));
return true;
}

Expand Down
3 changes: 2 additions & 1 deletion xapian-applications/omega/handler_libcdr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ try {
nullptr, 0,
nullptr, 0,
nullptr, 0,
pages);
pages,
time_t(-1));
} catch (...) {
fail("Libcdr threw an exception");
}
3 changes: 2 additions & 1 deletion xapian-applications/omega/handler_libebook.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ try {
title, title_len,
keywords.data(), keywords.size(),
author, author_len,
-1);
-1,
time_t(-1));
} catch (...) {
fail("Libe-book threw an exception");
}
9 changes: 6 additions & 3 deletions xapian-applications/omega/handler_libetonyek.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ extract_key(RVNGInputStream* input)
nullptr, 0,
nullptr, 0,
nullptr, 0,
-1);
-1,
time_t(-1));
}

static void
Expand All @@ -174,7 +175,8 @@ extract_numbers(RVNGInputStream* input)
nullptr, 0,
nullptr, 0,
nullptr, 0,
-1);
-1,
time_t(-1));
}

static void
Expand Down Expand Up @@ -209,7 +211,8 @@ extract_pages(RVNGInputStream* input)
title, title_len,
keywords.data(), keywords.size(),
author, author_len,
-1);
-1,
time_t(-1));
}

void
Expand Down
2 changes: 1 addition & 1 deletion xapian-applications/omega/handler_libextractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ try {
EXTRACTOR_extract(plugins, filename.c_str(),
NULL, 0,
&process_metadata, &md);
response(md.dump, md.title, md.keywords, md.author, md.pages);
response(md.dump, md.title, md.keywords, md.author, md.pages, time_t(-1));
} catch (...) {
fail("Libextractor threw an exception");
}
12 changes: 8 additions & 4 deletions xapian-applications/omega/handler_libmwaw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ extract_word(RVNGFileStream* input)
title, title_len,
keywords.data(), keywords.size(),
author, author_len,
-1);
-1,
time_t(-1));
}

static void
Expand Down Expand Up @@ -210,7 +211,8 @@ extract_spreadsheet(RVNGFileStream* input)
title, title_len,
keywords.data(), keywords.size(),
author, author_len,
pages_content.size());
pages_content.size(),
time_t(-1));
}

static void
Expand All @@ -229,7 +231,8 @@ extract_presentation(RVNGFileStream* input)
nullptr, 0,
nullptr, 0,
nullptr, 0,
pages_content.size());
pages_content.size(),
time_t(-1));
}

static void
Expand All @@ -248,7 +251,8 @@ extract_drawing(RVNGFileStream* input)
nullptr, 0,
nullptr, 0,
nullptr, 0,
pages_content.size());
pages_content.size(),
time_t(-1));
}

void
Expand Down
8 changes: 3 additions & 5 deletions xapian-applications/omega/handler_poppler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,14 @@ extract(const string& filename,
gchar* author = poppler_document_get_author(doc);
gchar* title = poppler_document_get_title(doc);
gchar* keywords = poppler_document_get_keywords(doc);
time_t datetime = poppler_document_get_creation_date(doc);
if (datetime != time_t(-1)) {
// FIXME: Add support for this to the worker protocol.
}
time_t created = poppler_document_get_creation_date(doc);

response(dump.data(), dump.size(),
title, strlen_if_nonnull(title),
nullptr, 0,
author, strlen_if_nonnull(author),
pages);
pages,
created);
g_free(author);
g_free(title);
g_free(keywords);
Expand Down
2 changes: 1 addition & 1 deletion xapian-applications/omega/handler_tesseract.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ extract(const string& filename,

// Get OCR result
const char* dump = ocr->GetUTF8Text();
response(dump, nullptr, nullptr, nullptr, -1);
response(dump, nullptr, nullptr, nullptr, -1, time_t(-1));
delete[] dump;

// Release memory.
Expand Down
2 changes: 1 addition & 1 deletion xapian-applications/omega/index_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ index_mimetype(const string& file, const string& urlterm, const string& url,
// Use a worker process to extract the content.
Worker* wrk = cmd_it->second.worker;
int r = wrk->extract(file, mimetype, dump, title, keywords, author,
pages);
pages, created);
if (r != 0) {
string msg = wrk->get_error();
assert(!msg.empty());
Expand Down
8 changes: 6 additions & 2 deletions xapian-applications/omega/worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ Worker::extract(const std::string& filename,
std::string& title,
std::string& keywords,
std::string& author,
int& pages)
int& pages,
time_t& created)
{
if (filter_module.empty()) {
error = "This helper hard failed earlier in the current run";
Expand Down Expand Up @@ -176,12 +177,15 @@ Worker::extract(const std::string& filename,
}

unsigned u_pages;
unsigned long u_created;
if (read_string(sockt, dump) &&
read_string(sockt, title) &&
read_string(sockt, keywords) &&
read_string(sockt, author) &&
read_unsigned(sockt, u_pages)) {
read_unsigned(sockt, u_pages) &&
read_unsigned(sockt, u_created)) {
pages = int(u_pages) - 1;
created = time_t(long(u_created));
return 0;
}
}
Expand Down
14 changes: 8 additions & 6 deletions xapian-applications/omega/worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,12 @@ class Worker {
*
* @param filename Path to the file.
* @param mimetype Mimetype of the file.
* @param dump String where the dump will be saved.
* @param title String which will hold the title of the file.
* @param keywords String where the keywords will be stored.
* @param author String where the author will be stored.
* @param pages Here the number of pages will be stored.
* @param[out] dump Any body text.
* @param[out] title The title of the document.
* @param[out] keywords Any keywords.
* @param[out] author The author(s).
* @param[out] pages The number of pages (-1 if unknown).
* @param[out] created Created timestamp as time_t (-1 if unknown).
*
* @return 0 on success.
*
Expand All @@ -104,7 +105,8 @@ class Worker {
std::string& title,
std::string& keywords,
std::string& author,
int& pages);
int& pages,
time_t& created);

/** Returns an error message if the extraction fails, or an empty string
* if everything is okay.
Expand Down
23 changes: 23 additions & 0 deletions xapian-applications/omega/worker_comms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,29 @@ read_unsigned(FILE* f, unsigned& v)
return true;
}

/** Read an unsigned long stored as a variable-length byte sequence.
 *
 *  Each byte carries 7 bits of the value, least significant group first,
 *  and has its top bit set when another byte follows.  This is the byte
 *  order emitted by write_unsigned(FILE*, unsigned long).
 *
 *  @param f       Stream to read from.
 *  @param[out] v  Receives the decoded value (may hold a partial value if
 *                 false is returned).
 *
 *  @return true on success; false on EOF/read error, or if the input has
 *          more 7-bit groups than an unsigned long can hold.
 */
bool
read_unsigned(FILE* f, unsigned long& v)
{
    v = 0;
    unsigned shift = 0;
    int ch;
    do {
        ch = getc(f);
        if (ch < 0) return false;
        // Shifting by the full width of the type (or more) is undefined
        // behaviour, so treat over-long input as a protocol error.
        if (shift >= sizeof(v) * 8) return false;
        // Groups arrive least significant first, so each one is placed
        // above the bits already decoded.
        v |= static_cast<unsigned long>(ch & 0x7f) << shift;
        shift += 7;
    } while (ch & 0x80);
    return true;
}

/** Write an unsigned long as a variable-length byte sequence.
 *
 *  The value is emitted 7 bits at a time, least significant group first.
 *  Every byte except the last has its top bit set to signal that more
 *  bytes follow, so values below 0x80 take a single byte.
 *
 *  @param f  Stream to write to.
 *  @param v  Value to encode.
 *
 *  @return true if every byte was written, false if putc() reported an
 *          error.
 */
bool
write_unsigned(FILE* f, unsigned long v)
{
    while (v >= 0x80) {
        // Mask down to one byte before converting to int so the conversion
        // never sees a value outside int's range.
        if (putc(static_cast<int>((v & 0x7f) | 0x80), f) < 0) return false;
        v >>= 7;
    }
    // Final group: v < 0x80 here, so the continuation bit is clear.
    return putc(static_cast<int>(v), f) >= 0;
}

bool
write_unsigned(FILE* f, unsigned v)
{
Expand Down
4 changes: 4 additions & 0 deletions xapian-applications/omega/worker_comms.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ inline bool write_string(std::FILE* f, const std::string& s) {

bool read_unsigned(std::FILE* f, unsigned& v);

bool read_unsigned(std::FILE* f, unsigned long& v);

bool write_unsigned(std::FILE* f, unsigned v);

bool write_unsigned(std::FILE* f, unsigned long v);

0 comments on commit 8b606c7

Please sign in to comment.