-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Manifest enrichment #699
base: feature/IDA-893-enrichment-integration
Are you sure you want to change the base?
Manifest enrichment #699
Conversation
/**
 * Route handler that starts the manifest enrichment pipeline for the manifest
 * identified by the `id` route parameter.
 *
 * The caller is resolved through `userWithScope` with the `site.admin` scope.
 * A webhook URL bound to `manifestEnrichmentPipelineEvent.event_id` is
 * generated for the caller's site and passed to the enrichment service as the
 * callback; the task returned by the service becomes the response body.
 */
export const manifestEnrichmentPipeline: RouteMiddleware<{ id: number }> = async context => {
  const { siteId } = userWithScope(context, ['site.admin']);
  const site = await context.siteManager.getSiteById(siteId);
  const siteApi = api.asUser({ siteId });

  // The webhook token is valid for 12 hours (value expressed in seconds).
  const webhookLifetime = 12 * 3600;
  const webhook = await context.webhookExtension.generateWebhookUrl(
    site,
    manifestEnrichmentPipelineEvent.event_id,
    webhookLifetime
  );

  const enrichmentTask = await siteApi.enrichment.enrichManifestInternal(context.params.id, webhook);
  context.response.body = enrichmentTask;
};
|
||
/**
 * Describes the webhook event used by the manifest enrichment pipeline:
 * the event identifier and the variables expected in the webhook body
 * (here only `id`).
 */
export const manifestEnrichmentPipelineEvent: WebhookEventType = {
  // Identifier the generated webhook URL is bound to.
  event_id: 'manifest-enrichment-pipeline.complete',
  // Fields expected in the incoming webhook body.
  body_variables: ['id'],
};
|
||
/**
 * Incoming webhook run for the `manifest-enrichment-pipeline.complete` event.
 *
 * `execute` receives the webhook body (`resp`) and a site API instance. It
 * fetches the enrichment task named by `resp.id`, checks that it has a subject
 * and a status of 3, and — for `ocr_madoc_resource` tasks on a canvas subject —
 * fetches the plaintext of the first OCR resource and stores it on the canvas.
 *
 * Resolves to the result of `updateCanvasPlaintext` when plaintext was stored,
 * otherwise to `undefined`. Throws (via `invariant`) when any expectation on
 * the response, task, subject or plaintext record is not met.
 */
export const manifestEnrichmentHook: IncomingWebhook = {
  type: 'manifest-enrichment-pipeline-task-ingest',
  event_id: 'manifest-enrichment-pipeline.complete',
  is_outgoing: false,
  execute: async (resp, siteApi) => {
    invariant(resp.id, 'Expected response to contain `id`');

    const task = await siteApi.enrichment.getEnrichmentTask(resp.id);
    invariant(task.subject, 'Missing subject on task');
    invariant(task.status === 3, 'Task is not yet complete');

    // Only OCR tasks are handled; any other task type is ignored.
    if (task.task_type !== 'ocr_madoc_resource') {
      return undefined;
    }

    const subject = parseUrn(task.subject);
    invariant(subject, 'Invalid subject');
    invariant(subject.type === 'canvas', 'Can only process canvases');

    // Only the first OCR resource is used.
    const firstResource = task.state && task.state.ocr_resources && task.state.ocr_resources[0];
    if (!firstResource) {
      return undefined;
    }

    const enrichmentPlaintext = await siteApi.enrichment.getEnrichmentPlaintext(firstResource);
    invariant(enrichmentPlaintext, 'Missing plaintext from enrichment');
    if (!enrichmentPlaintext.plaintext) {
      return undefined;
    }

    // NOTE(review): the original marks this id with `// ??` — confirm that the
    // parsed URN id is the canvas id expected by `updateCanvasPlaintext`.
    return await siteApi.updateCanvasPlaintext(subject.id, enrichmentPlaintext.plaintext);
  },
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the main part (the other parts mostly fill gaps in Madoc's API).
manifestEnrichmentPipeline
is the API route handler called when an admin hits "Enrich":
- Generates a webhook URL (12-hour token)
- Creates the enrichment task
- Returns task (@mattmcgrattan it would be useful to omit
callback_url
from the state in the future)
manifestEnrichmentPipelineEvent
is a short description of the webhook "type" and the fields expected in the response.
manifestEnrichmentHook
is the function that is called when the task is complete. We get the webhook POST body JSON and an instance of the siteApi (already mapped correctly to the right site):
- Fetch the enrichment task
- Validate that it has a subject + is complete
- If it's an
ocr_madoc_resource
:
- Parse + validate the subject
- check for
ocr_resources
(@mattmcgrattan this will need to change if there is more than one here)
- Fetch the plaintext + validate what we expect to see
- Attach the plaintext to the canvas.
/**
 * Starts the manifest enrichment pipeline by POSTing a new task to the
 * enrichment service.
 *
 * @param id - Manifest id, embedded in the task subject as `urn:madoc:manifest:<id>`.
 * @param callback - Optional URL sent to the service as the `callback_url` parameter.
 * @returns The result of the API request, typed as an `EnrichmentTask`.
 */
enrichManifestInternal(id: number, callback?: string) {
  const task = {
    subject: `urn:madoc:manifest:${id}`,
    parameters: [{ callback_url: callback }],
  };

  return this.api.request<EnrichmentTask>(`/api/enrichment/tasks/madoc_manifest_enrichment_pipeline`, {
    method: 'POST',
    body: { task },
  });
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the call to kick off the enrichment pipeline
Preview docker image available
|
you might want to put a |
No description provided.