Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update vi_layoutxlm_ser to load the pretrained checkpoint; fix bug in predict_ser #24

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions mindocr/models/kie_layoutxlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ def _cfg(url="", **kwargs):


default_cfgs = {
"layoutxlm": _cfg(
url="https://download.mindspore.cn/toolkits/mindocr/layoutxlm/ser_layoutxlm_base-a4ea148e.ckpt"
),
"vi_layoutxlm": _cfg(
url="https://download.mindspore.cn/toolkits/mindocr/vi-layoutxlm/ser_vi_layoutxlm-f3c83585.ckpt"
),
Expand Down Expand Up @@ -65,19 +68,25 @@ def layoutxlm_ser(
}
model = KieNet(model_config)
if pretrained:
default_cfg = default_cfgs["vi_layoutxlm"]
default_cfg = default_cfgs["layoutxlm"]
load_pretrained(model, default_cfg)

return model


@register_model
def vi_layoutxlm_ser(pretrained: bool = True, use_visual_backbone: bool = False, use_float16: bool = False, **kwargs):
def vi_layoutxlm_ser(
pretrained: bool = True,
pretrained_backbone: bool = False,
use_visual_backbone: bool = False,
use_float16: bool = False,
**kwargs
):
model_config = {
"type": "kie",
"backbone": {
"name": "layoutxlm",
"pretrained": pretrained, # backbone pretrained
"pretrained": pretrained_backbone, # backbone pretrained
"use_visual_backbone": use_visual_backbone,
"use_float16": use_float16,
},
Expand All @@ -90,5 +99,8 @@ def vi_layoutxlm_ser(pretrained: bool = True, use_visual_backbone: bool = False,
},
}
model = KieNet(model_config)
if pretrained:
default_cfg = default_cfgs["vi_layoutxlm"]
load_pretrained(model, default_cfg)

return model
15 changes: 12 additions & 3 deletions tools/infer/text/predict_ser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, args):
# build model for algorithm with pretrained weights or local checkpoint
ckpt_dir = args.ser_model_dir
if ckpt_dir is None:
pretrained = False
pretrained = True
ckpt_load_path = None
pretrained_backbone = False
else:
Expand All @@ -64,7 +64,10 @@ def __init__(self, args):
)
model_name = algo_to_model_name[args.ser_algorithm]
self.model = build_model(
model_name, pretrained=pretrained, pretrained_backbone=pretrained_backbone, ckpt_load_path=ckpt_load_path
model_name,
pretrained=pretrained,
pretrained_backbone=pretrained_backbone,
ckpt_load_path=ckpt_load_path,
)
self.model.set_train(False)

Expand Down Expand Up @@ -202,6 +205,7 @@ def run_batchwise(self, ocr_info_list):
token_type_ids_batch = []
segment_offset_ids_batch = []
ocr_infos_batch = []
image_batch = []
for j in range(batch_begin, batch_end): # image index j
data = self.preprocess(ocr_info_list[j])
input_ids_batch.append(data["input_ids"])
Expand All @@ -210,6 +214,7 @@ def run_batchwise(self, ocr_info_list):
token_type_ids_batch.append(data["token_type_ids"])
segment_offset_ids_batch.append(data["segment_offset_id"])
ocr_infos_batch.append(data["ocr_info"])
image_batch.append(data["image"])

input_ids_batch = (
np.stack(input_ids_batch) if len(input_ids_batch) > 1 else np.expand_dims(input_ids_batch[0], axis=0)
Expand All @@ -225,13 +230,15 @@ def run_batchwise(self, ocr_info_list):
if len(token_type_ids_batch) > 1
else np.expand_dims(token_type_ids_batch[0], axis=0)
)
image_batch = np.stack(image_batch) if len(image_batch) > 1 else np.expand_dims(image_batch[0], axis=0)

# infer
input_x = [
Tensor(input_ids_batch),
Tensor(bbox_batch),
Tensor(attention_mask_batch),
Tensor(token_type_ids_batch),
Tensor(image_batch),
]
logits = self.model(input_x)
# postprocess
Expand Down Expand Up @@ -262,13 +269,15 @@ def run_single(self, ocr_info_list):
token_type_ids = data["token_type_ids"]
segment_offset_id = data["segment_offset_id"]
ocr_info = data["ocr_info"]
image = data["image"]

input_ids = np.expand_dims(input_ids, axis=0)
bbox = np.expand_dims(bbox, axis=0)
attention_mask = np.expand_dims(attention_mask, axis=0)
token_type_ids = np.expand_dims(token_type_ids, axis=0)
image = np.expand_dims(image, axis=0)

input_x = (Tensor(input_ids), Tensor(bbox), Tensor(attention_mask), Tensor(token_type_ids))
input_x = (Tensor(input_ids), Tensor(bbox), Tensor(attention_mask), Tensor(token_type_ids), Tensor(image))

logits = self.model(input_x)

Expand Down
Loading