Skip to content

Commit

Permalink
Load from filename not stream (#76)
Browse files Browse the repository at this point in the history
  • Loading branch information
stweil authored and zuphilip committed Sep 27, 2016
1 parent 1333646 commit 9b57a1e
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions hocr-combine
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ if len(sys.argv)<2:
print_usage()
sys.exit(1)

-doc = html.fromstring(open(sys.argv[1]).read())
+doc = html.parse(sys.argv[1])

pages = doc.xpath("//*[@class='ocr_page']")
container = pages[-1].getparent()

for fname in sys.argv[2:]:
-doc2 = html.fromstring(open(fname).read())
+doc2 = html.parse(fname)
pages = doc2.xpath("//*[@class='ocr_page']")
for page in pages:
page = doc.importNode(page,1)
Expand Down
4 changes: 2 additions & 2 deletions hocr-eval
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ if(imgfile):
draw=ImageDraw.Draw(im)

# get pages from inputs
-truth_doc = html.fromstring(args[0])
-actual_doc = html.fromstring(args[1])
+truth_doc = html.parse(args[0])
+actual_doc = html.parse(args[1])

# parse pages
truth_pages = truth_doc.xpath("//*[@class='ocr_page']")
Expand Down
4 changes: 2 additions & 2 deletions hocr-eval-geom
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ close_match = float(close_match)

### read the hOCR files

-truth_doc = html.fromstring(open(args[0]).read())
-actual_doc = html.fromstring(open(args[1]).read())
+truth_doc = html.parse(args[0])
+actual_doc = html.parse(args[1])
truth_pages = truth_doc.xpath("//*[@class='ocr_page']")
actual_pages = actual_doc.xpath("//*[@class='ocr_page']")
assert len(truth_pages) == len(actual_pages)
Expand Down
4 changes: 2 additions & 2 deletions hocr-merge-dc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ if len(sys.argv)<2:
print_usage()
sys.exit(1)

-dc_doc = etree.fromstring(open(sys.argv[1]).read())
-hocr_doc = html.fromstring(open(sys.argv[2]).read())
+dc_doc = etree.parse(sys.argv[1], html.XHTMLParser())
+hocr_doc = html.parse(sys.argv[2])


### remove all existing META tags representing Dublin Core metadata
Expand Down

0 comments on commit 9b57a1e

Please sign in to comment.