Skip to content

Commit

Permalink
Merge pull request #15 from mseri/process-file
Browse files Browse the repository at this point in the history
Add file processing support
  • Loading branch information
mseri authored Feb 3, 2022
2 parents cd7ac3a + 4e8ca03 commit ff93dfb
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 29 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
- name: Use OCaml ${{ matrix.ocaml-version }}
uses: ocaml/setup-ocaml@v2
with:
ocaml-version: ${{ matrix.ocaml-version }}
ocaml-compiler: ${{ matrix.ocaml-version }}
dune-cache: true

- name: Set git user
Expand Down Expand Up @@ -90,6 +90,7 @@ jobs:
opam exec -- dune build @fmt
- name: Run tests
if: ${{ !startsWith(github.ref, 'refs/tags/') }}
run: opam exec -- dune runtest

- name: Build
Expand Down
1 change: 1 addition & 0 deletions .ocamlformat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ocaml-version = 4.08
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 0.6.0 (2022-02-03)

- Support batch processing of files of IDs
- Support appending the result to a file

# 0.5.2 (2021-12-17)

- Move from cuz to the published clz
Expand Down
49 changes: 29 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,53 @@ Usage:
arXiv or PubMed ID.

SYNOPSIS
doi2bib [OPTION]... [ID]
doi2bib [OPTION]... [ID]

ARGUMENTS
ID A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically
infer what kind of ID you are using. You can force the cli to
lookup a DOI by using the form 'doi:ID' or an arXiv ID by using
the form 'arXiv:ID'. PubMed IDs always start with 'PMC'.
ID A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically
infer what kind of ID you are using. You can force the cli to
lookup a DOI by using the form 'doi:ID' or an arXiv ID by using
the form 'arXiv:ID'. PubMed IDs always start with 'PMC'.

OPTIONS
--help[=FMT] (default=auto)
Show this help in format FMT. The value FMT must be one of `auto',
`pager', `groff' or `plain'. With `auto', the format is `pager` or
`plain' whenever the TERM env var is `dumb' or undefined.
-f FILE, --file=FILE
With this flag, the tool reads the file and process its lines
sequentially, treating them as DOIs, arXiv IDs or PubMedIDs.
Errors will be printed on standard error but will not terminate
the operation.

--version
Show version information.
--help[=FMT] (default=auto)
Show this help in format FMT. The value FMT must be one of `auto',
`pager', `groff' or `plain'. With `auto', the format is `pager` or
`plain' whenever the TERM env var is `dumb' or undefined.

-o OUTPUT, --output=OUTPUT (absent=stdout)
Append the bibtex output to the specified file. It will create the
file if it does not exist.

--version
Show version information.

EXIT STATUS
doi2bib exits with the following status:
doi2bib exits with the following status:

0 on success.
0 on success.

124 on command line parsing errors.
124 on command line parsing errors.

125 on unexpected internal errors (bugs).
125 on unexpected internal errors (bugs).

BUGS
Report bugs to https://github.com/mseri/doi2bib/issues

Report bugs to https://github.com/mseri/doi2bib/issues

It will output the bibtex entry, using the published details when possible.
It will retrieve the bibtex entry, using the published details when possible.

Examples of use (the bibtex entry is printed on standard output):

$ doi2bib 10.1007/s10569-019-9946-9
$ doi2bib doi:10.4171/JST/226
$ doi2bib arXiv:1609.01724
$ doi2bib doi:10.4171/JST/226 -o "bibliography.bib"
$ doi2bib 1902.00436
$ doi2bib arXiv:1609.01724
$ doi2bib PMC2883744

Each release comes with attached binaries for windows, mac and linux.
Expand Down
71 changes: 64 additions & 7 deletions bin/doi2bib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,52 @@ open Doi2bib

(** [err msg] wraps [msg] in the [`Error] result variant, with the boolean
    flag set to [false]. *)
let err msg = `Error (false, msg)

let doi2bib id =
match id with
| None -> `Help (`Pager, None)
| Some id -> (
match Lwt_main.run (Http.get_bib_entry @@ Parser.parse_id id) with
| bibtex -> `Ok (Printf.printf "%s" bibtex)
(** [process_id outfile id] parses [id], fetches the corresponding bibtex
    entry and emits it.

    When [outfile] is the literal string ["stdout"] the entry is printed on
    standard output as-is; otherwise it is appended, followed by a newline,
    to the file named [outfile], which is created if it does not exist.

    Failures of the lookup are not handled here: they propagate to the
    caller through the returned promise. *)
let process_id outfile id =
  let open Lwt.Syntax in
  (* Decide where the fetched entry goes. *)
  let emit entry =
    match outfile with
    | "stdout" -> Lwt_io.print entry
    | path ->
        Lwt_io.with_file ~mode:Lwt_io.Output
          ~flags:Unix.[ O_WRONLY; O_APPEND; O_CREAT ]
          path
          (fun channel -> Lwt_io.write_line channel entry)
  in
  let* entry = Http.get_bib_entry (Parser.parse_id id) in
  emit entry

(** [process_file outfile infile] reads [infile] line by line, treats every
    line as an ID, looks up its bibtex entry and writes the entries, one
    after the other, to the output.

    When [outfile] is the literal string ["stdout"] the entries are written
    to standard output; otherwise they are appended to the file named
    [outfile], which is created if it does not exist.

    A failed lookup for one ID is reported on standard error and does not
    interrupt the processing of the remaining lines. Failures while opening
    or reading [infile], or while writing to the output, still propagate
    through the returned promise. *)
let process_file outfile infile =
  let open Lwt.Syntax in
  (* Run [f] on the selected output channel. *)
  let write_out f =
    match outfile with
    | "stdout" -> f Lwt_io.stdout
    | outfile ->
        let flags = [ Unix.O_WRONLY; O_APPEND; O_CREAT ] in
        Lwt_io.with_file ~mode:Output ~flags outfile f
  in
  (* Lazy sequence of the lines of [ic]; it ends at end of file. *)
  let lines ic =
    Lwt_seq.unfold_lwt
      (fun ic ->
        let* line = Lwt_io.read_line_opt ic in
        Lwt.return @@ Option.map (fun x -> (x, ic)) line)
      ic
  in
  let process oc id =
    (* [Lwt.catch] is required here: a plain [match ... with exception] only
       sees exceptions raised synchronously and would let a rejected promise
       abort the whole batch. Only the lookup is guarded, so that output
       errors are not misreported as lookup errors. *)
    let* fetched =
      Lwt.catch
        (fun () ->
          let* bibtex = Http.get_bib_entry @@ Parser.parse_id id in
          Lwt.return (Ok bibtex))
        (fun e -> Lwt.return (Error e))
    in
    match fetched with
    | Ok bibtex -> Lwt_io.write_line oc bibtex
    | Error e ->
        (* Trailing newline keeps successive error reports on separate
           lines. *)
        Lwt_io.eprintf "Error for %s: %s\n" id (Printexc.to_string e)
  in
  Lwt_io.with_file ~mode:Input infile (fun ic ->
      write_out (fun oc -> Lwt_seq.iter_s (process oc) (lines ic)))

let doi2bib id file outfile =
match (id, file) with
| None, "" -> `Help (`Pager, None)
| None, infile -> (
match Lwt_main.run (process_file outfile infile) with
| () -> `Ok ()
| exception e -> err @@ Printexc.to_string e)
| Some id, "" -> (
match Lwt_main.run (process_id outfile id) with
| () -> `Ok ()
| exception Http.PubMed_DOI_not_found ->
err @@ Printf.sprintf "Error: unable to find a DOI entry for %s.\n" id
| exception Http.Entry_not_found ->
Expand All @@ -29,9 +69,26 @@ let doi2bib id =
You can force me to consider it by prepending 'doi:', 'arxiv:' \
or 'PMC' as appropriate."
id)
| Some _, _ -> `Help (`Pager, None)

let () =
let open Cmdliner in
let file =
let doc =
"With this flag, the tool reads the file and process its lines \
sequentially, treating them as DOIs, arXiv IDs or PubMedIDs. Errors \
will be printed on standard error but will not terminate the operation."
in
Arg.(value & opt string "" & info [ "f"; "file" ] ~docv:"FILE" ~doc)
in
let out =
let doc =
"Append the bibtex output to the specified file. It will create the file \
if it does not exist."
in
Arg.(
value & opt string "stdout" & info [ "o"; "output" ] ~docv:"OUTPUT" ~doc)
in
let id =
let doc =
"A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically \
Expand All @@ -42,7 +99,7 @@ let () =
in
Arg.(value & pos 0 (some string) None & info ~docv:"ID" ~doc [])
in
let doi2bib_t = Term.(ret (const doi2bib $ id)) in
let doi2bib_t = Term.(ret (const doi2bib $ id $ file $ out)) in
let info =
let doc =
"A little CLI tool to get the bibtex entry for a given DOI, arXiv or \
Expand Down
2 changes: 1 addition & 1 deletion bin/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(executable
(name doi2bib)
(public_name doi2bib)
(libraries cmdliner doi2bib)
(libraries cmdliner doi2bib unix)
(preprocess future_syntax))

0 comments on commit ff93dfb

Please sign in to comment.