Skip to content

Commit

Permalink
Warn about (and fix) invalid DOIs
Browse files Browse the repository at this point in the history
  • Loading branch information
goerz committed Nov 5, 2023
1 parent 1bba46e commit 46c5175
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 14 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

* Warn about markdown link syntax in `.bib` files [[#60][]]
* Warn about invalid DOIs in `.bib` files. The DOI field should never contain a URL (`https://doi.org/...`). This is detected as a special case, and the DOI is extracted from the URL.


## [Version 1.3.1][1.3.1] - 2023-11-02
Expand Down
2 changes: 1 addition & 1 deletion docs/src/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ @incollection{TannorBookChapter1991
pages = {333--345},
publisher = {Springer},
title = {Design of Femtosecond Pulse Sequences to Control Photochemical Products},
doi = {0.1007/978-94-011-2642-7_23},
doi = {10.1007/978-94-011-2642-7_23},
year = {1991},
}

Expand Down
46 changes: 33 additions & 13 deletions src/formatting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,12 @@ function format_published_in(
push!(urls, doi_url(entry))
end
else
append!(urls, get_urls(entry))
if !isempty(get_title(entry))
if isempty(get_title(entry))
append!(urls, get_urls(entry; skip=0))
else
# The title already linked to the URL (or DOI, if no
# URL available)
pop_url!(urls)
# URL available), hence we skip the first link here.
append!(urls, get_urls(entry; skip=1))
end
end
segments = [String[], String[], String[]]
Expand Down Expand Up @@ -347,15 +348,22 @@ function get_booktitle(entry)
end


function get_urls(entry)
function get_urls(entry; skip=0)
# URL is first priority, DOI second (cf. `pop_url!`)
# Passing `skip = 1` skips the first available link
urls = String[]
if !isempty(entry.access.doi)
url = doi_url(entry)
push!(urls, url)
end
if !isempty(entry.access.url)
push!(urls, entry.access.url)
if skip <= 0
push!(urls, entry.access.url)
end
skip = skip - 1
end
if !isempty(entry.access.doi)
if skip <= 0
url = doi_url(entry)
push!(urls, url)
end
skip = skip - 1
end
return urls
end
Expand All @@ -366,16 +374,28 @@ function doi_url(entry)
if isempty(doi)
return ""
else
if !startswith(doi, "10.")
doi_match = match(r"\b10.\d{4,9}/.*\b", doi)
if isnothing(doi_match)
@warn "Invalid DOI $(repr(doi)) in bibtex entry $(repr(entry.id)). Ignoring DOI."
return ""
else
if startswith(doi, "http")
@warn "The DOI field in bibtex entry $(repr(entry.id)) should not be a URL. Extracting $(repr(doi)) -> $(repr(doi_match.match))."
else
@warn "Invalid DOI $(repr(doi)) in bibtex entry $(repr(entry.id)). Extracting $(repr(doi_match.match))."
end
doi = doi_match.match
end
end
return "https://doi.org/$doi"
end
end


function pop_url!(urls)
# pop from the front so earlier entries have higher priority (i.e., URL has
# highest priority if `urls=get_urls(entry)`.)
try
return pop!(urls)
return popfirst!(urls)
catch
return ""
end
Expand Down
50 changes: 50 additions & 0 deletions test/test_formatting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ using DocumenterCitations
import DocumenterCitations:
two_digit_year,
alpha_label,
get_urls,
doi_url,
format_names,
format_citation,
format_bibliography_reference,
Expand Down Expand Up @@ -155,6 +157,20 @@ end
end


@testset "get_urls" begin
    # The expected values below pin the ordering: the explicit URL comes
    # first, the DOI link second; `skip` drops that many leading links.
    entries = CitationBibliography(DocumenterCitations.example_bibfile).entries
    links(key; kwargs...) = get_urls(entries[key]; kwargs...)

    url_link = "https://michaelgoerz.net/research/GoerzSPIEO2021.pdf"
    doi_link = "https://doi.org/10.1117/12.2587002"

    # Entry that yields both a URL and a DOI link
    @test links("GoerzSPIEO2021") == [url_link, doi_link]
    @test links("GoerzSPIEO2021"; skip=1) == [doi_link]
    @test isempty(links("GoerzSPIEO2021"; skip=2))

    # Entry that yields only a DOI link
    @test links("Nolting1997Coulomb") == ["https://doi.org/10.1007/978-3-663-14691-9"]
end


@testset "format_bibliography_reference(:numeric)" begin
bib = CitationBibliography(DocumenterCitations.example_bibfile)
md(key) = format_bibliography_reference(Val(:numeric), bib.entries[key])
Expand Down Expand Up @@ -367,3 +383,37 @@ end


end


@testset "invalid DOI" begin

    # The fixture lives in a directory named after this test file (sans
    # extension).
    fixture_dir = first(splitext(@__FILE__))
    entries = CitationBibliography(joinpath(fixture_dir, "invalid_doi.bib")).entries

    # (bibtex key, expected warning fragment, expected return of `doi_url`)
    cases = [
        (
            "Brif",
            "Warning: The DOI field in bibtex entry \"Brif\" should not be a URL.",
            "https://doi.org/10.1088/1367-2630/12/7/075008",
        ),
        (
            "Shapiro",
            "Warning: Invalid DOI \"doi:10.1002/9783527639700\" in bibtex entry \"Shapiro\".",
            "https://doi.org/10.1002/9783527639700",
        ),
        (
            "Tannor",
            "Warning: Invalid DOI \"0.1007/978-94-011-2642-7_23\" in bibtex entry \"Tannor\".",
            "",
        ),
    ]

    for (key, warning, expected_url) in cases
        # Capture the logged warning together with the returned URL
        captured = IOCapture.capture(() -> doi_url(entries[key]))
        @test contains(captured.output, warning)
        @test captured.value == expected_url
    end

end
38 changes: 38 additions & 0 deletions test/test_formatting/invalid_doi.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
@string{njp = "New J. Phys."}
@string{XXXclearparser = ""}


% DOI shouldn't be a URL
@article{Brif,
Author = {Brif, Constantin and Chakrabarti, Raj and Rabitz, Herschel},
Title = {Control of quantum phenomena: past, present and future},
Journal = njp,
Year = {2010},
Doi = {https://doi.org/10.1088/1367-2630/12/7/075008},
Pages = {075008},
Volume = {12},
}


% DOI has extra text (a `doi:` prefix)
@book{Shapiro,
author = {Shapiro, Moshe and Brumer, Paul},
edition = {Second},
publisher = {Wiley and Sons},
title = {Quantum Control of Molecular Processes},
year = {2012},
Doi = {doi:10.1002/9783527639700},
}


% DOI is missing its first digit (`0.1007` instead of `10.1007`)
@incollection{Tannor,
author = {Tannor, David J. and Jin, Yijian},
booktitle = {Mode Selective Chemistry},
editor = {Jortner, J. and Levine, R. D. and Pullman, B.},
pages = {333--345},
publisher = {Springer},
title = {Design of Femtosecond Pulse Sequences to Control Photochemical Products},
doi = {0.1007/978-94-011-2642-7_23},
year = {1991},
}

0 comments on commit 46c5175

Please sign in to comment.