Skip to content

Commit

Permalink
Merge pull request #64 from JuliaDocs/mg/warn-invalid-bibtex
Browse files Browse the repository at this point in the history
Warn about invalid bibtex
  • Loading branch information
goerz authored Nov 5, 2023
2 parents b8064cc + 46c5175 commit fc68ab7
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 15 deletions.
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [Unreleased][]

### Fixed

* Warn about markdown link syntax in `.bib` files [[#60][]]
* Warn about invalid DOIs in `.bib` files. The DOI field should never contain a URL (`https://doi.org/...`). This is detected as a special case, and the DOI is extracted from the URL.


## [Version 1.3.1][1.3.1] - 2023-11-02

### Fixed
Expand Down Expand Up @@ -142,6 +150,7 @@ There were several bugs and limitations in version `1.2.x` for which some existi
[1.1.0]: https://github.com/JuliaDocs/DocumenterCitations.jl/compare/v1.0.0...v1.1.0
[1.0.0]: https://github.com/JuliaDocs/DocumenterCitations.jl/compare/v0.2.12...v1.0.0
[#61]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/61
[#60]: https://github.com/JuliaDocs/DocumenterCitations.jl/issues/60
[#59]: https://github.com/JuliaDocs/DocumenterCitations.jl/issues/59
[#56]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/56
[#53]: https://github.com/JuliaDocs/DocumenterCitations.jl/issues/53
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DocumenterCitations"
uuid = "daee34ce-89f3-4625-b898-19384cb65244"
authors = ["Michael Goerz <[email protected]>"]
version = "1.3.1+dev"
version = "1.3.2-dev"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
2 changes: 1 addition & 1 deletion docs/src/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ @incollection{TannorBookChapter1991
pages = {333--345},
publisher = {Springer},
title = {Design of Femtosecond Pulse Sequences to Control Photochemical Products},
doi = {0.1007/978-94-011-2642-7_23},
doi = {10.1007/978-94-011-2642-7_23},
year = {1991},
}

Expand Down
46 changes: 33 additions & 13 deletions src/formatting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,12 @@ function format_published_in(
push!(urls, doi_url(entry))
end
else
append!(urls, get_urls(entry))
if !isempty(get_title(entry))
if isempty(get_title(entry))
append!(urls, get_urls(entry; skip=0))
else
# The title already linked to the URL (or DOI, if no
# URL available)
pop_url!(urls)
# URL available), hence we skip the first link here.
append!(urls, get_urls(entry; skip=1))
end
end
segments = [String[], String[], String[]]
Expand Down Expand Up @@ -347,15 +348,22 @@ function get_booktitle(entry)
end


function get_urls(entry)
function get_urls(entry; skip=0)
# URL is first priority, DOI second (cf. `pop_url!`)
# Passing `skip = 1` skips the first available link
urls = String[]
if !isempty(entry.access.doi)
url = doi_url(entry)
push!(urls, url)
end
if !isempty(entry.access.url)
push!(urls, entry.access.url)
if skip <= 0
push!(urls, entry.access.url)
end
skip = skip - 1
end
if !isempty(entry.access.doi)
if skip <= 0
url = doi_url(entry)
push!(urls, url)
end
skip = skip - 1
end
return urls
end
Expand All @@ -366,16 +374,28 @@ function doi_url(entry)
if isempty(doi)
return ""
else
if !startswith(doi, "10.")
doi_match = match(r"\b10.\d{4,9}/.*\b", doi)
if isnothing(doi_match)
@warn "Invalid DOI $(repr(doi)) in bibtex entry $(repr(entry.id)). Ignoring DOI."
return ""
else
if startswith(doi, "http")
@warn "The DOI field in bibtex entry $(repr(entry.id)) should not be a URL. Extracting $(repr(doi)) -> $(repr(doi_match.match))."
else
@warn "Invalid DOI $(repr(doi)) in bibtex entry $(repr(entry.id)). Extracting $(repr(doi_match.match))."
end
doi = doi_match.match
end
end
return "https://doi.org/$doi"
end
end


function pop_url!(urls)
# pop from the back so later entries have higher priority (i.e., URL has
# highest priority if `urls=get_urls(entry)`.)
try
return pop!(urls)
return popfirst!(urls)
catch
return ""
end
Expand Down
4 changes: 4 additions & 0 deletions src/tex_to_markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ end


function tex_to_markdown(tex_str; transform_case=s -> s, debug=_DEBUG)
if contains(tex_str, "](http")
# https://github.com/JuliaDocs/DocumenterCitations.jl/issues/60
@warn "The tex string $(repr(tex_str)) appears to contain a link in markdown syntax. Links in a `.bib` entry should use the `\\href` tex command."
end
try
md_str = _process_tex(tex_str; transform_case=transform_case, debug=debug)
return Unicode.normalize(md_str)
Expand Down
50 changes: 50 additions & 0 deletions test/test_formatting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ using DocumenterCitations
import DocumenterCitations:
two_digit_year,
alpha_label,
get_urls,
doi_url,
format_names,
format_citation,
format_bibliography_reference,
Expand Down Expand Up @@ -155,6 +157,20 @@ end
end


@testset "get_urls" begin
    # `get_urls` is expected to list the entry's URL first and its DOI link
    # second; `skip=n` drops the first `n` of the available links.
    bib = CitationBibliography(DocumenterCitations.example_bibfile)

    # Entry that has both a URL and a DOI
    entry = bib.entries["GoerzSPIEO2021"]
    url_link = "https://michaelgoerz.net/research/GoerzSPIEO2021.pdf"
    doi_link = "https://doi.org/10.1117/12.2587002"
    @test get_urls(entry) == [url_link, doi_link]
    @test get_urls(entry; skip=1) == [doi_link]
    @test isempty(get_urls(entry; skip=2))

    # Entry for which only a DOI link is expected
    doi_only = bib.entries["Nolting1997Coulomb"]
    @test get_urls(doi_only) == ["https://doi.org/10.1007/978-3-663-14691-9"]
end


@testset "format_bibliography_reference(:numeric)" begin
bib = CitationBibliography(DocumenterCitations.example_bibfile)
md(key) = format_bibliography_reference(Val(:numeric), bib.entries[key])
Expand Down Expand Up @@ -367,3 +383,37 @@ end


end


@testset "invalid DOI" begin

    # The fixture lives in a folder named after this test file (sans extension)
    fixture = joinpath(splitext(@__FILE__)[1], "invalid_doi.bib")
    bib = CitationBibliography(fixture)

    # Call `doi_url` for the entry `key`, capturing both the returned link
    # (`.value`) and everything printed while it runs (`.output`).
    capture_doi_url(key) = IOCapture.capture(() -> doi_url(bib.entries[key]))

    # DOI field contains a full URL: warn and extract the DOI from it
    brif = capture_doi_url("Brif")
    @test contains(
        brif.output,
        "Warning: The DOI field in bibtex entry \"Brif\" should not be a URL."
    )
    @test brif.value == "https://doi.org/10.1088/1367-2630/12/7/075008"

    # DOI field carries a "doi:" prefix: warn and extract the DOI
    shapiro = capture_doi_url("Shapiro")
    @test contains(
        shapiro.output,
        "Warning: Invalid DOI \"doi:10.1002/9783527639700\" in bibtex entry \"Shapiro\"."
    )
    @test shapiro.value == "https://doi.org/10.1002/9783527639700"

    # DOI field has no recoverable DOI: warn and return an empty link
    tannor = capture_doi_url("Tannor")
    @test contains(
        tannor.output,
        "Warning: Invalid DOI \"0.1007/978-94-011-2642-7_23\" in bibtex entry \"Tannor\"."
    )
    @test tannor.value == ""

end
38 changes: 38 additions & 0 deletions test/test_formatting/invalid_doi.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
@string{njp = "New J. Phys."}
@string{XXXclearparser = ""}


% DOI shouldn't be a URL
@article{Brif,
Author = {Brif, Constantin and Chakrabarti, Raj and Rabitz, Herschel},
Title = {Control of quantum phenomena: past, present and future},
Journal = njp,
Year = {2010},
Doi = {https://doi.org/10.1088/1367-2630/12/7/075008},
Pages = {075008},
Volume = {12},
}


% DOI has extra text
@book{Shapiro,
author = {Shapiro, Moshe and Brumer, Paul},
edition = {Second},
publisher = {Wiley and Sons},
title = {Quantum Control of Molecular Processes},
year = {2012},
Doi = {doi:10.1002/9783527639700},
}


% DOI is missing its first digit
@incollection{Tannor,
author = {Tannor, David J. and Jin, Yijian},
booktitle = {Mode Selective Chemistry},
editor = {Jortner, J. and Levine, R. D. and Pullman, B.},
pages = {333--345},
publisher = {Springer},
title = {Design of Femtosecond Pulse Sequences to Control Photochemical Products},
doi = {0.1007/978-94-011-2642-7_23},
year = {1991},
}
10 changes: 10 additions & 0 deletions test/test_tex_to_markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,16 @@ end
@test c.value.msg ==
"Cannot evaluate \\href: ArgumentError(\"Unsupported command: \\\\error. Please report a bug.\")"

s = "The krotov Pyhon package is available on [Github](https://github.com/qucontrol/krotov)"
c = IOCapture.capture(rethrow=Union{}) do
tex_to_markdown(s)
end
@test c.value == s
@test contains(
c.output,
"Warning: The tex string \"The krotov Pyhon package is available on [Github](https://github.com/qucontrol/krotov)\" appears to contain a link in markdown syntax"
)

end


Expand Down

0 comments on commit fc68ab7

Please sign in to comment.