diff --git a/R/search_catalog.R b/R/search_catalog.R index 5b5037d..246d9fa 100644 --- a/R/search_catalog.R +++ b/R/search_catalog.R @@ -87,58 +87,63 @@ get_cas <- function(query,type){ if (httr::status_code(res) == 200){ result = res %>% xml2::read_html() %>% - xml2::xml_find_all("//p[@class='result']") %>% - xml2::xml_text() %>% - tibble::enframe() %>% - dplyr::mutate(value = stringr::str_squish(value)) %>% - dplyr::filter(value != "") %>% - dplyr::mutate( - query = stringr::str_extract(value, "[a-z]+, [A-Za-z]+"), - content = stringr::str_extract(value, "(?<=Current status: ).*"), - species = stringr::str_split(content,"\\. ",simplify = T)[,1], - family = stringr::str_split(content,"\\. ",simplify = T)[,2], - species_author = stringr::str_trim(ifelse(grepl("Valid as",species),gsub("Valid as ","",species), - ifelse(grepl("Synonym of",species),gsub("Synonym of ","",species), - ifelse(grepl("Uncertain as",species),gsub("Uncertain as ","",species),species)))), - status = ifelse(grepl("Valid as",species),"Validation", - ifelse(grepl("Synonym of",species),"Synonym", - ifelse(grepl("Uncertain as",species),"Uncertainty",NA))) - ) %>% - na.omit() %>% - dplyr::select(query,species_author,family,status) + xml2::xml_find_all("//p[@class='result']") + if (length(result) == 0) { + message(paste("No match found for", query)) + } else { + result = result %>% + xml2::xml_text() %>% + tibble::enframe() %>% + dplyr::mutate(value = stringr::str_squish(value)) %>% + dplyr::filter(value != "") %>% + dplyr::mutate( + query = stringr::str_extract(value, "[a-z]+, [A-Za-z]+"), + content = stringr::str_extract(value, "(?<=Current status: ).*"), + species = stringr::str_split(content,"\\. ",simplify = T)[,1], + family = stringr::str_split(content,"\\. ",simplify = T)[,2], + species_author = stringr::str_trim(ifelse(grepl("Valid as",species),gsub("Valid as ","",species), + ifelse(grepl("Synonym of",species),gsub("Synonym of ","",species), + ifelse(grepl("Uncertain as",species),gsub("Uncertain as ","",species),species)))), + status = ifelse(grepl("Valid as",species),"Validation", + ifelse(grepl("Synonym of",species),"Synonym", + ifelse(grepl("Uncertain as",species),"Uncertainty",NA))) + ) %>% + na.omit() %>% + dplyr::select(query,species_author,family,status) - dd = stringr::str_locate_all(result$species_author, " ") - end = c() - switch(type, `genus_family` = { - for (i in 1:length(dd)) { - end[i] = dd[[i]][1] + dd = stringr::str_locate_all(result$species_author, " ") + end = c() + switch(type, `genus_family` = { + for (i in 1:length(dd)) { + end[i] = dd[[i]][1] + } + }, `species_family` = { + for (i in 1:length(dd)) { + end[i] = dd[[i]][2] + } + }, `species_genus` = { + for (i in 1:length(dd)) { + end[i] = dd[[i]][2] + } + },`species` = { + for (i in 1:length(dd)) { + end[i] = dd[[i]][2] + } } - }, `species_family` = { - for (i in 1:length(dd)) { - end[i] = dd[[i]][2] - } - }, `species_genus` = { - for (i in 1:length(dd)) { - end[i] = dd[[i]][2] - } - },`species` = { - for (i in 1:length(dd)) { - end[i] = dd[[i]][2] - } - } - ) - result$species = stringr::str_sub(result$species_author, 1, end - 1) - result$author = stringr::str_sub(result$species_author, end + 1) - #result$family = gsub(":.*","",result$family) - result$family = gsub(": ","_",result$family) - if(type == "genus_family"){ - names(result)[2] = "genus_author" - names(result)[5] = "genus" - result$family = gsub("\\.","",result$family) + ) + result$species = stringr::str_sub(result$species_author, 1, end - 1) + result$author = stringr::str_sub(result$species_author, end + 1) #result$family = gsub(":.*","",result$family) result$family = gsub(": ","_",result$family) + if(type == "genus_family"){ + names(result)[2] = "genus_author" + names(result)[5] = "genus" + result$family = gsub("\\.","",result$family) + #result$family = gsub(":.*","",result$family) + result$family = gsub(": ","_",result$family) + } + return(tibble::as_tibble(result[,c(1,2,5,6,3,4)])) } - return(tibble::as_tibble(result[,c(1,2,5,6,3,4)])) }else{ cat("Error request - the parameter query is not valid") browseURL(baseurl)