From af05191a9da1ea0ee68dce15309c16aa7a12702d Mon Sep 17 00:00:00 2001
From: Marina D'Amato <marinadamato.md@gmail.com>
Date: Thu, 25 Jan 2024 14:58:27 +0100
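Subject: [PATCH] Pass ss_id to GetBiblatex and emit all_ss_ids field

get_biblatex.py:
- GetBiblatex.__init__ now takes an additional ss_id argument and stores
  it on the instance.
- get_bib_text writes the id into the generated entry as
  all_ss_ids = {['<ss_id>']}.
- The try/except around the body of get_bib_text is commented out, so
  errors now propagate to the caller instead of being printed and
  turned into the 'empty' return value.

update_bibfile.py:
- get_bib_info passes item['ss_id'] to GetBiblatex.
- Two blank lines are removed.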

---
 scripts/automatic_update/get_biblatex.py   | 199 +++++++++++----------
 scripts/automatic_update/update_bibfile.py |   4 +-
 2 files changed, 102 insertions(+), 101 deletions(-)

diff --git a/scripts/automatic_update/get_biblatex.py b/scripts/automatic_update/get_biblatex.py
index e2637f0..778d1a8 100644
--- a/scripts/automatic_update/get_biblatex.py
+++ b/scripts/automatic_update/get_biblatex.py
@@ -10,9 +10,10 @@
 
 
 class GetBiblatex:
-    def __init__(self, doi, diag_bib):
+    def __init__(self, doi, ss_id, diag_bib):
         self.doi = doi
         self.diag_bib = diag_bib
+        self.ss_id = ss_id
         self.accent_mappings = accent_mappings
 
     def _get_doi_csl(self):
@@ -118,104 +119,106 @@ def _clean_author_abbreviation(auth_abr, year, bib_file):
         return auth_abr
 
     def get_bib_text(self):
-        try:
-            response_json = self._get_doi_csl()
-            abstract = self._get_doi_abstract()
-            abstract = self._convert_to_biblatex_format(author_name=abstract)
-
-            if 'proceedings-article' in response_json['type']:
-                kind = 'inproceedings'
-                journal = response_json['container-title']
-            elif 'journal-article' in response_json['type']:
-                kind = 'article'
-                journal = response_json['container-title']
-            elif 'article' in response_json['type']:
-                kind = 'article'
-
-                # Convert doi to arXiv journal format "arXiv:xxxx.xxxxx"
-                index = self.doi.find('arXiv')
-                arxiv_id = self.doi[index:]
-                journal = arxiv_id.replace('.', ':', 1)
-            elif 'book-chapter' in response_json['type']:
-                kind = 'book'
-                journal = response_json['container-title']
-
-            elif 'posted-content' in response_json['type']:
-                kind = 'article'
-                journal = 'Preprint'
-
-
-
-                
-
-            author_string = "{"
-            for index, author in enumerate(response_json["author"]):
-                if index == len(response_json["author"])-1:
-                    if 'name' in author:  # Add research groups. For example COPD Investigators
-                        author_string = author_string + f"{author['name']}" + "}"
-                        continue
-                    if 'given' in author:
-                        author_string = author_string + f"{author['family']}, {author['given']}" + "}"
-                    else:
-                        author_string = author_string + f"{author['family']}" + "}"
+
+        #try:
+        response_json = self._get_doi_csl()
+        abstract = self._get_doi_abstract()
+        abstract = self._convert_to_biblatex_format(author_name=abstract)
+
+        if 'proceedings-article' in response_json['type']:
+            kind = 'inproceedings'
+            journal = response_json['container-title']
+        elif 'journal-article' in response_json['type']:
+            kind = 'article'
+            journal = response_json['container-title']
+        elif 'article' in response_json['type']:
+            kind = 'article'
+
+            # Convert doi to arXiv journal format "arXiv:xxxx.xxxxx"
+            index = self.doi.find('arXiv')
+            arxiv_id = self.doi[index:]
+            journal = arxiv_id.replace('.', ':', 1)
+        elif 'book-chapter' in response_json['type']:
+            kind = 'book'
+            journal = response_json['container-title']
+
+        elif 'posted-content' in response_json['type']:
+            kind = 'article'
+            journal = 'Preprint'
+
+
+
+            
+
+        author_string = "{"
+        for index, author in enumerate(response_json["author"]):
+            if index == len(response_json["author"])-1:
+                if 'name' in author:  # Add research groups. For example COPD Investigators
+                    author_string = author_string + f"{author['name']}" + "}"
+                    continue
+                if 'given' in author:
+                    author_string = author_string + f"{author['family']}, {author['given']}" + "}"
                 else:
-                    if 'name' in author:
-                        author_string = author_string + f"{author['name']} and "
-                        continue
-                    if 'given' in author:
-                        author_string = author_string + f"{author['family']}, {author['given']} and "
-                    else:
-                        author_string = author_string + f"{author['family']} and "
-            author_string = self._convert_to_biblatex_format(author_name=author_string)
-            newline = '\n'
-            tab = '\t'
-            author_abbreviation = response_json['author'][0]['family'].rsplit(' ')[-1]
-            author_abbreviation = author_abbreviation.replace("'", "").lower().capitalize()[:4]
-
-            published = response_json.get("published")
-            if published is None:
-                published = response_json.get("issued")
-            year_short = str(published.get('date-parts')[0][0])[2:]
-            year = str(published.get('date-parts')[0][0])
-            # year = str(response_json["published"]["date-parts"][0][0])[2:]
-            author_abbreviation = self._clean_author_abbreviation(author_abbreviation, year_short, self.diag_bib)
-            title = response_json["title"]
-            title = self._convert_to_biblatex_format(author_name=title)
-            optnote = "DIAG, RADIOLOGY"
-
-            biblatex = f"@{kind}{{{author_abbreviation}, {newline}" \
-                       f"{tab}author = {author_string}, {newline} " \
-                       f"{tab}title = {{{title}}}, {newline}" \
-                       f"{tab}doi = {{{response_json['DOI']}}}, {newline}" \
-                       f"{tab}year = {{{year}}}, {newline}" \
-                       f"{tab}abstract = {{{abstract}}}, {newline}" \
-                       f"{tab}url = {{{response_json['URL']}}}, {newline}" \
-                       f"{tab}file = {{{author_abbreviation}.pdf:pdf\\\\{author_abbreviation}.pdf:PDF}}, {newline}" \
-                       f"{tab}optnote = {{{optnote}}}, {newline}" \
-                       f"{tab}journal = {{{journal}}}, {newline}" \
-                       f"{tab}automatic = {{yes}}, {newline}" \
-                       # f"{tab}citation-count = {{{response_json['is-referenced-by-count']}}}, {newline}" \
-                       # f"}}{newline}"
-
-            if 'is-referenced-by-count' in response_json.keys():
-                biblatex = biblatex + f"{tab}citation-count = {{{response_json['is-referenced-by-count']}}}, {newline}"
-
-            if 'page' in response_json.keys() and 'volume' in response_json.keys():
-                biblatex = biblatex + f"{tab}pages = {{{response_json['page']}}},{newline}" + f"{tab}volume = {{{response_json['volume']}}}, {newline}" + f"}}{newline}"
-            elif 'page' in response_json.keys():
-                biblatex = biblatex + f"{tab}pages = {{{response_json['page']}}},{newline}" + f"}}{newline}"
-            elif 'volume' in response_json.keys():
-                biblatex = biblatex + f"{tab}volume = {{{response_json['volume']}}}, {newline}" + f"}}{newline}"
+                    author_string = author_string + f"{author['family']}" + "}"
             else:
-                biblatex = biblatex + f"}}{newline}"
-
-            # replace any EN DASH with Hyphen
-            if "–" in biblatex:
-                biblatex = biblatex.replace("–", "-")
-
-        except Exception as e:
-            print(f'Unable to generate bibtext for {self.doi}')
-            print(e)
-            biblatex = 'empty'
+                if 'name' in author:
+                    author_string = author_string + f"{author['name']} and "
+                    continue
+                if 'given' in author:
+                    author_string = author_string + f"{author['family']}, {author['given']} and "
+                else:
+                    author_string = author_string + f"{author['family']} and "
+        author_string = self._convert_to_biblatex_format(author_name=author_string)
+        newline = '\n'
+        tab = '\t'
+        author_abbreviation = response_json['author'][0]['family'].rsplit(' ')[-1]
+        author_abbreviation = author_abbreviation.replace("'", "").lower().capitalize()[:4]
+
+        published = response_json.get("published")
+        if published is None:
+            published = response_json.get("issued")
+        year_short = str(published.get('date-parts')[0][0])[2:]
+        year = str(published.get('date-parts')[0][0])
+        # year = str(response_json["published"]["date-parts"][0][0])[2:]
+        author_abbreviation = self._clean_author_abbreviation(author_abbreviation, year_short, self.diag_bib)
+        title = response_json["title"]
+        title = self._convert_to_biblatex_format(author_name=title)
+        optnote = "DIAG, RADIOLOGY"
+        self.ss_id = "['"+self.ss_id+"']"
+        biblatex = f"@{kind}{{{author_abbreviation}, {newline}" \
+                    f"{tab}author = {author_string}, {newline} " \
+                    f"{tab}title = {{{title}}}, {newline}" \
+                    f"{tab}doi = {{{response_json['DOI']}}}, {newline}" \
+                    f"{tab}year = {{{year}}}, {newline}" \
+                    f"{tab}abstract = {{{abstract}}}, {newline}" \
+                    f"{tab}url = {{{response_json['URL']}}}, {newline}" \
+                    f"{tab}file = {{{author_abbreviation}.pdf:pdf\\\\{author_abbreviation}.pdf:PDF}}, {newline}" \
+                    f"{tab}optnote = {{{optnote}}}, {newline}" \
+                    f"{tab}journal = {{{journal}}}, {newline}" \
+                    f"{tab}automatic = {{yes}}, {newline}" \
+                    f"{tab}all_ss_ids = {{{self.ss_id}}}, {newline}"
+                    # f"{tab}citation-count = {{{response_json['is-referenced-by-count']}}}, {newline}" \
+                    # f"}}{newline}"
+        
+        if 'is-referenced-by-count' in response_json.keys():
+            biblatex = biblatex + f"{tab}citation-count = {{{response_json['is-referenced-by-count']}}}, {newline}"
+
+        if 'page' in response_json.keys() and 'volume' in response_json.keys():
+            biblatex = biblatex + f"{tab}pages = {{{response_json['page']}}},{newline}" + f"{tab}volume = {{{response_json['volume']}}}, {newline}" + f"}}{newline}"
+        elif 'page' in response_json.keys():
+            biblatex = biblatex + f"{tab}pages = {{{response_json['page']}}},{newline}" + f"}}{newline}"
+        elif 'volume' in response_json.keys():
+            biblatex = biblatex + f"{tab}volume = {{{response_json['volume']}}}, {newline}" + f"}}{newline}"
+        else:
+            biblatex = biblatex + f"}}{newline}"
+
+        # replace any EN DASH with Hyphen
+        if "–" in biblatex:
+            biblatex = biblatex.replace("–", "-")
+
+        #except Exception as e:
+        #    print(f'Unable to generate bibtext for {self.doi}')
+        #    print(e)
+        #    biblatex = 'empty'
 
         return biblatex
diff --git a/scripts/automatic_update/update_bibfile.py b/scripts/automatic_update/update_bibfile.py
index 592dbbf..f46b25b 100644
--- a/scripts/automatic_update/update_bibfile.py
+++ b/scripts/automatic_update/update_bibfile.py
@@ -90,9 +90,8 @@ def get_bib_info(diag_bib_file, item): #diag_bib_file is the file read in as a s
             print('similar doi already exists in bib file, but new item will be added for ', item['ss_doi'], item['ss_id'])
 
     # Get BibLatex information based on DOI if not in the file
-    reader = GetBiblatex(doi=item['ss_doi'], diag_bib=diag_bib_file)
+    reader = GetBiblatex(doi=item['ss_doi'], ss_id=item['ss_id'], diag_bib=diag_bib_file)
     bibtext = reader.get_bib_text()
-
     # Return the bibtext if it is not 'empty', otherwise return None
     return bibtext if bibtext != 'empty' else None
 
@@ -246,7 +245,6 @@ def loop_manual_check(manually_checked, diag_bib_orig):
         elif "[add new item]" == bib_item['action'].strip():
            
            bib_item_text = get_bib_info(diag_bib_orig, bib_item)
-    
            if bib_item_text is not None:
                items_to_add += bib_item_text
                # if there is a pmid note it to be added afterwards