Merge branch 'trs/newline-handling'

nextstrain · Aug 1, 2024 · c28fab2
2 parents e62252f + ab58544
commit c28fab2
Show file tree

Hide file tree

Showing 5 changed files with 8 additions and 7 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -8,10 +8,11 @@
 
 ### Bug Fixes
 
-* Embedded newlines in quoted field values of metadata files are now properly handled. [#1561][] (@tsibley)
+* Embedded newlines in quoted field values of metadata files read/written by many commands, annotation files read by `augur curate apply-record-annotations`, and index files written by `augur index` are now properly handled. [#1561][] [#1564][] (@tsibley)
 
 [#1561]: https://github.com/nextstrain/augur/pull/1561
 [#1562]: https://github.com/nextstrain/augur/pull/1562
+[#1564]: https://github.com/nextstrain/augur/pull/1564
 
 
 

diff --git a/augur/curate/apply_record_annotations.py b/augur/curate/apply_record_annotations.py
@@ -29,7 +29,7 @@ def register_parser(parent_subparsers):
 
 def run(args, records):
     annotations = defaultdict(dict)
-    with open(args.annotations, 'r') as annotations_fh:
+    with open(args.annotations, 'r', newline='') as annotations_fh:
         csv_reader = csv.reader(annotations_fh, delimiter='\t')
         for row in csv_reader:
             if not row or row[0].lstrip()[0] == '#':

diff --git a/augur/filter/io.py b/augur/filter/io.py
@@ -115,7 +115,7 @@ def write_metadata_based_outputs(input_metadata_path: str, delimiters: Sequence[
 
     # Set up output streams.
     if output_metadata_path:
-        output_metadata_handle = xopen(output_metadata_path, "w")
+        output_metadata_handle = xopen(output_metadata_path, "w", newline="")
         output_metadata = csv.DictWriter(output_metadata_handle, fieldnames=input_metadata.columns,
                                          delimiter="\t", lineterminator=os.linesep)
         output_metadata.writeheader()

diff --git a/augur/index.py b/augur/index.py
@@ -46,7 +46,7 @@ def index_vcf(vcf_path, index_path):
 
     num_of_seqs = 0
 
-    with open_file(index_path, 'wt') as out_file:
+    with open_file(index_path, 'wt', newline='') as out_file:
         tsv_writer = csv.writer(out_file, delimiter = DELIMITER)
 
         #write header i output file
@@ -185,7 +185,7 @@ def index_sequences(sequences_path, sequence_index_path):
     tot_length = 0
     num_of_seqs = 0
 
-    with open_file(sequence_index_path, 'wt') as out_file:
+    with open_file(sequence_index_path, 'wt', newline='') as out_file:
         tsv_writer = csv.writer(out_file, delimiter = '\t', lineterminator='\n')
 
         #write header i output file

diff --git a/scripts/tree_to_JSON.py b/scripts/tree_to_JSON.py
@@ -285,7 +285,7 @@ def removeBOM(arr):
 
     if args.strain_csv:
         meta = {}
-        with open(args.strain_csv, 'rb') as fh:
+        with open(args.strain_csv, 'r', newline='') as fh:
             csvdata = csv.reader(fh, delimiter=',', quotechar='"')
             header = removeBOM(csvdata.next())
             assert(header[0] == "strain")
@@ -306,7 +306,7 @@ def removeBOM(arr):
 
     if args.geo:
         geo = defaultdict(lambda: defaultdict(dict))
-        with open(args.geo, 'rb') as fh:
+        with open(args.geo, 'r', newline='') as fh:
             csvdata = csv.reader(fh, delimiter=',', quotechar='"')
             header = removeBOM(csvdata.next())
             assert(len(header) == 4)