Commit 4d9ac0a

Merge branch 'devel'

rhandberg committed Jan 28, 2021
2 parents 8f7bd73 + c2aa111

Showing 7 changed files with 251 additions and 29 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
- master-v1.3.2
+ master-v1.4
150 changes: 138 additions & 12 deletions dataval/release.py
@@ -6,15 +6,63 @@
.. codeauthor:: Rasmus Handberg <[email protected]>
"""

import numpy as np
import logging
import warnings
import re
import os
import shutil
import tempfile
import contextlib
from astropy.io import fits
from astropy.wcs import WCS, FITSFixedWarning
from .utilities import find_tpf_files, get_filehash

#--------------------------------------------------------------------------------------------------
@contextlib.contextmanager
def temporary_filename(**kwargs):
"""
Context that introduces a temporary file.
Creates a temporary file, yields its name, and upon context exit, deletes it.
(In contrast, tempfile.NamedTemporaryFile() provides a 'file' object and
deletes the file as soon as that file object is closed, so the temporary file
cannot be safely re-opened by another library or process.)
Yields:
The name of the temporary file.
"""
if 'delete' in kwargs:
raise ValueError("DELETE keyword cannot be used")
try:
f = tempfile.NamedTemporaryFile(delete=False, **kwargs)
tmp_name = f.name
f.close()
yield tmp_name
finally:
if os.path.exists(tmp_name):
os.remove(tmp_name)
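
A minimal usage sketch of the context manager (the suffix below is arbitrary); the yielded file already exists and is closed, so other libraries can re-open it freely:

with temporary_filename(suffix='.tmp') as tmp_name:
	with open(tmp_name, 'w') as fh:
		fh.write('scratch data')
# On leaving the context, the temporary file is deleted again.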

#--------------------------------------------------------------------------------------------------
def atomic_copy(src, dst):
"""
Copy a file (using shutil.copy2), but with a higher likelihood of being an atomic operation.
This is done by first copying to a temporary file and then renaming it to the final name.
The rename is only atomic on POSIX systems.
"""
if os.path.exists(dst):
raise FileExistsError(dst)

with temporary_filename(dir=os.path.dirname(dst), suffix='.tmp') as tmp:
try:
shutil.copy2(src, tmp)
os.rename(tmp, dst)
except: # noqa: E722
if os.path.exists(dst):
os.remove(dst)
raise
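
A minimal call sketch of atomic_copy (both paths are hypothetical); a failed copy cleans up the destination and re-raises:

atomic_copy('/scratch/tess_lightcurve.fits', '/release/tess_lightcurve.fits')
# Raises FileExistsError if the destination already exists.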

#--------------------------------------------------------------------------------------------------
def check_fits_changes(fname, fname_modified, allow_header_value_changes=None):
"""
@@ -170,46 +218,53 @@ def fix_file(row, input_folder=None, check_corrector=None, force_version=None, t
raise Exception("CORRECTOR")

# Do we really need to modify the FITS file?
- modification_needed = True # FORCE modification check!
+ openfile_needed = True # FORCE modification check!
fix_wcs = False

if dataval > 0:
- modification_needed = True
+ openfile_needed = True

if cadence == 120 and version <= 5:
- modification_needed = True
+ openfile_needed = True
fix_wcs = True

# We need to open the ensemble files to find the lightcurve dependencies:
if corrector == 'ens':
openfile_needed = True

# Find the starid of the TPF which was used to create this lightcurve:
if row['datasource'] == 'tpf':
- dependency = row['starid']
+ dependency_tpf = row['starid']
elif row['datasource'].startswith('tpf:'):
- dependency = int(row['datasource'][4:])
+ dependency_tpf = int(row['datasource'][4:])
else:
- dependency = None
+ dependency_tpf = None
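# Example (hypothetical TIC): datasource == 'tpf:261136679' gives
# dependency_tpf = 261136679, while FFI targets end up with None.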

# Placeholder for dependencies between lightcurves:
dependency_lc = None

# Damn, it looks like a modification is needed:
allow_change = []
- if modification_needed:
+ if openfile_needed:
logger.debug("Opening FITS file: %s", fname)
modification_needed = False

if fix_wcs:
if tpf_rootdir is None:
raise Exception("You need to provide a TPF_ROOTDIR")
# Figure out which TPF file the correct WCS should be taken from:
- if dependency is None:
+ if dependency_tpf is None:
raise Exception("We can't fix WCSs of FFI targets!")
# Find the original TPF file and extract the WCS from its headers:
- tpf_file = find_tpf_files(tpf_rootdir, starid=dependency, sector=sector, camera=camera, ccd=ccd, cadence=cadence)
+ tpf_file = find_tpf_files(tpf_rootdir, starid=dependency_tpf, sector=sector, camera=camera, ccd=ccd, cadence=cadence)
if len(tpf_file) != 1:
raise Exception("Could not find TPF file: starid=%d, sector=%d" % (dependency, sector))
raise Exception("Could not find TPF file: starid=%d, sector=%d" % (dependency_tpf, sector))
# Extract the FITS header with the correct WCS:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=FITSFixedWarning)
wcs_header = WCS(header=fits.getheader(tpf_file[0], extname='APERTURE'), relax=True).to_header(relax=True)
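# relax=True accepts and writes out non-standard WCS keywords, so the
# header round-trips without losing information.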

- shutil.copy(fname, fname_original)
+ atomic_copy(fname, fname_original)
with fits.open(fname_original, mode='readonly', memmap=True) as hdu:
prihdr = hdu[0].header

@@ -231,6 +286,10 @@ def fix_file(row, input_folder=None, check_corrector=None, force_version=None, t
allow_change += ['TDISP2']
hdu['ENSEMBLE'].header['TDISP2'] = 'E26.17'

if corrector == 'ens':
# Pick out the list of TIC-IDs used to build ensemble:
dependency_lc = list(hdu['ENSEMBLE'].data['TIC'])
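# dependency_lc now holds the TIC identifiers of the stars used to
# build the ensemble for this lightcurve.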

if fix_wcs:
logger.info("%s: Changing WCS", fname)
modification_needed = True
@@ -269,12 +328,79 @@ def fix_file(row, input_folder=None, check_corrector=None, force_version=None, t
'sector': row['sector'],
'camera': row['camera'],
'ccd': row['ccd'],
'cbv_area': row['cbv_area'],
'cadence': cadence,
'lightcurve': row['lightcurve'],
'dataval': dataval,
'datarel': datarel,
'version': version,
'filesize': filesize,
'filehash': filehash,
- 'dependency': dependency
+ 'dependency_tpf': dependency_tpf,
+ 'dependency_lc': dependency_lc
}

#--------------------------------------------------------------------------------------------------
def process_cbv(fname, input_folder, force_version=None):

m = re.match(r'^tess-s(\d{4})-c(\d{4})-a(\d{3})-v(\d+)-tasoc_cbv\.fits\.gz$', os.path.basename(fname))
if m is None:
raise Exception("CBV file does not have the correct file name format!")
fname_sector = int(m.group(1))
fname_cadence = int(m.group(2))
fname_cbvarea = int(m.group(3))
fname_camera = int(m.group(3)[0])
fname_ccd = int(m.group(3)[1])
fname_version = int(m.group(4))
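# Example (hypothetical name): 'tess-s0006-c0120-a114-v5-tasoc_cbv.fits.gz'
# parses to sector=6, cadence=120, cbv_area=114, camera=1, ccd=1, version=5.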

# Open the FITS file and check the headers:
with fits.open(fname, mode='readonly', memmap=True) as hdu:
hdr = hdu[0].header
sector = hdr['SECTOR']
camera = hdr['CAMERA']
ccd = hdr['CCD']
data_rel = hdr['DATA_REL']
version = hdr['VERSION']
cbv_area = hdr['CBV_AREA']
cadence = hdr['CADENCE']

time = np.asarray(hdu[1].data['TIME'])
cadence_time = int(np.round(86400*np.median(np.diff(time))))
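# TIME is in days, so the median step times 86400 gives the observed
# sampling cadence in seconds, cross-checked against the header below.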

# Check that the filename and headers are consistent:
if sector != fname_sector:
raise Exception("SECTOR does not match filename.")
if camera != fname_camera:
raise Exception("CAMERA does not match filename.")
if ccd != fname_ccd:
raise Exception("CCD does not match filename.")
if cadence != fname_cadence:
raise Exception("CADENCE does not match filename.")
if cadence != cadence_time:
raise Exception("CADENCE does not match TIME.")
if cbv_area != fname_cbvarea:
raise Exception("CBV_AREA does not match filename.")
if version != fname_version:
raise Exception("VERSION does not match filename.")

if force_version is not None and version != force_version:
raise Exception("Version mismatch!")

path = os.path.relpath(fname, input_folder).replace('\\', '/')

# Extract information from final file:
filesize = os.path.getsize(fname)
filehash = get_filehash(fname)

return {
'path': path,
'sector': sector,
'camera': camera,
'ccd': ccd,
'cbv_area': cbv_area,
'cadence': cadence,
'datarel': data_rel,
'version': version,
'filesize': filesize,
'filehash': filehash
}
17 changes: 10 additions & 7 deletions dataval/utilities.py
@@ -68,21 +68,24 @@ def loadPickle(fname):
return pickle.load(fid)

#--------------------------------------------------------------------------------------------------
- def find_tpf_files(rootdir, starid=None, sector=None, camera=None, ccd=None, cadence=None, findmax=None):
+ def find_tpf_files(rootdir, starid=None, sector=None, camera=None, ccd=None, cadence=None,
+ findmax=None):
"""
Search directory recursively for TESS Target Pixel Files.
Parameters:
- rootdir (string): Directory to search recursively for TESS TPF files.
- starid (integer or None, optional): Only return files from the given TIC number.
+ rootdir (str): Directory to search recursively for TESS TPF files.
+ starid (int, optional): Only return files from the given TIC number.
If ``None``, files from all TIC numbers are returned.
- sector (integer or None, optional): Only return files from the given sector.
+ sector (int, optional): Only return files from the given sector.
If ``None``, files from all sectors are returned.
camera (integer or None, optional): Only return files from the given camera number (1-4).
camera (int or None, optional): Only return files from the given camera number (1-4).
If ``None``, files from all cameras are returned.
- ccd (integer or None, optional): Only return files from the given CCD number (1-4).
+ ccd (int, optional): Only return files from the given CCD number (1-4).
If ``None``, files from all CCDs are returned.
- findmax (integer or None, optional): Maximum number of files to return.
+ cadence (int, optional): Only return files from the given cadence (20 or 120).
+ If ``None``, files from all cadences are returned.
+ findmax (int, optional): Maximum number of files to return.
If ``None``, return all files.
Note:
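A minimal call sketch based on the signature above (the root directory and TIC number are hypothetical):

from dataval.utilities import find_tpf_files
tpf_files = find_tpf_files('/data/tpf', starid=261136679, sector=6, cadence=120)
# Returns a list of matching file paths.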
65 changes: 61 additions & 4 deletions run_package_release.py
@@ -11,14 +11,15 @@
import sys
import logging
import os
import glob
import sqlite3
from contextlib import closing
import functools
import multiprocessing
from tqdm import tqdm
from dataval import __version__
from dataval.utilities import TqdmLoggingHandler, CounterFilter
- from dataval.release import fix_file, regex_fileend
+ from dataval.release import fix_file, regex_fileend, process_cbv

#--------------------------------------------------------------------------------------------------
def main():
@@ -126,6 +127,7 @@ def main():
todolist.ccd,
todolist.sector,
todolist.datasource,
todolist.cbv_area,
diagnostics_corr.lightcurve,
datavalidation_corr.dataval
FROM todolist
@@ -187,8 +189,11 @@ def main():
filehash TEXT NOT NULL,
datarel INTEGER NOT NULL,
dataval INTEGER NOT NULL,
- dependency INTEGER
+ cbv_area INTEGER NOT NULL,
+ dependency_tpf INTEGER,
+ dependency_lc TEXT
);""")

# Create the settings table if it doesn't exist:
cursor.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='settings';")
if cursor.fetchone()[0] == 0:
@@ -202,6 +207,22 @@ def main():
corrector,
force_version
])

# For CBV corrected data, create a separate table for the CBV files:
if corrector == 'cbv':
cursor.execute("DROP TABLE IF EXISTS release_cbv;")
cursor.execute("""CREATE TABLE release_cbv (
path TEXT NOT NULL PRIMARY KEY,
sector INTEGER NOT NULL,
camera INTEGER NOT NULL,
ccd INTEGER NOT NULL,
cbv_area INTEGER NOT NULL,
cadence INTEGER NOT NULL,
datarel INTEGER NOT NULL,
filesize INTEGER NOT NULL,
filehash TEXT NOT NULL
);""")

conn.commit()

# Ensure that we are not running an existing file with new settings:
@@ -214,6 +235,31 @@ def main():
logger.error("Inconsistent VERSION provided")
return 2

# For CBV corrected data, first we find all the CBV files, add these to their own
# release table, and keep a record of which ones were found, so we can check below
# if all the lightcurves can be associated with a single CBV file:
cbvs = []
if corrector == 'cbv':
cbv_files = glob.glob(os.path.join(input_folder, 'cbv-prepare', '*-tasoc_cbv.fits.gz'))
for fname in cbv_files:
info = process_cbv(fname, input_folder, force_version=force_version)
logger.debug(info)
cursor.execute("INSERT INTO release_cbv (path, sector, camera, ccd, cbv_area, cadence, datarel, filesize, filehash) VALUES (?,?,?,?,?,?,?,?,?);", [
info['path'],
info['sector'],
info['camera'],
info['ccd'],
info['cbv_area'],
info['cadence'],
info['datarel'],
info['filesize'],
info['filehash']
])
cbvs.append((info['sector'], info['cadence'], info['cbv_area']))
cbvs = set(cbvs)
logger.debug("CBVs found: %s", cbvs)

# Figure out which files need to be processed:
cursor.execute("SELECT priority FROM release;")
already_processed = set([row[0] for row in cursor.fetchall()])
not_yet_processed = []
Expand All @@ -237,19 +283,30 @@ def main():
inserted = 0
for info in tqdm(m(fix_file_wrapper, not_yet_processed), total=numfiles, **tqdm_settings):
logger.debug(info)
cursor.execute("INSERT INTO release (priority, lightcurve, starid, sector, camera, ccd, cadence, filesize, filehash, datarel, dataval, dependency) VALUES (?,?,?,?,?,?,?,?,?,?,?,?);", [

# For CBV corrected data, check that the corresponding CBV was also found:
if corrector == 'cbv' and (info['sector'], info['cadence'], info['cbv_area']) not in cbvs:
raise Exception("CBV not found")

dependency_lc = info['dependency_lc']
if dependency_lc is not None:
dependency_lc = ','.join([str(t) for t in info['dependency_lc']])
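# Example: a dependency list like [12345, 67890] is stored as the
# TEXT value '12345,67890'.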

cursor.execute("INSERT INTO release (priority, lightcurve, starid, sector, camera, ccd, cbv_area, cadence, filesize, filehash, datarel, dataval, dependency_tpf, dependency_lc) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);", [
info['priority'],
info['lightcurve'],
info['starid'],
info['sector'],
info['camera'],
info['ccd'],
info['cbv_area'],
info['cadence'],
info['filesize'],
info['filehash'],
info['datarel'],
info['dataval'],
- info['dependency']
+ info['dependency_tpf'],
+ dependency_lc
])

inserted += 1
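A sketch of how the release and release_cbv tables could later be joined to recover which CBV file each lightcurve was corrected with (the database filename is hypothetical):

import sqlite3
conn = sqlite3.connect('todo-release.sqlite')
rows = conn.execute(
	"SELECT release.lightcurve, release_cbv.path"
	" FROM release JOIN release_cbv USING (sector, cadence, cbv_area);"
).fetchall()
conn.close()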
Git LFS file not shown
Git LFS file not shown
