rdahmm_model_single.py
#!/usr/local/bin/python
#==========================================================================
# Given an ingested scripps dataset, generate the necessary input files and
# execute the RDAHMM modeling command for each station.
# To be invoked by the overall rdahmm_model.py using subprocess; the ingested
# data and destination model directories are defined in properties.
#
# JIRA QUAKESIM-219:
# a) All data should be trained for at least 3 years.
# b) Data with < 3 years should also be processed, but the user is warned
#    that these are trained on insufficient data.
# c) Data with > 3 years should be trained up to Dec 31, 2011.
# d) Data with < 3 years should be trained on all the available data.
# e) We will need to retrain data < 3 years old every week.
# f) We will need to let the user know the training period for all data
# and when the data training period is changed, and archive previous
# results.
#
# input: scripps dataset name, e.g. WNAM_Clean_DetrendNeuTimeSeries_comb
# output: RDAHMM input files and modeling results for each station
#
# usage:
# rdahmm_model_single.py scripps_dataset_name
#
# output:
#   /path/to/rdahmm/model/daily_project_stationID/*
#   Besides the RDAHMM model result files:
#   - stationID.sqlite is copied from the scripps ingested data directory,
#     which was used to generate daily_project_stationID.input
#   - daily_project_stationID.input.[start|end]time specifies the model
#     training range
#   - daily_project_stationID.input.ref specifies the station reference
#     location
#===========================================================================
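# Example (illustrative) per-station output layout for a hypothetical
# four-character station ID "ABCD":
#   /path/to/rdahmm/model/<dataset>/daily_project_ABCD/
#       ABCD.sqlite                        (archived copy of station data)
#       daily_project_ABCD.input           (RDAHMM training data)
#       daily_project_ABCD.input.starttime (training period start)
#       daily_project_ABCD.input.endtime   (training period end)
#       daily_project_ABCD.input.ref       (station reference position)
#       ...RDAHMM model result files...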
import os, sys, glob
import sqlite3 as db
import datetime, csv
from properties import properties
import subprocess
numargv = len(sys.argv)
if numargv == 1:
    sys.exit("usage: rdahmm_model_single.py scripps_dataset_name")
elif numargv == 2:
    dataset = sys.argv[1]
else:
    sys.exit("Invalid number of parameters!")
data_path = properties('data_path') + "/" + dataset + "/"
model_path = properties('model_path') + "/" + dataset + "/"
train_epoch = properties('train_epoch')
rdahmm_bin = properties('rdahmm_bin')
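# Illustrative (hypothetical) property values; actual paths and epochs are
# site-specific and come from the properties module:
#   data_path   -> /path/to/scripps/data   (ingested dataset root)
#   model_path  -> /path/to/rdahmm/model   (model output root)
#   train_epoch -> 2011-12-31              (training cutoff, QUAKESIM-219 c)
#   rdahmm_bin  -> /path/to/rdahmm/bin     (RDAHMM executable)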
datasetdb = glob.glob(data_path+dataset+"*.sqlite")[0]
datasetconn = db.connect(datasetdb)
datasetcur = datasetconn.cursor()
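# note: glob()[0] assumes a dataset-level sqlite file exists under data_path;
# it raises IndexError if none is found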
if not os.path.exists(model_path):
    cmd = "mkdir -p " + model_path
    os.system(cmd)
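# (os.makedirs(model_path) would be a pure-Python alternative to "mkdir -p")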
for dbfile in glob.glob(data_path + "/????.sqlite"):
    stationID = dbfile.split("/")[-1][:-7]
    #print stationID
    stationDir = model_path + "daily_project_" + stationID + "/"
    if not os.path.exists(stationDir):
        cmd = "mkdir -p " + stationDir
        os.system(cmd)
    # use the station model directory as the current working directory
    os.chdir(stationDir)
    #print os.getcwd()
    # copy the station dbfile to the model directory for archiving purposes
    cmd = "cp -p " + dbfile + " ."
    os.system(cmd)
    stationdb = stationDir + stationID + ".sqlite"
    #print stationdb
    # connect to the station database to generate the training data file
    conn = db.connect(stationdb)
    cur = conn.cursor()
    # check if there is more than 3 years of data by the end of train_epoch:
    # if yes, the model ends on the defined train_epoch; otherwise use all
    # available data for the model run
    sql = "SELECT MIN(Timestamp) FROM StationGPSTimeSeries"
    start_epoch = cur.execute(sql).fetchone()[0]
    end_epoch = train_epoch
    start = datetime.datetime(*map(int, start_epoch.split("-")))
    end = datetime.datetime(*map(int, end_epoch.split("-")))
    if (end - start) < datetime.timedelta(days=(3 * 365)):
        sql = "SELECT MAX(Timestamp) FROM StationGPSTimeSeries"
        end_epoch = cur.execute(sql).fetchone()[0]
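    # Example (illustrative): with train_epoch = 2011-12-31, a station whose
    # earliest Timestamp is 2010-06-15 has well under 3*365 days of data, so
    # end_epoch falls back to its latest available Timestamp (QUAKESIM-219
    # items b and d); stations with >= 3 years keep the 2011-12-31 cutoff.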
    #print start_epoch, end_epoch
    # generate the model input file using all data up to end_epoch
    sql = ("SELECT North, East, Up FROM StationGPSTimeSeries "
           "WHERE Timestamp <= ? ORDER BY Timestamp ASC")
    rows = cur.execute(sql, (end_epoch,)).fetchall()
    dataCount = str(len(rows))
    modelfile = stationDir + "daily_project_" + stationID + ".input"
    with open(modelfile, 'w') as f:
        csvWriter = csv.writer(f, delimiter=' ')
        csvWriter.writerows(rows)
    cur.close()
    conn.close()
    # record the start and end epochs of the training period in files
    # (the with statement closes each file; the original bare "f.close"
    # was a no-op)
    startfile = modelfile + ".starttime"
    endfile = modelfile + ".endtime"
    with open(startfile, 'w') as f:
        f.write(start_epoch)
    with open(endfile, 'w') as f:
        f.write(end_epoch)
    # generate a .ref file with the station's lat, lon, height reference values
    sql = "SELECT Latitude, Longitude, Height FROM ReferencePositions WHERE StationID = ?"
    ref = datasetcur.execute(sql, (stationID,)).fetchone()
    reffile = modelfile + ".ref"
    with open(reffile, 'w') as f:
        csvWriter = csv.writer(f, delimiter=' ')
        csvWriter.writerow(ref)
    # execute the RDAHMM model command with the parameters filled in
    dimensionCount = "3"
    rdahmm_model_parm = properties('rdahmm_model_parm')
    rdahmm_model_parm = rdahmm_model_parm.replace("<inputFile>", modelfile)
    rdahmm_model_parm = rdahmm_model_parm.replace("<dataCount>", dataCount)
    rdahmm_model_parm = rdahmm_model_parm.replace("<dimensionCount>", dimensionCount)
    rdahmm_model_cmd = rdahmm_bin + " " + rdahmm_model_parm
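    # Illustrative (hypothetical) example: if the site-configured template
    # were "<inputFile> <dataCount> <dimensionCount>", the substitutions above
    # would yield a command ending in, e.g.,
    #   .../daily_project_ABCD.input 1095 3
    # (the actual RDAHMM flag layout comes from the rdahmm_model_parm property)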
    #print rdahmm_model_cmd
    # os.system can be replaced with another non-blocking invocation method,
    # e.g. the commented subprocess call below
    os.system(rdahmm_model_cmd)
    #subprocess.check_call(rdahmm_model_cmd.split())
    #print "==============="
    #sys.exit(0)
datasetcur.close()
datasetconn.close()