-
Notifications
You must be signed in to change notification settings - Fork 13
/
policies_intake.py
251 lines (214 loc) · 9.82 KB
/
policies_intake.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
"""Policies for intake."""
__copyright__ = 'Copyright (c) 2021-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'
from typing import Dict
import genquery
from util import *
def object_is_locked(ctx: rule.Context, path: str, is_collection: bool) -> Dict:
    """Report whether a collection or data object carries a vault lock or freeze marker.

    The metadata attribute names of the object are inspected: the presence of
    'to_vault_lock' or 'to_vault_freeze' marks the object as locked, and
    'to_vault_freeze' additionally marks it as frozen.

    :param ctx:           Combined type of a callback and rei struct
    :param path:          Path to object or collection
    :param is_collection: Whether path contains a collection or data object

    :returns: Dict with the boolean entries 'locked' and 'frozen'
    """
    if is_collection:
        select_column = "META_COLL_ATTR_NAME"
        condition = "COLL_NAME = '" + path + "'"
    else:
        # A data object is addressed by its parent collection plus its own name.
        parent_coll = pathutil.dirname(path)
        select_column = "META_DATA_ATTR_NAME"
        condition = "COLL_NAME = '" + parent_coll + "' AND DATA_NAME = '" + pathutil.basename(path) + "'"

    # Both object kinds are judged by the same rule, so collect the attribute
    # names once and evaluate them in a single place.
    attr_names = [
        row[0]
        for row in genquery.row_iterator(select_column, condition, genquery.AS_LIST, ctx)
    ]

    is_frozen = 'to_vault_freeze' in attr_names
    # A freeze marker implies a lock as well.
    is_locked = is_frozen or 'to_vault_lock' in attr_names

    return {"locked": is_locked,
            "frozen": is_frozen}
def is_data_in_locked_dataset(ctx: rule.Context, actor: str, path: str) -> bool:
    """Check whether given data object is within a locked dataset.

    The dataset id is read from the data object's own metadata, falling back to
    the metadata of its parent collection. The toplevel of that dataset (a
    collection, or else a data object) is then looked up within the same intake
    group, and its lock state determines the result.

    :param ctx:   Combined type of a callback and rei struct
    :param actor: User that is handed to user.is_admin; for admins a lock never
                  yields True here
    :param path:  Path to the data object

    :returns: True if the dataset toplevel is locked or frozen and actor is not
              an admin. False otherwise, including when no dataset id or no
              dataset toplevel could be found
    """
    chopped = pathutil.chop(path)
    coll = chopped[0]
    data_name = chopped[1]
    intake_group_prefix = _get_intake_group_prefix(coll)

    dataset_id = ''

    # Dataset membership recorded on the data object itself is looked at first.
    data_rows = genquery.row_iterator(
        "META_DATA_ATTR_VALUE",
        "DATA_NAME = '" + data_name + "' AND META_DATA_ATTR_NAME = 'dataset_id' AND COLL_NAME = '" + coll + "' ",
        genquery.AS_LIST, ctx
    )
    for data_row in data_rows:
        dataset_id = data_row[0]
        log.debug(ctx, 'DATA - dataset found: ' + dataset_id)

    if not dataset_id:
        # Nothing on the data object: fall back to the parent collection.
        coll_rows = genquery.row_iterator(
            "META_COLL_ATTR_VALUE",
            "META_COLL_ATTR_NAME = 'dataset_id' AND COLL_NAME = '" + coll + "' ",
            genquery.AS_LIST, ctx
        )
        for coll_row in coll_rows:
            dataset_id = coll_row[0]
            log.debug(ctx, 'COLL - dataset found: ' + dataset_id)

    if not dataset_id:
        log.debug(ctx, 'After check for datasetid - no dataset found')
        return False

    # Locate the dataset toplevel; a collection-based toplevel is tried first.
    toplevel_path = ''
    toplevel_is_collection = False
    coll_toplevel_rows = genquery.row_iterator(
        "COLL_NAME",
        "META_COLL_ATTR_VALUE = '{}' AND META_COLL_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
        genquery.AS_LIST, ctx
    )
    for toplevel_row in coll_toplevel_rows:
        toplevel_path = toplevel_row[0]
        toplevel_is_collection = True

    if not toplevel_path:
        # The dataset is based on a data object instead.
        data_toplevel_rows = genquery.row_iterator(
            "COLL_NAME, DATA_NAME",
            "META_DATA_ATTR_VALUE = '{}' AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
            genquery.AS_LIST, ctx
        )
        for toplevel_row in data_toplevel_rows:
            toplevel_path = toplevel_row[0] + '/' + toplevel_row[1]
            toplevel_is_collection = False

    if not toplevel_path:
        # Lock status could not be determined. Assume data object is not locked.
        log.debug(ctx, "Could not determine lock state of data object " + path)
        return False

    locked_state = object_is_locked(ctx, toplevel_path, toplevel_is_collection)
    log.debug(ctx, locked_state)
    return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor)
def is_coll_in_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool:
    """Check whether given collection is within a locked dataset.

    The dataset id is read from the collection's metadata. The toplevel of that
    dataset (a collection, or else a data object) is then looked up within the
    same intake group, and its lock state determines the result.

    :param ctx:   Combined type of a callback and rei struct
    :param actor: User that is handed to user.is_admin; for admins a lock never
                  yields True here
    :param coll:  Path to the collection

    :returns: True if the dataset toplevel is locked or frozen and actor is not
              an admin. False otherwise, including when no dataset id or no
              dataset toplevel could be found
    """
    intake_group_prefix = _get_intake_group_prefix(coll)

    dataset_id = ''
    id_rows = genquery.row_iterator(
        "META_COLL_ATTR_VALUE",
        "COLL_NAME = '" + coll + "' AND META_COLL_ATTR_NAME = 'dataset_id' ",
        genquery.AS_LIST, ctx
    )
    for id_row in id_rows:
        dataset_id = id_row[0]
        log.debug(ctx, 'dataset found: ' + dataset_id)

    # Explicit guard, matching the sibling checks in this module: without a
    # dataset id there is no toplevel to look up.
    if not dataset_id:
        log.debug(ctx, 'After check for datasetid - no dataset found')
        return False

    # Find the toplevel of the dataset; a collection-based toplevel is tried first.
    toplevel_path = ''
    toplevel_is_collection = False
    coll_toplevel_rows = genquery.row_iterator(
        "COLL_NAME",
        "META_COLL_ATTR_VALUE = '{}' AND META_COLL_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
        genquery.AS_LIST, ctx
    )
    for toplevel_row in coll_toplevel_rows:
        toplevel_path = toplevel_row[0]
        toplevel_is_collection = True

    if not toplevel_path:
        # The dataset is based on a data object. object_is_locked needs the full
        # path of that data object (collection + '/' + data name): it splits the
        # path into parent collection and data name again. Passing only the
        # collection name would make it inspect the wrong object.
        data_toplevel_rows = genquery.row_iterator(
            "COLL_NAME, DATA_NAME",
            "META_DATA_ATTR_VALUE = '{}' AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
            genquery.AS_LIST, ctx
        )
        for toplevel_row in data_toplevel_rows:
            toplevel_path = toplevel_row[0] + '/' + toplevel_row[1]
            toplevel_is_collection = False

    if not toplevel_path:
        # Lock status could not be determined. Assume collection is not locked.
        log.debug(ctx, "Could not determine lock state of data object " + coll)
        return False

    locked_state = object_is_locked(ctx, toplevel_path, toplevel_is_collection)
    log.debug(ctx, locked_state)
    return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor)
def coll_in_path_of_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool:
    """Check whether a collection is part of a locked dataset, or holds one on a deeper level.

    If so, deletion of the collection is not allowed.

    When the collection itself carries a dataset id, the lock state of that
    dataset's toplevel decides. Otherwise the collection and everything below
    it are searched for lock or freeze markers on collections and data objects.

    :param ctx:   Combined type of a callback and rei struct
    :param actor: User that is handed to user.is_admin; a lock that was found
                  does not yield True for admins
    :param coll:  Path to the collection

    :returns: True if a lock applies and actor is not an admin. Also True, for
              any actor, when the collection has a dataset id but the dataset
              toplevel cannot be found. False if no lock is present
    """
    intake_group_prefix = _get_intake_group_prefix(coll)

    dataset_id = ''
    id_rows = genquery.row_iterator(
        "META_COLL_ATTR_VALUE",
        "COLL_NAME = '" + coll + "' AND META_COLL_ATTR_NAME = 'dataset_id' ",
        genquery.AS_LIST, ctx
    )
    for id_row in id_rows:
        dataset_id = id_row[0]
        log.debug(ctx, 'dataset found: ' + dataset_id)

    if dataset_id:
        # Find the toplevel of the dataset; a collection-based toplevel is tried first.
        toplevel_path = ''
        toplevel_is_collection = False
        coll_toplevel_rows = genquery.row_iterator(
            "COLL_NAME",
            "META_COLL_ATTR_VALUE = '{}' AND META_COLL_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
            genquery.AS_LIST, ctx
        )
        for toplevel_row in coll_toplevel_rows:
            toplevel_path = toplevel_row[0]
            toplevel_is_collection = True

        if not toplevel_path:
            # The dataset is based on a data object. object_is_locked needs the
            # full path of that data object (collection + '/' + data name): it
            # splits the path into parent collection and data name again.
            # Passing only the collection name would make it inspect the wrong
            # object and miss the lock.
            data_toplevel_rows = genquery.row_iterator(
                "COLL_NAME, DATA_NAME",
                "META_DATA_ATTR_VALUE = '{}' AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME like '/{}/home/{}-%'".format(dataset_id, user.zone(ctx), intake_group_prefix),
                genquery.AS_LIST, ctx
            )
            for toplevel_row in data_toplevel_rows:
                toplevel_path = toplevel_row[0] + '/' + toplevel_row[1]
                toplevel_is_collection = False

        if not toplevel_path:
            log.debug(ctx, "Could not determine lock state of data object " + coll)
            # Pretend presence of a lock so no unwanted data gets deleted
            return True

        locked_state = object_is_locked(ctx, toplevel_path, toplevel_is_collection)
        log.debug(ctx, locked_state)
        return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor)

    # No dataset found on indicated collection. Possibly in deeper collections.
    # Can be dataset based upon collection or data object.
    #
    # The LIKE pattern "<coll>%" has no path separator, so it also matches
    # sibling collections whose name merely starts with the same characters
    # (e.g. ".../abc2" when checking ".../abc"), plus anything hit by LIKE
    # wildcards inside coll. The collection name is therefore selected and
    # checked here, so that only coll itself and its true descendants count.
    base = coll.rstrip('/')
    subtree_prefix = base + '/'

    lock_conditions = (
        # Locks placed on collections.
        "COLL_NAME like '" + coll + "%' AND META_COLL_ATTR_NAME in ('to_vault_lock','to_vault_freeze') ",
        # Locks placed on data objects (dataset based on a data object).
        "COLL_NAME like '" + coll + "%' AND META_DATA_ATTR_NAME in ('to_vault_lock','to_vault_freeze') ",
    )
    for condition in lock_conditions:
        for lock_row in genquery.row_iterator("COLL_NAME", condition, genquery.AS_LIST, ctx):
            found_coll = lock_row[0]
            if found_coll == coll or found_coll == base or found_coll.startswith(subtree_prefix):
                log.debug(ctx, 'Found deeper LOCK')
                # If present there is a lock. No need to further inquire
                return not user.is_admin(ctx, actor)

    # There is no lock present
    return False
def _get_intake_group_prefix(coll: str) -> str:
""" Get the group prefix of a intake collection name: 'grp-intake' or 'intake' """
parts = coll.split('/')[3].split('-')
del parts[-1]
return '-'.join(parts)