Coverage for python / lsst / ap / association / association.py: 27%

82 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-14 08:07 +0000

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""A simple implementation of source association task for ap_verify. 

23""" 

24 

25__all__ = ["AssociationConfig", "AssociationTask"] 

26 

27import numpy as np 

28import pandas as pd 

29from scipy.spatial import cKDTree 

30 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34from lsst.utils.timer import timeMethod 

35 

36# Enforce an error for unsafe column/array value setting in pandas. 

37pd.options.mode.chained_assignment = 'raise' 

38 

39 

40class AssociationConfig(pexConfig.Config): 

41 """Config class for AssociationTask. 

42 """ 

43 

44 maxDistArcSeconds = pexConfig.Field( 

45 dtype=float, 

46 doc="Maximum distance in arcseconds to test for a DIASource to be a " 

47 "match to a DIAObject.", 

48 default=1.0, 

49 ) 

50 

51 

52class AssociationTask(pipeBase.Task): 

53 """Associate DIAOSources into existing DIAObjects. 

54 

55 This task performs the association of detected DIASources in a visit 

56 with the previous DIAObjects detected over time. It also creates new 

57 DIAObjects out of DIASources that cannot be associated with previously 

58 detected DIAObjects. 

59 """ 

60 

61 ConfigClass = AssociationConfig 

62 _DefaultName = "association" 

63 

64 @timeMethod 

65 def run(self, 

66 diaSources, 

67 diaObjects): 

68 """Associate the new DiaSources with existing DiaObjects. 

69 

70 Parameters 

71 ---------- 

72 diaSources : `pandas.DataFrame` 

73 New DIASources to be associated with existing DIAObjects. 

74 diaObjects : `pandas.DataFrame` 

75 Existing diaObjects from the Apdb. 

76 

77 Returns 

78 ------- 

79 result : `lsst.pipe.base.Struct` 

80 Results struct with components. 

81 

82 - ``matchedDiaSources`` : DiaSources that were matched. Matched 

83 Sources have their diaObjectId updated and set to the id of the 

84 diaObject they were matched to. (`pandas.DataFrame`) 

85 - ``unAssocDiaSources`` : DiaSources that were not matched. 

86 Unassociated sources have their diaObject set to 0 as they 

87 were not associated with any existing DiaObjects. 

88 (`pandas.DataFrame`) 

89 - ``nUpdatedDiaObjects`` : Number of DiaObjects that were 

90 matched to new DiaSources. (`int`) 

91 - ``nUnassociatedDiaObjects`` : Number of DiaObjects that were 

92 not matched a new DiaSource. (`int`) 

93 """ 

94 diaSources = self.check_dia_source_radec(diaSources) 

95 

96 if len(diaObjects) == 0: 

97 return pipeBase.Struct( 

98 matchedDiaSources=pd.DataFrame(columns=diaSources.columns), 

99 unAssocDiaSources=diaSources, 

100 nUpdatedDiaObjects=0, 

101 nUnassociatedDiaObjects=0) 

102 

103 matchResult = self.associate_sources(diaObjects, diaSources) 

104 

105 mask = matchResult.diaSources["diaObjectId"] != 0 

106 

107 return pipeBase.Struct( 

108 matchedDiaSources=matchResult.diaSources[mask].reset_index(drop=True), 

109 unAssocDiaSources=matchResult.diaSources[~mask].reset_index(drop=True), 

110 nUpdatedDiaObjects=matchResult.nUpdatedDiaObjects, 

111 nUnassociatedDiaObjects=matchResult.nUnassociatedDiaObjects) 

112 

113 def check_dia_source_radec(self, dia_sources): 

114 """Check that all DiaSources have non-NaN values for RA/DEC. 

115 

116 If one or more DiaSources are found to have NaN values, throw a 

117 warning to the log with the ids of the offending sources. Drop them 

118 from the table. 

119 

120 Parameters 

121 ---------- 

122 dia_sources : `pandas.DataFrame` 

123 Input DiaSources to check for NaN values. 

124 

125 Returns 

126 ------- 

127 trimmed_sources : `pandas.DataFrame` 

128 DataFrame of DiaSources trimmed of all entries with NaN values for 

129 RA/DEC. 

130 """ 

131 nan_mask = dia_sources["ra"].isnull() | dia_sources["dec"].isnull() 

132 if nan_mask.any(): 

133 nan_ids = dia_sources.loc[nan_mask, "diaSourceId"] 

134 for nan_id in nan_ids: 

135 self.log.warning( 

136 "DiaSource %i has NaN value for RA/DEC, " 

137 "dropping from association.", nan_id) 

138 dia_sources = dia_sources[~nan_mask] 

139 return dia_sources 

140 

141 @timeMethod 

142 def associate_sources(self, dia_objects, dia_sources): 

143 """Associate the input DIASources with the catalog of DIAObjects. 

144 

145 DiaObject DataFrame must be indexed on ``diaObjectId``. 

146 

147 Parameters 

148 ---------- 

149 dia_objects : `pandas.DataFrame` 

150 Catalog of DIAObjects to attempt to associate the input 

151 DIASources into. 

152 dia_sources : `pandas.DataFrame` 

153 DIASources to associate into the DIAObjectCollection. 

154 

155 Returns 

156 ------- 

157 result : `lsst.pipe.base.Struct` 

158 Results struct with components. 

159 

160 - ``diaSources`` : Full set of diaSources both matched and not. 

161 (`pandas.DataFrame`) 

162 - ``nUpdatedDiaObjects`` : Number of DiaObjects that were 

163 associated. (`int`) 

164 - ``nUnassociatedDiaObjects`` : Number of DiaObjects that were 

165 not matched a new DiaSource. (`int`) 

166 """ 

167 scores = self.score( 

168 dia_objects, dia_sources, 

169 self.config.maxDistArcSeconds * geom.arcseconds) 

170 match_result = self.match(dia_objects, dia_sources, scores) 

171 

172 return match_result 

173 

174 @timeMethod 

175 def score(self, dia_objects, dia_sources, max_dist): 

176 """Compute a quality score for each dia_source/dia_object pair 

177 between this catalog of DIAObjects and the input DIASource catalog. 

178 

179 ``max_dist`` sets maximum separation in arcseconds to consider a 

180 dia_source a possible match to a dia_object. If the pair is 

181 beyond this distance no score is computed. 

182 

183 Parameters 

184 ---------- 

185 dia_objects : `pandas.DataFrame` 

186 A contiguous catalog of DIAObjects to score against dia_sources. 

187 dia_sources : `pandas.DataFrame` 

188 A contiguous catalog of dia_sources to "score" based on distance 

189 and (in the future) other metrics. 

190 max_dist : `lsst.geom.Angle` 

191 Maximum allowed distance to compute a score for a given DIAObject 

192 DIASource pair. 

193 

194 Returns 

195 ------- 

196 result : `lsst.pipe.base.Struct` 

197 Results struct with components: 

198 

199 - ``scores``: array of floats of match quality updated DIAObjects 

200 (array-like of `float`). 

201 - ``obj_idxs``: indexes of the matched DIAObjects in the catalog. 

202 (array-like of `int`) 

203 - ``obj_ids``: array of floats of match quality updated DIAObjects 

204 (array-like of `int`). 

205 

206 Default values for these arrays are 

207 INF, -1, and -1 respectively for unassociated sources. 

208 """ 

209 scores = np.full(len(dia_sources), np.inf, dtype=np.float64) 

210 obj_idxs = np.full(len(dia_sources), -1, dtype=np.int64) 

211 obj_ids = np.full(len(dia_sources), 0, dtype=np.int64) 

212 

213 if len(dia_objects) == 0: 

214 return pipeBase.Struct( 

215 scores=scores, 

216 obj_idxs=obj_idxs, 

217 obj_ids=obj_ids) 

218 

219 spatial_tree = self._make_spatial_tree(dia_objects) 

220 

221 max_dist_rad = max_dist.asRadians() 

222 

223 vectors = self._radec_to_xyz(dia_sources) 

224 

225 scores, obj_idxs = spatial_tree.query( 

226 vectors, 

227 distance_upper_bound=max_dist_rad) 

228 matched_src_idxs = np.argwhere(np.isfinite(scores)) 

229 obj_ids[matched_src_idxs] = dia_objects.index.to_numpy()[ 

230 obj_idxs[matched_src_idxs]] 

231 

232 return pipeBase.Struct( 

233 scores=scores, 

234 obj_idxs=obj_idxs, 

235 obj_ids=obj_ids) 

236 

237 def _make_spatial_tree(self, dia_objects): 

238 """Create a searchable kd-tree the input dia_object positions. 

239 

240 Parameters 

241 ---------- 

242 dia_objects : `pandas.DataFrame` 

243 A catalog of DIAObjects to create the tree from. 

244 

245 Returns 

246 ------- 

247 kd_tree : `scipy.spatical.cKDTree` 

248 Searchable kd-tree created from the positions of the DIAObjects. 

249 """ 

250 vectors = self._radec_to_xyz(dia_objects) 

251 return cKDTree(vectors) 

252 

253 def _radec_to_xyz(self, catalog): 

254 """Convert input ra/dec coordinates to spherical unit-vectors. 

255 

256 Parameters 

257 ---------- 

258 catalog : `pandas.DataFrame` 

259 Catalog to produce spherical unit-vector from. 

260 

261 Returns 

262 ------- 

263 vectors : `numpy.ndarray`, (N, 3) 

264 Output unit-vectors 

265 """ 

266 ras = np.radians(catalog["ra"]) 

267 decs = np.radians(catalog["dec"]) 

268 vectors = np.empty((len(ras), 3)) 

269 

270 sin_dec = np.sin(np.pi / 2 - decs) 

271 vectors[:, 0] = sin_dec * np.cos(ras) 

272 vectors[:, 1] = sin_dec * np.sin(ras) 

273 vectors[:, 2] = np.cos(np.pi / 2 - decs) 

274 

275 return vectors 

276 

277 @timeMethod 

278 def match(self, dia_objects, dia_sources, score_struct): 

279 """Match DIAsources to DiaObjects given a score. 

280 

281 Parameters 

282 ---------- 

283 dia_objects : `pandas.DataFrame` 

284 A SourceCatalog of DIAObjects to associate to DIASources. 

285 dia_sources : `pandas.DataFrame` 

286 A contiguous catalog of dia_sources for which the set of scores 

287 has been computed on with DIAObjectCollection.score. 

288 score_struct : `lsst.pipe.base.Struct` 

289 Results struct with components: 

290 

291 - ``"scores"``: array of floats of match quality 

292 updated DIAObjects (array-like of `float`). 

293 - ``"obj_ids"``: array of floats of match quality 

294 updated DIAObjects (array-like of `int`). 

295 - ``"obj_idxs"``: indexes of the matched DIAObjects in the catalog. 

296 (array-like of `int`) 

297 

298 Default values for these arrays are 

299 INF, -1 and -1 respectively for unassociated sources. 

300 

301 Returns 

302 ------- 

303 result : `lsst.pipe.base.Struct` 

304 Results struct with components. 

305 

306 - ``"diaSources"`` : Full set of diaSources both matched and not. 

307 (`pandas.DataFrame`) 

308 - ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were 

309 associated. (`int`) 

310 - ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were 

311 not matched a new DiaSource. (`int`) 

312 """ 

313 n_previous_dia_objects = len(dia_objects) 

314 used_dia_object = np.zeros(n_previous_dia_objects, dtype=bool) 

315 used_dia_source = np.zeros(len(dia_sources), dtype=bool) 

316 associated_dia_object_ids = np.zeros(len(dia_sources), 

317 dtype=np.uint64) 

318 n_updated_dia_objects = 0 

319 

320 # We sort from best match to worst to effectively perform a 

321 # "handshake" match where both the DIASources and DIAObjects agree 

322 # their the best match. Sources with non-finite scores have no 

323 # match and are skipped — they are left for the new-DIAObject loop. 

324 finite_idx = np.flatnonzero(np.isfinite(score_struct.scores)) 

325 score_args = finite_idx[np.argsort(score_struct.scores[finite_idx])] 

326 for score_idx in score_args: 

327 dia_obj_idx = score_struct.obj_idxs[score_idx] 

328 if used_dia_object[dia_obj_idx]: 

329 continue 

330 used_dia_object[dia_obj_idx] = True 

331 used_dia_source[score_idx] = True 

332 obj_id = score_struct.obj_ids[score_idx] 

333 associated_dia_object_ids[score_idx] = obj_id 

334 n_updated_dia_objects += 1 

335 

336 # Assign positionally rather than by label — score_idx values from 

337 # argsort are 0..N-1, but dia_sources may have a non-contiguous 

338 # label index after upstream NaN filtering. 

339 dia_sources = dia_sources.copy() 

340 dia_sources["diaObjectId"] = associated_dia_object_ids 

341 

342 return pipeBase.Struct( 

343 diaSources=dia_sources, 

344 nUpdatedDiaObjects=n_updated_dia_objects, 

345 nUnassociatedDiaObjects=(n_previous_dia_objects 

346 - n_updated_dia_objects))