Coverage for python / lsst / afw / table / _base.py: 11%

194 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-21 01:29 -0700

1# This file is part of afw. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21import numpy as np 

22 

23from lsst.utils import continueClass, TemplateMeta 

24from ._table import BaseRecord, BaseCatalog 

25from ._schema import Key 

26 

27 

28__all__ = ["Catalog"] 

29 

30 

31@continueClass 

32class BaseRecord: # noqa: F811 

33 

def extract(self, *patterns, **kwargs):
    """Return the values of this record's fields, keyed by field name.

    The fields are selected either by matching shell-style glob
    pattern(s) and/or a regular expression against the schema, or by
    passing in the result of an earlier match via ``items``.

    Parameters
    ----------
    *patterns : `str`
        Glob patterns forwarded to ``self.schema.extract``; the result is
        the union of the matches of each pattern.  Not consulted when
        ``items`` is given.
    items : `dict`, optional
        The result of a previous call to ``self.schema.extract()``.  When
        given, no new matching is done, which lets one match be reused
        across many records.  No other keyword argument may accompany it.
    regex : `str` or `re` pattern object, optional
        Regular expression forwarded to ``self.schema.extract`` in
        addition to any glob patterns (compared with ``re.match``, not
        ``re.search``).
    sub : `str`, optional
        Replacement string (see `re.MatchObject.expand`) forwarded to
        ``self.schema.extract`` to build the keys of regex matches.
    ordered : `bool`, optional
        Forwarded to ``self.schema.extract``.  The value returned here is
        always a plain `dict`, filled in the iteration order of the
        matched items.

    Returns
    -------
    values : `dict`
        Mapping of name to ``self.get(schemaItem.key)`` for every matched
        schema item.

    Raises
    ------
    ValueError
        Raised if ``items`` is given together with other keyword
        arguments.
    """
    matches = kwargs.pop("items", None)
    if matches is not None:
        # A precomputed match leaves nothing for the other keywords to do.
        if kwargs:
            unexpected = ", ".join(kwargs)
            raise ValueError(f"Unrecognized keyword arguments for extract: {unexpected}")
    else:
        matches = self.schema.extract(*patterns, **kwargs).copy()
    values = {}
    for name, schemaItem in matches.items():
        values[name] = self.get(schemaItem.key)
    return values

68 

def __repr__(self):
    """Return the record's type, a newline, then its ``str`` form."""
    recordType = type(self)
    return f"{recordType}\n{self}"

71 

72 

73class Catalog(metaclass=TemplateMeta): 

74 

def getColumnView(self):
    """Build a new column view, cache it on the catalog, and return it.

    Unlike the ``columns`` property, this always calls
    ``self._getColumnView()``, replacing any previously cached view.
    """
    view = self._getColumnView()
    self._columns = view
    return self._columns

78 

def __getColumns(self):
    """Return the cached column view, creating it on first use.

    A view is requested from ``self._getColumnView()`` only when no
    ``_columns`` attribute exists yet or when it has been reset to `None`.
    """
    if getattr(self, "_columns", None) is None:
        self._columns = self._getColumnView()
    return self._columns


columns = property(__getColumns, doc="a column view of the catalog")

84 

def __getitem__(self, key):
    """Index the catalog by row, by column, or by a selection of rows.

    Parameters
    ----------
    key : `slice`, `numpy.ndarray`, `str`, `Key`, or other
        - a `slice` returns ``self.subset(start, stop, step)``, with
          missing bounds filled in as ``0``, ``len(self)`` and ``1``;
        - a boolean NumPy array returns ``self.subset(key)``;
        - a `str` field name or a `Key` returns the corresponding column;
        - anything else (e.g. an integer) is passed to
          ``self._getitem_``.

    Raises
    ------
    RuntimeError
        Raised if ``key`` is a NumPy array whose dtype is not boolean.
    """
    if type(key) is slice:
        first = 0 if key.start is None else key.start
        last = len(self) if key.stop is None else key.stop
        stride = 1 if key.step is None else key.step
        return self.subset(first, last, stride)
    if isinstance(key, np.ndarray):
        if key.dtype != bool:
            raise RuntimeError("Unsupported array type for indexing a Catalog, "
                               f"only boolean arrays are supported: {key.dtype}")
        return self.subset(key)
    if isinstance(key, str):
        # Field names are resolved to their Key through the schema.
        key = self.schema.find(key).key
    elif not isinstance(key, Key):
        return self._getitem_(key)
    # The helper hands back the column together with the column-view cache
    # to keep on the catalog.
    column, self._columns = self._get_column_from_key(key, self._columns)
    return column

114 

def __setitem__(self, key, value):
    """Assign to a row or to a whole column of the catalog.

    A `str` ``key`` is first converted to a `Key` through the schema.  A
    `Key` selects a column, which is set to ``value`` (``Flag`` keys go
    through ``self._set_flag``).  Any other ``key`` is handed to
    ``self.set(key, value)``.  The cached column view is dropped first in
    every case.
    """
    self._columns = None
    if isinstance(key, str):
        key = self.schema[key].asKey()
    if not isinstance(key, Key):
        return self.set(key, value)
    if isinstance(key, Key["Flag"]):
        self._set_flag(key, value)
    else:
        self.columns[key] = value

130 

def __delitem__(self, key):
    """Delete one entry, or a slice of entries, from the catalog.

    The cached column view is dropped before the deletion is delegated to
    ``self._delslice_`` (for a `slice`) or ``self._delitem_`` (otherwise).
    """
    self._columns = None
    remover = self._delslice_ if isinstance(key, slice) else self._delitem_
    remover(key)

137 

def append(self, record):
    """Append ``record`` to the catalog, dropping the cached column view."""
    # Invalidate first so the cache is reset even if the append fails.
    self._columns = None
    self._append(record)

141 

def insert(self, key, value):
    """Insert ``value`` at position ``key``, dropping the cached column view."""
    # Invalidate first so the cache is reset even if the insert fails.
    self._columns = None
    self._insert(key, value)

145 

def clear(self):
    """Empty the catalog via ``self._clear()``, dropping the cached column view."""
    # Invalidate first so the cache is reset even if the clear fails.
    self._columns = None
    self._clear()

149 

def addNew(self):
    """Drop the cached column view and return the result of ``self._addNew()``."""
    self._columns = None
    record = self._addNew()
    return record

153 

def cast(self, type_, deep=False):
    """Return a copy of the catalog with the given type.

    Parameters
    ----------
    type_ :
        Type of catalog to return; called with the table as its only
        argument.
    deep : `bool`, optional
        If `True`, clone the table (preallocating room for ``len(self)``
        records) and deep copy all records.  Otherwise the new catalog
        shares this catalog's table.

    Returns
    -------
    copy :
        Copy of catalog with the requested type.
    """
    table = self.table
    if deep:
        table = table.clone()
        table.preallocate(len(self))
    result = type_(table)
    result.extend(self, deep=deep)
    return result

177 

def copy(self, deep=False):
    """Copy the catalog, keeping its type (shallow unless ``deep`` is true).

    This is ``self.cast`` called with the catalog's own type.
    """
    catalogType = type(self)
    return self.cast(catalogType, deep)

183 

def extend(self, iterable, deep=False, mapper=None):
    """Append all records in the given iterable to the catalog.

    Parameters
    ----------
    iterable :
        Any Python iterable containing records.  An instance of this
        catalog's own type is handed to ``self._extend`` in one call;
        anything else is appended record by record.
    deep : `bool`, optional
        If `True`, the records will be deep-copied; ignored if
        mapper is not `None` (that always implies `True`).  A
        ``SchemaMapper`` passed in this position is treated as ``mapper``.
    mapper : `lsst.afw.table.schemaMapper.SchemaMapper`, optional
        Used to translate records.
    """
    self._columns = None
    # The class is recognised by name because the SchemaMapper symbol isn't
    # available when this code is part of a subclass of Catalog in another
    # package, so isinstance cannot be used.
    if type(deep).__name__ == "SchemaMapper":
        mapper, deep = deep, None
    if isinstance(iterable, type(self)):
        self._extend(iterable, deep if mapper is None else mapper)
        return
    for record in iterable:
        if mapper is not None:
            record = self.table.copyRecord(record, mapper)
        elif deep:
            record = self.table.copyRecord(record)
        self._append(record)

216 

def __reduce__(self):
    """Support pickling by delegating to ``lsst.afw.fits.reduceToFits``."""
    # Imported here rather than at module scope, as in the original code.
    from lsst.afw.fits import reduceToFits
    return reduceToFits(self)

220 

def asAstropy(self, cls=None, copy=False, unviewable="copy"):
    """Return an astropy.table.Table (or subclass thereof) view into this catalog.

    Parameters
    ----------
    cls :
        Table subclass to use; `None` implies `astropy.table.Table`
        itself.  Use `astropy.table.QTable` to get Quantity columns.
    copy : bool, optional
        If `True`, copy data from the LSST catalog to the astropy
        table.  Not copying is usually faster, but can keep memory
        from being freed if columns are later removed from the
        Astropy view.
    unviewable : `str`, optional
        One of the following options (which is ignored if
        copy=`True` ), indicating how to handle field types (`str`
        and `Flag`) for which views cannot be constructed:

        - 'copy' (default): copy only the unviewable fields.
        - 'raise': raise ValueError if unviewable fields are present.
        - 'skip': do not include unviewable fields in the Astropy Table.

        Variable-length array fields are a separate case that does not
        depend on ``copy``: 'raise' raises, while both 'skip' and 'copy'
        leave the field out of the table.

    Returns
    -------
    cls : `astropy.table.Table`
        Astropy view into the catalog.

    Raises
    ------
    ValueError
        Raised if the `unviewable` option is not a known value, or
        if the option is 'raise' and an unviewable field is found.
    """
    import astropy.table
    if cls is None:
        cls = astropy.table.Table
    if unviewable not in ("copy", "raise", "skip"):
        raise ValueError(
            f"'unviewable'={unviewable!r} must be one of 'copy', 'raise', or 'skip'")
    metadata = self.getMetadata()
    meta = None if metadata is None else metadata.toOrderedDict()
    # The 'raise' and 'skip' policies only matter for String and Flag
    # fields when no full copy was requested.
    rejectUnviewable = not copy and unviewable == "raise"
    skipUnviewable = not copy and unviewable == "skip"
    columns = []
    for name, item in self.schema.extract("*", ordered=True).items():
        key = item.key
        unit = item.field.getUnits() or None  # use None instead of "" when empty
        typeString = key.getTypeString()
        if typeString == "String":
            if rejectUnviewable:
                raise ValueError("Cannot extract string "
                                 "unless copy=True or unviewable='copy' or 'skip'.")
            if skipUnviewable:
                continue
            # Strings are gathered record by record into a new array.
            data = np.zeros(len(self), dtype=np.dtype((str, key.getSize())))
            for index, record in enumerate(self):
                data[index] = record.get(key)
        elif typeString == "Flag":
            if rejectUnviewable:
                raise ValueError("Cannot extract packed bit columns "
                                 "unless copy=True or unviewable='copy' or 'skip'.")
            if skipUnviewable:
                continue
            data = self[key]
        elif typeString == "Angle":
            data = self.columns.get(key)
            unit = "radian"
        elif "Array" in typeString and key.isVariableLength():
            # Can't get columns for variable-length array fields.
            if unviewable == "raise":
                raise ValueError("Cannot extract variable-length array fields unless unviewable='skip'.")
            # The remaining validated options ('skip' and 'copy') both omit
            # the field.
            continue
        else:
            data = self.columns.get(key)
        column = astropy.table.Column(
            data,
            name=name,
            unit=unit,
            description=item.field.getDoc()
        )
        columns.append(column)
    return cls(columns, meta=meta, copy=copy)

307 

def __dir__(self):
    """Return the sorted names available on the catalog.

    This custom dir is necessary due to the custom getattr: without it,
    not all of the methods available are returned with dir.  See DM-7199.

    The result combines the names of the column view, the table, the
    catalog's class and all of its base classes, and the instance
    dictionary.
    """
    def inheritedNames(cls):
        """Return the names defined on ``cls`` and on every class reachable
        through ``__bases__`` (i.e. all of its base classes).
        """
        names = set(vars(cls))
        for base in cls.__bases__:
            names |= inheritedNames(base)
        return names

    available = set(dir(self.columns))
    available.update(dir(self.table))
    available |= inheritedNames(type(self))
    available.update(self.__dict__)
    return sorted(available)

327 

def __getattr__(self, name):
    """Forward unknown attribute lookups to the table, then the column view.

    Python only calls this hook after normal attribute lookup has failed.
    A missing ``_columns`` cache is created as `None` on demand.  Any other
    name is looked up on ``self.table`` first and, if the table raises
    `AttributeError` and the name does not start with two underscores, on
    ``self.columns``.

    Raises
    ------
    AttributeError
        Re-raised from the table lookup for names starting with ``__``.
    Exception
        Whatever the column-view lookup raises, with a note naming the
        attribute and the catalog type added.
    """
    # The forwarding is a convenience feature (originally requested by RHL).
    if name == "_columns":
        # The cache has not been set yet: create it empty.
        self._columns = None
        return None
    try:
        return getattr(self.table, name)
    except AttributeError:
        # Names starting with "__" are never going to be column names, so
        # the table's error is final for them.
        if name.startswith("__"):
            raise
        # The column view lookup can fail if the table is non-contiguous.
        try:
            attr = getattr(self.columns, name)
        except Exception as err:
            err.add_note(f"Error retrieving column attribute '{name}' from {type(self)}")
            raise
        else:
            return attr

349 

def __str__(self):
    """Return a printable summary of the catalog.

    A contiguous catalog is shown as its Astropy table.  A non-contiguous
    catalog is described by its row count and its field names.
    """
    if self.isContiguous():
        return str(self.asAstropy())
    names = [item.field.getName() for item in self.schema]
    fields = ' '.join(names)
    return f"Non-contiguous afw.Catalog of {len(self)} rows.\ncolumns: {fields}"

356 

def __repr__(self):
    """Return the catalog's type, a newline, then its ``str`` form."""
    return f"{type(self)!s}\n{self!s}"

359 

def extract(self, *patterns, **kwds):
    """Extract a dictionary of {<name>: <column-array>} in which the field
    names match the given shell-style glob pattern(s).

    Any number of glob patterns may be passed (including none); the result
    will be the union of all the result of each glob considered separately.

    Note that extract("*", copy=True) provides an easy way to transform a
    catalog into a set of writeable contiguous NumPy arrays.

    This routine unpacks `Flag` columns into full boolean arrays.  String
    fields are silently ignored.

    Parameters
    ----------
    patterns : Array of `str`
        List of glob patterns to use to select field names.
    kwds : `dict`
        Dictionary of additional keyword arguments.  May contain:

        ``items`` : `dict`
            The result of a call to self.schema.extract(); this will be
            used instead of doing any new matching, and allows the pattern
            matching to be reused to extract values from multiple
            catalogs.  The dictionary passed in is not modified.  This
            keyword is incompatible with any position arguments and the
            regex, sub, and ordered keyword arguments.
        ``where`` : array index expression
            Any expression that can be passed as indices to a NumPy array,
            including slices, boolean arrays, and index arrays, that will
            be used to index each column array.  This is applied before
            arrays are copied when copy is True, so if the indexing results
            in an implicit copy no unnecessary second copy is performed.
        ``copy`` : `bool`
            If True, the returned arrays will be contiguous copies rather
            than strided views into the catalog.  This ensures that the
            lifetime of the arrays is not tied to the lifetime of a
            particular catalog, and it also may improve the performance if
            the array is used repeatedly.  Default is False.  Copies are
            always made if the catalog is noncontiguous, but if
            ``copy=False`` these are set as read-only to ensure code does
            not assume they are views that could modify the original
            catalog.
        ``regex`` : `str` or `re` pattern
            A regular expression to be used in addition to any glob
            patterns passed as positional arguments.  Note that this will
            be compared with re.match, not re.search.
        ``sub`` : `str`
            A replacement string (see re.MatchObject.expand) used to set
            the dictionary keys of any fields matched by regex.
        ``ordered`` : `bool`
            If True, a collections.OrderedDict will be returned instead of
            a standard dict, with the order corresponding to the definition
            order of the Schema.  Default is False.

    Returns
    -------
    d : `dict`
        Dictionary of extracted name-column array sets.  It has the same
        mapping type as the matched schema items.

    Raises
    ------
    ValueError
        Raised if ``items`` is supplied with additional keywords.
    """
    copy = kwds.pop("copy", False)
    where = kwds.pop("where", None)
    items = kwds.pop("items", None)
    # If ``items`` is given as a kwd, an extraction has already been
    # performed and there shouldn't be any additional keywords.  Otherwise
    # call schema.extract to load the dictionary.
    if items is None:
        items = self.schema.extract(*patterns, **kwds)
    elif kwds:
        raise ValueError(
            "kwd 'items' was specified, which is not compatible with additional keywords")
    # Fill in a private copy: writing the arrays into a caller-supplied
    # ``items`` would replace its schema items and break its reuse.
    result = items.copy()

    def processArray(a):
        """Apply the ``where`` index and then the optional copy to ``a``."""
        if where is not None:
            a = a[where]
        if copy:
            a = a.copy()
        return a

    # ``items`` is only read here; all insertions and deletions go to
    # ``result``, so iterating it directly is safe.
    for name, schemaItem in items.items():
        key = schemaItem.key
        if key.getTypeString() == "String":
            del result[name]
        else:
            result[name] = processArray(self[key])
    return result

450 

451 

452Catalog.register("Base", BaseCatalog)