Coverage for peeemtee/tools.py: 98%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1#!/usr/bin/env python

3import os

4import numpy as np

5from scipy.optimize import curve_fit

6from numba import njit

7import h5py

8import codecs

9import datetime

10import pytz.reference

11from sklearn.neighbors import KernelDensity

13TIMEZONE = pytz.reference.LocalTimezone()

16def gaussian(x, mean, sigma, A):

17 return (

18 A

19 / np.sqrt(2 * np.pi)

20 / sigma

21 * np.exp(-0.5 * (x - mean) ** 2 / sigma ** 2)

22 )

25def gaussian_with_offset(x, mean, sigma, A, offset):

26 return (

27 A

28 / np.sqrt(2 * np.pi)

29 / sigma

30 * np.exp(-0.5 * (x - mean) ** 2 / sigma ** 2)

31 + offset

32 )

35def calculate_charges(

36 waveforms, ped_min, ped_max, sig_min, sig_max, method="sum"

37):

38 """

39 Calculates the charges of an array of waveforms

41 Parameters

42 ----------

43 waveforms: np.array

44 2D numpy array with one waveform in each row

45 [[waveform1],

46 [waveform2],

47 ...]

48 ped_min: int

49 minimum of window for pedestal integration

50 ped_max: int

51 maximum of window for pedestal integration

52 sig_min: int

53 minimum of window for signal integration

54 sig_max: int

55 maximum of window for signal integration

56 method: string

57 method used for "integration"

58 "sum" -> np.sum

59 "trapz" -> np.trapz

61 Returns

62 -------

63 charges: np.array

64 1D array with charges matching axis 0 of the waveforms array

66 """

67 sig_ped_ratio = (sig_max - sig_min) / (ped_max - ped_min)

68 if method == "sum":

69 func = np.sum

70 elif method == "trapz":

71 func = np.trapz

72 else:

73 print("unknown method. try sum or trapz!")

74 return None

75 pedestals = func(waveforms[:, ped_min:ped_max], axis=1)

76 signals = func(waveforms[:, sig_min:sig_max], axis=1)

77 charges = -(signals - pedestals * sig_ped_ratio)

79 return charges

82def calculate_transit_times(

83 signals, baseline_min, baseline_max, threshold, polarity="negative"

84):

85 """

86 Calculates transit times of signals

88 Parameters

89 ----------

90 signals: np.array

91 2D numpy array with one signal waveform in each row

92 [[signal1],

93 [signal2],

94 ...]

95 baseline_min: int

96 minimum of baseline calculation window

97 baseline_max: int

98 maximum of baseline calculation window

99 threshold: float

100 transit time is calculated when signal crosses threshold

101 polarity: str

102 'positive' if PMT signals have positive polarity,

103 'negative' if PMT signals have negative polarity

104

105 Returns

106 -------

107 charges: np.array

108 1D array with transit times matching axis 0 of the signals array

109

110 """

111 zeroed_signals = (

112 signals.T - np.mean(signals[:, baseline_min:baseline_max], axis=1)

113 ).T

114 if polarity == "negative":

115 transit_times = np.argmax(zeroed_signals < threshold, axis=1)

116 elif polarity == "positive":

117 transit_times = np.argmax(zeroed_signals > threshold, axis=1)

118 else:

119 print("polarity has to be 'positive' or 'negative'")

120 return None

121 return transit_times

122

123

124def bin_data(data, bins=10, range=None, density=False):

125 """

126 Calculates values and bin centres of a histogram of a set of data

127

128 Parameters

129 ----------

130 data: list or np.array

131 1D array of input data

132 bins: int

133 number of bins of the histogram

134 range: tuple(int)

135 lower and upper range of the bins

136 normed: boolean

137 set to True to norm the histogram data

138

139 Returns

140 -------

141 x: np.array

142 bin centres of the histogram

143 y: np.array

144 values of the histogram

145

146 """

147 y, x = np.histogram(data, bins=bins, range=range, density=density)

148 x = x[:-1]

149 x = x + (x[1] - x[0]) / 2

150 return x, y

151

152

153def calculate_persist_data(waveforms, bins=(10, 10), range=None):

154 """

155 Calculates 2D histogram data like persistence mode on oscilloscope

156

157 Parameters

158 ----------

159 waveforms: np.array

160 2D numpy array with one waveform in each row

161 [[waveform1],

162 [waveform2],

163 ...]

164 bins: tuple(int)

165 number of bins in both directions

166 range: tuple(tuple(int))

167 lower and upper range of the x-bins and y-bins

168

169 Returns

170 -------

171 x: np.array

172 x-bin centres of the histogram

173 y: np.array

174 y-bin centres of the histogram

175 z: np.array

176 z values of the histogram

177

178 """

179 times = np.tile(np.arange(waveforms.shape[1]), (waveforms.shape[0], 1))

180 z, xs, ys = np.histogram2d(

181 times.flatten(), waveforms.flatten(), bins=bins, range=range

182 )

183 xs = (xs + (xs[1] - xs[0]) / 2)[:-1]

184 ys = (ys + (ys[1] - ys[0]) / 2)[:-1]

185 x = np.array([[x] * bins[1] for x in xs])

186 y = np.array(list(ys) * bins[0])

187 return x.flatten(), y.flatten(), z.flatten()

188

189

190def calculate_mean_signal(signals, shift_by="min"):

191 """

192 Calculates mean signals from several PMT signals with shifting the signals

193 by their minimum or maximum to correct for time jitter

194

195 Parameters

196 ----------

197 signals: np.array

198 2D numpy array with one signal (y-values) in each row

199 [[signal1],

200 [signal2],

201 ...]

202 shift_by: str

203 shift by "min" or "max" of the signal to correct for time jitter

204 Returns

205 -------

206 mean_signal: (np.array, np.array)

207 x and y values of mean signal

208 """

209 rolled_signals = []

210 if shift_by == "min":

211 f = np.argmin

212 elif shift_by == "max":

213 f = np.argmax

214 else:

215 print("can only shift by 'min' or 'max'")

216 return None

217 nx = signals.shape[1]

218 xs = np.arange(nx)

219 for signal in signals:

220 shift = f(signal)

221 rolled_signals.append(np.roll(signal, -shift + int(nx / 2)))

222 mean_signal = np.mean(rolled_signals, axis=0)

223 return mean_signal

224

225

226@njit

227def peak_finder(waveforms, threshold): # pragma: no cover

228 """

229 Finds peaks in waveforms

230

231 Parameters

232 ----------

233 waveforms: np.array

234 2D numpy array with one waveform (y-values) in each row

235 [[waveform1],

236 [waveform2],

237 ...]

238 threshold: float

239 voltage value the waveform has to cross in order to identify a peak

240

241 Returns

242 -------

243 peak_positions: list(list(floats))

244 x and y values of mean signal

245 """

246 peak_positions = []

247 I, J = waveforms.shape

248 for i in range(I):

249 peaks = []

250 X = 0

251 x = 0

252 for j in range(J):

253 if waveforms[i][j] <= threshold:

254 X += j

255 x += 1

256 if j + 1 >= J or waveforms[i][j + 1] > threshold:

257 peaks.append(X / x)

258 X = 0

259 x = 0

260 if len(peaks) > 0:

261 peak_positions.append(peaks)

262 return peak_positions

263

264

265def find_nominal_hv(filename, nominal_gain):

266 """

267 Finds nominal HV of a measured PMT dataset

268

269 Parameters

270 ----------

271 filename: string

272 nominal gain: float

273 gain for which the nominal HV should be found

274

275 Returns

276 -------

277 nominal_hv: int

278 nominal HV

279 """

280

281 f = h5py.File(filename, "r")

282 gains = []

283 hvs = []

284 keys = f.keys()

285 for key in keys:

286 gains.append(f[key]["fit_results"]["gain"][()])

287 hvs.append(int(key))

288 f.close()

289 gains = np.array(gains)

290 hvs = np.array(hvs)

291

292 diff = abs(np.array(gains) - nominal_gain)

293 nominal_hv = int(hvs[diff == np.min(diff)])

294 return nominal_hv

295

296

297def calculate_rise_times(waveforms, relative_thresholds=(0.1, 0.9)):

298 """

299 Calculates rise times of waveforms

300

301 Parameters

302 ----------

303 waveforms: np.array

304 2D numpy array with one waveform (y-values) in each row

305 [[waveform1],

306 [waveform2],

307 ...]

308 relative_thresholds: tuple(float)

309 relative lower and upper threshold inbetween which to calculate rise time

310

311 Returns

312 -------

313 rise_times: np.array

314 rise times

315 """

316 mins = np.min(waveforms, axis=1)

317 argmins = np.argmin(waveforms, axis=1)

318 rise_times = []

319 for min, argmin, waveform in zip(mins, argmins, waveforms):

320 below_first_thr = waveform > (min * relative_thresholds[0])

321 below_second_thr = waveform > (min * relative_thresholds[1])

322 try:

323 first_time = argmin - np.argmax(below_first_thr[:argmin][::-1])

324 second_time = argmin - np.argmax(below_second_thr[:argmin][::-1])

325 except ValueError:

326 first_time = 0

327 second_time = 0

328 rise_times.append(second_time - first_time)

329 return np.array(rise_times)

330

331

332def read_spectral_scan(filename):

333 """Reads wavelengths and currents from spectral PMT or PHD scan

334

335 Parameters

336 ----------

337 filename: str

338

339 Returns

340 -------

341 (wavelengths, currents): (np.array(float), np.array(float))

342 """

343 data = np.loadtxt(filename, unpack=True, encoding="latin1")

344 with codecs.open(filename, "r", encoding="utf-8", errors="ignore") as f:

345 dcs = f.read().split("\n")[-2].split("\t")

346 wavelengths = data[0]

347 currents = data[1]

348 dc = np.linspace(float(dcs[-2]), float(dcs[-1]), len(currents))

349 currents = currents - dc

350 return wavelengths, currents

351

352

353def read_datetime(filename):

354 """Reads time of a spectral PMT or PHD scan

355

356 Parameters

357 ----------

358 filename: str

359

360 Returns

361 -------

362 time: str

363 """

364 f = codecs.open(filename, "r", encoding="utf-8", errors="ignore")

365 datetime_string = f.read().split("\n")[2]

366 f.close()

367 return datetime_string.split(" ")[1] + ";" + datetime_string.split(" ")[2]

368

369

370def convert_to_secs(date_time):

371 """Converts time string to seconds

372

373 Parameters

374 ----------

375 date_time: str

376

377 Returns

378 -------

379 unix time in seconds: int

380 """

381 t = datetime.datetime.strptime(date_time, "%Y-%m-%d;%H:%M:%S")

382 return t.timestamp() + TIMEZONE.utcoffset(t).seconds

383

384

385def choose_ref(phd_filenames, pmt_filename):

386 """Chooses reference measurement closest (in time) to the actual measurement

387

388 Parameters

389 ----------

390 phd_filenames: list(str)

391 pmt_filename: str

392

393 Returns

394 -------

395 phd_filename: str

396 """

397 diffs = []

398 pmt_time = convert_to_secs(read_datetime(pmt_filename))

399 for filename in phd_filenames:

400 phd_time = convert_to_secs(read_datetime(filename))

401 diffs.append(abs(pmt_time - phd_time))

402 phd_filename = phd_filenames[np.argmin(diffs)]

403 return phd_filename

404

405

406def remove_double_peaks(peaks, distance=20):

407 """Removes secondary peaks with a distance <= distance from the primary

408 peak from 2D array of peaks

409

410 Parameters

411 ----------

412 peaks: 2D array of peaks

413 distance: float

414

415 Returns

416 -------

417 new_peaks: 2D np.array

418 """

419 new_peaks = []

420 for peak in peaks:

421 mp = -(distance + 1)

422 new_peak = []

423 for p in peak:

424 if np.fabs(mp - p) >= distance:

425 new_peak.append(p)

426 mp = p

427 new_peaks.append(new_peak)

428 return np.array(new_peaks)

429

430

431def peaks_with_signal(peaks, signal_range):

432 """Returns peaks with at least one peak in signal_range

433

434 Parameters

435 ----------

436 peaks: 2D array of peaks

437 signal_range: tuple(float)

438 (min, max) of signal window

439

440 Returns

441 -------

442 peaks_with_signal: 2D np.array

443 """

444 peaks_with_signal = []

445 for peak in peaks:

446 got_signal = False

447 for p in peak:

448 if p > signal_range[0] and p < signal_range[1]:

449 got_signal = True

450 if got_signal:

451 peaks_with_signal.append(peak)

452 return peaks_with_signal

453

454

455def estimate_kernel_density(

456 data, kernel="tophat", bandwidth=0.02, n_sampling_points=200

457):

458 """Estimates kernel density of given data in order to avoid binning artifacts

459

460 Parameters

461 ----------

462 data: list or np.array

463 1D array of input data

464 kernel: str

465 kernel to use for estimation ("tophat", "gaussian", etc.)

466 bandwidth: float

467 bandwidth of the kernel

468 n_sampling_points: int

469 number of sample points to return from distribution

470

471 Returns

472 -------

473 x: np.array(float)

474 x-values of samples of distribution

475 y: np.array(float)

476 y-values of samples of distribution

477 """

478 kde = KernelDensity(bandwidth=bandwidth, kernel=kernel)

479 kde.fit(data[:, None])

480 x = np.linspace(np.min(data), np.max(data), n_sampling_points)

481 y = np.exp(kde.score_samples(x[:, None]))

482 return x, y

483

484

485@njit

486def align_waveforms(

487 waveforms, baseline_min=None, baseline_max=None, inplace=True

488): # pragma: no cover

489 """

490 Subtracts the mean of (a part of the) waveforms from waveforms (individually)

491

492 Parameters

493 ----------

494 waveforms: np.array

495 2D numpy array with one waveform in each row

496 [[waveform1],

497 [waveform2],

498 ...]

499 baseline_min: int

500 index of minimum of window for mean calculation (included)

501 baseline_max: int

502 index of maximum of window for mean calculation (excluded)

503 inplace: bool

504 perform calculation inplace or not

505

506 Returns

507 -------

508 waveforms: np.array

509 aligned waveform array

510 [[aligned waveform1],

511 [aligned waveform2],

512 ...]

513 """

514

515 if not inplace:

516 waveforms = np.copy(waveforms)

517 n, m = waveforms.shape

518 for i in range(n):

519 mean = np.mean(waveforms[i][baseline_min:baseline_max])

520 for j in range(m):

521 waveforms[i][j] -= mean

522 return waveforms

Coverage for peeemtee/tools.py : 98%

146 statements

Coverage for peeemtee/tools.py : 98%

146 statements 143 run 3 missing 29 excluded

146 statements