Coverage for yasfpy/functions/cuda_numba.py: 13%

115 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-15 20:36 +0100

1import numpy as np 

2from numba import cuda 

3from cmath import exp, sqrt 

4 

5 

6# TODO: Implement data batching for GPUs with smaller memory 

7@cuda.jit(fastmath=True) 

8def particle_interaction_gpu( 

9 lmax: int, 

10 particle_number: int, 

11 idx: np.ndarray, 

12 x: np.ndarray, 

13 wx_real: np.ndarray, 

14 wx_imag: np.ndarray, 

15 translation_table: np.ndarray, 

16 plm: np.ndarray, 

17 sph_h: np.ndarray, 

18 e_j_dm_phi, 

19): 

20 j1, j2, w = cuda.grid(3) 

21 

22 jmax = particle_number * 2 * lmax * (lmax + 2) 

23 channels = sph_h.shape[-1] 

24 

25 if (j1 >= jmax) or (j2 >= jmax) or (w >= channels): 

26 return 

27 

28 s1, n1, tau1, l1, m1 = idx[j1, :] 

29 s2, n2, tau2, l2, m2 = idx[j2, :] 

30 

31 if s1 == s2: 

32 return 

33 

34 delta_tau = abs(tau1 - tau2) 

35 delta_l = abs(l1 - l2) 

36 delta_m = abs(m1 - m2) 

37 

38 p_dependent = complex(0) 

39 for p in range(max(delta_m, delta_l + delta_tau), l1 + l2 + 1): 

40 p_dependent += ( 

41 translation_table[n2, n1, p] 

42 * plm[p * (p + 1) // 2 + delta_m, s1, s2] 

43 * sph_h[p, s1, s2, w] 

44 ) 

45 p_dependent *= e_j_dm_phi[m2 - m1 + 2 * lmax, s1, s2] * x[j2] 

46 

47 # atomic.add performs the += operation in sync 

48 cuda.atomic.add(wx_real, (j1, w), p_dependent.real) 

49 cuda.atomic.add(wx_imag, (j1, w), p_dependent.imag) 

50 

51 

52@cuda.jit(fastmath=True) 

53def compute_scattering_cross_section_gpu( 

54 lmax: int, 

55 particle_number: int, 

56 idx: np.ndarray, 

57 sfc: np.ndarray, 

58 translation_table: np.ndarray, 

59 plm: np.ndarray, 

60 sph_h: np.ndarray, 

61 e_j_dm_phi: np.ndarray, 

62 c_sca_real: np.ndarray, 

63 c_sca_imag: np.ndarray, 

64): 

65 jmax = particle_number * 2 * lmax * (lmax + 2) 

66 channels = sph_h.shape[-1] 

67 

68 j1, j2, w = cuda.grid(3) 

69 

70 if (j1 >= jmax) or (j2 >= jmax) or (w >= channels): 

71 return 

72 

73 s1, n1, _, _, m1 = idx[j1, :] 

74 s2, n2, _, _, m2 = idx[j2, :] 

75 

76 delta_m = abs(m1 - m2) 

77 f = sfc[:, :, w] 

78 

79 p_dependent = complex(0) 

80 for p in range(delta_m, 2 * lmax + 1): 

81 p_dependent += ( 

82 translation_table[n2, n1, p] 

83 * plm[p * (p + 1) // 2 + delta_m, s1, s2] 

84 * sph_h[p, s1, s2, w] 

85 ) 

86 p_dependent *= ( 

87 f[s1, n1].conjugate() * e_j_dm_phi[m2 - m1 + 2 * lmax, s1, s2] * f[s2, n2] 

88 ) 

89 

90 # atomic.add performs the += operation in sync 

91 cuda.atomic.add(c_sca_real, w, p_dependent.real) 

92 cuda.atomic.add(c_sca_imag, w, p_dependent.imag) 

93 

94 

95@cuda.jit(fastmath=True) 

96def compute_radial_independent_scattered_field_gpu( 

97 lmax: int, 

98 particles_position: np.ndarray, 

99 idx: np.ndarray, 

100 sfc: np.ndarray, 

101 k_medium: np.ndarray, 

102 azimuthal_angles: np.ndarray, 

103 e_r: np.ndarray, 

104 e_phi: np.ndarray, 

105 e_theta: np.ndarray, 

106 pilm: np.ndarray, 

107 taulm: np.ndarray, 

108 e_1_sca_real: np.ndarray, 

109 e_1_sca_imag: np.ndarray, 

110): 

111 j_idx, a_idx, w_idx = cuda.grid(3) 

112 

113 jmax = particles_position.shape[0] * 2 * lmax * (lmax + 2) 

114 

115 if (j_idx >= jmax) or (a_idx >= azimuthal_angles.size) or (w_idx >= k_medium.size): 

116 return 

117 

118 s, n, tau, l, m = idx[j_idx, :] 

119 

120 # Temporary variable 

121 # If tau = 1 -> 1j**(tau-1) = 1, if tau = 2 -> 1j**(tau-1) = 1j 

122 # 1j**(-l-1) = (-1j)**(l+1) => both lead to the coefficient 1j**(tau-l-2) 

123 # k * <particle_position, e_r> is the phase shift due to the distance and relative position 

124 t = ( 

125 1j ** (tau - l - 2) 

126 * sfc[s, n, w_idx] 

127 / sqrt(2 * l * (l + 1)) 

128 * exp( 

129 1j 

130 * ( 

131 m * azimuthal_angles[a_idx] 

132 - k_medium[w_idx] 

133 * ( 

134 particles_position[s, 0] * e_r[a_idx, 0] 

135 + particles_position[s, 1] * e_r[a_idx, 1] 

136 + particles_position[s, 2] * e_r[a_idx, 2] 

137 ) 

138 ) 

139 ) 

140 ) 

141 

142 for c in range(3): 

143 if tau == 1: 

144 e_1_sca = t * ( 

145 e_theta[a_idx, c] * pilm[l, abs(m), a_idx] * 1j * m 

146 - e_phi[a_idx, c] * taulm[l, abs(m), a_idx] 

147 ) 

148 else: 

149 e_1_sca = t * ( 

150 e_phi[a_idx, c] * pilm[l, abs(m), a_idx] * 1j * m 

151 + e_theta[a_idx, c] * taulm[l, abs(m), a_idx] 

152 ) 

153 

154 cuda.atomic.add(e_1_sca_real, (a_idx, c, w_idx), e_1_sca.real) 

155 cuda.atomic.add(e_1_sca_imag, (a_idx, c, w_idx), e_1_sca.imag) 

156 

157 

158@cuda.jit(fastmath=True) 

159def compute_electric_field_angle_components_gpu( 

160 lmax: int, 

161 particles_position: np.ndarray, 

162 idx: np.ndarray, 

163 sfc: np.ndarray, 

164 k_medium: np.ndarray, 

165 azimuthal_angles: np.ndarray, 

166 e_r: np.ndarray, 

167 pilm: np.ndarray, 

168 taulm: np.ndarray, 

169 e_field_theta_real: np.ndarray, 

170 e_field_theta_imag: np.ndarray, 

171 e_field_phi_real: np.ndarray, 

172 e_field_phi_imag: np.ndarray, 

173): 

174 j_idx, a_idx, w_idx = cuda.grid(3) 

175 jmax = particles_position.shape[0] * 2 * lmax * (lmax + 2) 

176 if (j_idx >= jmax) or (a_idx >= azimuthal_angles.size) or (w_idx >= k_medium.size): 

177 return 

178 

179 s, n, tau, l, m = idx[j_idx, :] 

180 

181 t = ( 

182 1j ** (tau - l - 2) 

183 * sfc[s, n, w_idx] 

184 / sqrt(2 * l * (l + 1)) 

185 * exp( 

186 1j 

187 * ( 

188 m * azimuthal_angles[a_idx] 

189 - k_medium[w_idx] 

190 * ( 

191 particles_position[s, 0] * e_r[a_idx, 0] 

192 + particles_position[s, 1] * e_r[a_idx, 1] 

193 + particles_position[s, 2] * e_r[a_idx, 2] 

194 ) 

195 ) 

196 ) 

197 ) 

198 

199 if tau == 1: 

200 e_field_theta = t * pilm[l, abs(m), a_idx] * 1j * m 

201 e_field_phi = -t * taulm[l, abs(m), a_idx] 

202 else: 

203 e_field_theta = t * taulm[l, abs(m), a_idx] 

204 e_field_phi = t * pilm[l, abs(m), a_idx] * 1j * m 

205 

206 cuda.atomic.add(e_field_theta_real, (a_idx, w_idx), e_field_theta.real) 

207 cuda.atomic.add(e_field_theta_imag, (a_idx, w_idx), e_field_theta.imag) 

208 cuda.atomic.add(e_field_phi_real, (a_idx, w_idx), e_field_phi.real) 

209 cuda.atomic.add(e_field_phi_imag, (a_idx, w_idx), e_field_phi.imag) 

210 

211 

212@cuda.jit(fastmath=True) 

213def compute_polarization_components_gpu( 

214 number_of_wavelengths: int, 

215 number_of_angles: int, 

216 e_field_theta_real: np.ndarray, 

217 e_field_theta_imag: np.ndarray, 

218 e_field_phi_real: np.ndarray, 

219 e_field_phi_imag: np.ndarray, 

220 intensity: np.ndarray, 

221 degree_of_polarization: np.ndarray, 

222 degree_of_linear_polarization: np.ndarray, 

223 degree_of_linear_polarization_q: np.ndarray, 

224 degree_of_linear_polarization_u: np.ndarray, 

225 degree_of_circular_polarization: np.ndarray, 

226): 

227 a_idx, w_idx = cuda.grid(2) 

228 if (w_idx >= number_of_wavelengths) or (a_idx >= number_of_angles): 

229 return 

230 

231 # Jones vector components (1,2,4) 

232 e_field_theta_abs = ( 

233 e_field_theta_real[a_idx, w_idx] ** 2 + e_field_theta_imag[a_idx, w_idx] ** 2 

234 ) 

235 e_field_phi_abs = ( 

236 e_field_phi_real[a_idx, w_idx] ** 2 + e_field_phi_imag[a_idx, w_idx] ** 2 

237 ) 

238 e_field_angle_interaction_real = ( 

239 e_field_theta_real[a_idx, w_idx] * e_field_phi_real[a_idx, w_idx] 

240 + e_field_theta_imag[a_idx, w_idx] * e_field_phi_imag[a_idx, w_idx] 

241 ) 

242 e_field_angle_interaction_imag = ( 

243 e_field_theta_imag[a_idx, w_idx] * e_field_phi_real[a_idx, w_idx] 

244 - e_field_theta_real[a_idx, w_idx] * e_field_phi_imag[a_idx, w_idx] 

245 ) 

246 

247 # Stokes components S = (I, Q, U, V) 

248 I = e_field_theta_abs + e_field_phi_abs 

249 Q = e_field_theta_abs - e_field_phi_abs 

250 U = -2 * e_field_angle_interaction_real 

251 V = 2 * e_field_angle_interaction_imag 

252 

253 intensity[a_idx, w_idx] = I 

254 degree_of_polarization[a_idx, w_idx] = sqrt(Q**2 + U**2 + V**2).real / I 

255 degree_of_linear_polarization[a_idx, w_idx] = sqrt(Q**2 + U**2).real / I 

256 degree_of_linear_polarization_q[a_idx, w_idx] = -Q.real / I 

257 degree_of_linear_polarization_u[a_idx, w_idx] = U.real / I 

258 degree_of_circular_polarization[a_idx, w_idx] = V / I 

259 

260 

261@cuda.jit(fastmath=True) 

262def compute_field_gpu( 

263 lmax: int, 

264 idx: np.ndarray, 

265 size_parameter: np.ndarray, 

266 sph_h: np.ndarray, 

267 derivative: np.ndarray, 

268 e_j_dm_phi: np.ndarray, 

269 p_lm: np.ndarray, 

270 pi_lm: np.ndarray, 

271 tau_lm: np.ndarray, 

272 e_r: np.ndarray, 

273 e_theta: np.ndarray, 

274 e_phi: np.ndarray, 

275 scattered_field_coefficients: np.ndarray, 

276 field_real: np.ndarray, 

277 field_imag: np.ndarray, 

278): # , initial_field_coefficients: np.ndarray, scatter_to_internal: np.ndarray): 

279 jmax = sph_h.shape[1] * 2 * lmax * (lmax + 2) 

280 channels = sph_h.shape[-1] 

281 

282 sampling_idx, j_idx, w = cuda.grid(3) 

283 

284 if (sampling_idx >= sph_h.shape[2]) or (j_idx >= jmax) or (w >= channels): 

285 return 

286 

287 particle_idx, n, tau, l, m = idx[j_idx, :] 

288 

289 invariant = ( 

290 1 / sqrt(2 * (l + 1) * l) * e_j_dm_phi[m + 2 * lmax, particle_idx, sampling_idx] 

291 ) 

292 

293 for c in range(3): 

294 term = scattered_field_coefficients[particle_idx, n, w] * invariant 

295 

296 # Calculate M 

297 if tau == 1: 

298 c_term_1 = ( 

299 pi_lm[l, abs(m), particle_idx, sampling_idx] 

300 * e_theta[particle_idx, sampling_idx, c] 

301 * 1j 

302 * m 

303 ) 

304 c_term_2 = ( 

305 tau_lm[l, abs(m), particle_idx, sampling_idx] 

306 * e_phi[particle_idx, sampling_idx, c] 

307 ) 

308 c_term = sph_h[l, particle_idx, sampling_idx, w] * (c_term_1 - c_term_2) 

309 

310 term *= c_term 

311 

312 # Calculate N 

313 else: 

314 p_term = ( 

315 l 

316 * (l + 1) 

317 / size_parameter[particle_idx, sampling_idx, w] 

318 * sph_h[l, particle_idx, sampling_idx, w] 

319 ) 

320 p_term *= ( 

321 p_lm[l, abs(m), particle_idx, sampling_idx] 

322 * e_r[particle_idx, sampling_idx, c] 

323 ) 

324 

325 b_term_1 = ( 

326 derivative[l, particle_idx, sampling_idx, w] 

327 / size_parameter[particle_idx, sampling_idx, w] 

328 ) 

329 b_term_2 = ( 

330 tau_lm[l, abs(m), particle_idx, sampling_idx] 

331 * e_theta[particle_idx, sampling_idx, c] 

332 ) 

333 b_term_3 = ( 

334 pi_lm[l, abs(m), particle_idx, sampling_idx] 

335 * e_phi[particle_idx, sampling_idx, c] 

336 * 1j 

337 * m 

338 ) 

339 b_term = b_term_1 * (b_term_2 + b_term_3) 

340 

341 term *= p_term + b_term 

342 

343 cuda.atomic.add(field_real, (w, sampling_idx, c), term.real) 

344 cuda.atomic.add(field_imag, (w, sampling_idx, c), term.imag)