Coverage for yasfpy/functions/cuda_numba.py: 13%
115 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-15 20:36 +0100
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-15 20:36 +0100
1import numpy as np
2from numba import cuda
3from cmath import exp, sqrt
6# TODO: Implement data batching for GPUs with smaller memory
7@cuda.jit(fastmath=True)
8def particle_interaction_gpu(
9 lmax: int,
10 particle_number: int,
11 idx: np.ndarray,
12 x: np.ndarray,
13 wx_real: np.ndarray,
14 wx_imag: np.ndarray,
15 translation_table: np.ndarray,
16 plm: np.ndarray,
17 sph_h: np.ndarray,
18 e_j_dm_phi,
19):
20 j1, j2, w = cuda.grid(3)
22 jmax = particle_number * 2 * lmax * (lmax + 2)
23 channels = sph_h.shape[-1]
25 if (j1 >= jmax) or (j2 >= jmax) or (w >= channels):
26 return
28 s1, n1, tau1, l1, m1 = idx[j1, :]
29 s2, n2, tau2, l2, m2 = idx[j2, :]
31 if s1 == s2:
32 return
34 delta_tau = abs(tau1 - tau2)
35 delta_l = abs(l1 - l2)
36 delta_m = abs(m1 - m2)
38 p_dependent = complex(0)
39 for p in range(max(delta_m, delta_l + delta_tau), l1 + l2 + 1):
40 p_dependent += (
41 translation_table[n2, n1, p]
42 * plm[p * (p + 1) // 2 + delta_m, s1, s2]
43 * sph_h[p, s1, s2, w]
44 )
45 p_dependent *= e_j_dm_phi[m2 - m1 + 2 * lmax, s1, s2] * x[j2]
47 # atomic.add performs the += operation in sync
48 cuda.atomic.add(wx_real, (j1, w), p_dependent.real)
49 cuda.atomic.add(wx_imag, (j1, w), p_dependent.imag)
52@cuda.jit(fastmath=True)
53def compute_scattering_cross_section_gpu(
54 lmax: int,
55 particle_number: int,
56 idx: np.ndarray,
57 sfc: np.ndarray,
58 translation_table: np.ndarray,
59 plm: np.ndarray,
60 sph_h: np.ndarray,
61 e_j_dm_phi: np.ndarray,
62 c_sca_real: np.ndarray,
63 c_sca_imag: np.ndarray,
64):
65 jmax = particle_number * 2 * lmax * (lmax + 2)
66 channels = sph_h.shape[-1]
68 j1, j2, w = cuda.grid(3)
70 if (j1 >= jmax) or (j2 >= jmax) or (w >= channels):
71 return
73 s1, n1, _, _, m1 = idx[j1, :]
74 s2, n2, _, _, m2 = idx[j2, :]
76 delta_m = abs(m1 - m2)
77 f = sfc[:, :, w]
79 p_dependent = complex(0)
80 for p in range(delta_m, 2 * lmax + 1):
81 p_dependent += (
82 translation_table[n2, n1, p]
83 * plm[p * (p + 1) // 2 + delta_m, s1, s2]
84 * sph_h[p, s1, s2, w]
85 )
86 p_dependent *= (
87 f[s1, n1].conjugate() * e_j_dm_phi[m2 - m1 + 2 * lmax, s1, s2] * f[s2, n2]
88 )
90 # atomic.add performs the += operation in sync
91 cuda.atomic.add(c_sca_real, w, p_dependent.real)
92 cuda.atomic.add(c_sca_imag, w, p_dependent.imag)
95@cuda.jit(fastmath=True)
96def compute_radial_independent_scattered_field_gpu(
97 lmax: int,
98 particles_position: np.ndarray,
99 idx: np.ndarray,
100 sfc: np.ndarray,
101 k_medium: np.ndarray,
102 azimuthal_angles: np.ndarray,
103 e_r: np.ndarray,
104 e_phi: np.ndarray,
105 e_theta: np.ndarray,
106 pilm: np.ndarray,
107 taulm: np.ndarray,
108 e_1_sca_real: np.ndarray,
109 e_1_sca_imag: np.ndarray,
110):
111 j_idx, a_idx, w_idx = cuda.grid(3)
113 jmax = particles_position.shape[0] * 2 * lmax * (lmax + 2)
115 if (j_idx >= jmax) or (a_idx >= azimuthal_angles.size) or (w_idx >= k_medium.size):
116 return
118 s, n, tau, l, m = idx[j_idx, :]
120 # Temporary variable
121 # If tau = 1 -> 1j**(tau-1) = 1, if tau = 2 -> 1j**(tau-1) = 1j
122 # 1j**(-l-1) = (-1j)**(l+1) => both lead to the coefficient 1j**(tau-l-2)
123 # k * <particle_position, e_r> is the phase shift due to the distance and relative position
124 t = (
125 1j ** (tau - l - 2)
126 * sfc[s, n, w_idx]
127 / sqrt(2 * l * (l + 1))
128 * exp(
129 1j
130 * (
131 m * azimuthal_angles[a_idx]
132 - k_medium[w_idx]
133 * (
134 particles_position[s, 0] * e_r[a_idx, 0]
135 + particles_position[s, 1] * e_r[a_idx, 1]
136 + particles_position[s, 2] * e_r[a_idx, 2]
137 )
138 )
139 )
140 )
142 for c in range(3):
143 if tau == 1:
144 e_1_sca = t * (
145 e_theta[a_idx, c] * pilm[l, abs(m), a_idx] * 1j * m
146 - e_phi[a_idx, c] * taulm[l, abs(m), a_idx]
147 )
148 else:
149 e_1_sca = t * (
150 e_phi[a_idx, c] * pilm[l, abs(m), a_idx] * 1j * m
151 + e_theta[a_idx, c] * taulm[l, abs(m), a_idx]
152 )
154 cuda.atomic.add(e_1_sca_real, (a_idx, c, w_idx), e_1_sca.real)
155 cuda.atomic.add(e_1_sca_imag, (a_idx, c, w_idx), e_1_sca.imag)
158@cuda.jit(fastmath=True)
159def compute_electric_field_angle_components_gpu(
160 lmax: int,
161 particles_position: np.ndarray,
162 idx: np.ndarray,
163 sfc: np.ndarray,
164 k_medium: np.ndarray,
165 azimuthal_angles: np.ndarray,
166 e_r: np.ndarray,
167 pilm: np.ndarray,
168 taulm: np.ndarray,
169 e_field_theta_real: np.ndarray,
170 e_field_theta_imag: np.ndarray,
171 e_field_phi_real: np.ndarray,
172 e_field_phi_imag: np.ndarray,
173):
174 j_idx, a_idx, w_idx = cuda.grid(3)
175 jmax = particles_position.shape[0] * 2 * lmax * (lmax + 2)
176 if (j_idx >= jmax) or (a_idx >= azimuthal_angles.size) or (w_idx >= k_medium.size):
177 return
179 s, n, tau, l, m = idx[j_idx, :]
181 t = (
182 1j ** (tau - l - 2)
183 * sfc[s, n, w_idx]
184 / sqrt(2 * l * (l + 1))
185 * exp(
186 1j
187 * (
188 m * azimuthal_angles[a_idx]
189 - k_medium[w_idx]
190 * (
191 particles_position[s, 0] * e_r[a_idx, 0]
192 + particles_position[s, 1] * e_r[a_idx, 1]
193 + particles_position[s, 2] * e_r[a_idx, 2]
194 )
195 )
196 )
197 )
199 if tau == 1:
200 e_field_theta = t * pilm[l, abs(m), a_idx] * 1j * m
201 e_field_phi = -t * taulm[l, abs(m), a_idx]
202 else:
203 e_field_theta = t * taulm[l, abs(m), a_idx]
204 e_field_phi = t * pilm[l, abs(m), a_idx] * 1j * m
206 cuda.atomic.add(e_field_theta_real, (a_idx, w_idx), e_field_theta.real)
207 cuda.atomic.add(e_field_theta_imag, (a_idx, w_idx), e_field_theta.imag)
208 cuda.atomic.add(e_field_phi_real, (a_idx, w_idx), e_field_phi.real)
209 cuda.atomic.add(e_field_phi_imag, (a_idx, w_idx), e_field_phi.imag)
212@cuda.jit(fastmath=True)
213def compute_polarization_components_gpu(
214 number_of_wavelengths: int,
215 number_of_angles: int,
216 e_field_theta_real: np.ndarray,
217 e_field_theta_imag: np.ndarray,
218 e_field_phi_real: np.ndarray,
219 e_field_phi_imag: np.ndarray,
220 intensity: np.ndarray,
221 degree_of_polarization: np.ndarray,
222 degree_of_linear_polarization: np.ndarray,
223 degree_of_linear_polarization_q: np.ndarray,
224 degree_of_linear_polarization_u: np.ndarray,
225 degree_of_circular_polarization: np.ndarray,
226):
227 a_idx, w_idx = cuda.grid(2)
228 if (w_idx >= number_of_wavelengths) or (a_idx >= number_of_angles):
229 return
231 # Jones vector components (1,2,4)
232 e_field_theta_abs = (
233 e_field_theta_real[a_idx, w_idx] ** 2 + e_field_theta_imag[a_idx, w_idx] ** 2
234 )
235 e_field_phi_abs = (
236 e_field_phi_real[a_idx, w_idx] ** 2 + e_field_phi_imag[a_idx, w_idx] ** 2
237 )
238 e_field_angle_interaction_real = (
239 e_field_theta_real[a_idx, w_idx] * e_field_phi_real[a_idx, w_idx]
240 + e_field_theta_imag[a_idx, w_idx] * e_field_phi_imag[a_idx, w_idx]
241 )
242 e_field_angle_interaction_imag = (
243 e_field_theta_imag[a_idx, w_idx] * e_field_phi_real[a_idx, w_idx]
244 - e_field_theta_real[a_idx, w_idx] * e_field_phi_imag[a_idx, w_idx]
245 )
247 # Stokes components S = (I, Q, U, V)
248 I = e_field_theta_abs + e_field_phi_abs
249 Q = e_field_theta_abs - e_field_phi_abs
250 U = -2 * e_field_angle_interaction_real
251 V = 2 * e_field_angle_interaction_imag
253 intensity[a_idx, w_idx] = I
254 degree_of_polarization[a_idx, w_idx] = sqrt(Q**2 + U**2 + V**2).real / I
255 degree_of_linear_polarization[a_idx, w_idx] = sqrt(Q**2 + U**2).real / I
256 degree_of_linear_polarization_q[a_idx, w_idx] = -Q.real / I
257 degree_of_linear_polarization_u[a_idx, w_idx] = U.real / I
258 degree_of_circular_polarization[a_idx, w_idx] = V / I
261@cuda.jit(fastmath=True)
262def compute_field_gpu(
263 lmax: int,
264 idx: np.ndarray,
265 size_parameter: np.ndarray,
266 sph_h: np.ndarray,
267 derivative: np.ndarray,
268 e_j_dm_phi: np.ndarray,
269 p_lm: np.ndarray,
270 pi_lm: np.ndarray,
271 tau_lm: np.ndarray,
272 e_r: np.ndarray,
273 e_theta: np.ndarray,
274 e_phi: np.ndarray,
275 scattered_field_coefficients: np.ndarray,
276 field_real: np.ndarray,
277 field_imag: np.ndarray,
278): # , initial_field_coefficients: np.ndarray, scatter_to_internal: np.ndarray):
279 jmax = sph_h.shape[1] * 2 * lmax * (lmax + 2)
280 channels = sph_h.shape[-1]
282 sampling_idx, j_idx, w = cuda.grid(3)
284 if (sampling_idx >= sph_h.shape[2]) or (j_idx >= jmax) or (w >= channels):
285 return
287 particle_idx, n, tau, l, m = idx[j_idx, :]
289 invariant = (
290 1 / sqrt(2 * (l + 1) * l) * e_j_dm_phi[m + 2 * lmax, particle_idx, sampling_idx]
291 )
293 for c in range(3):
294 term = scattered_field_coefficients[particle_idx, n, w] * invariant
296 # Calculate M
297 if tau == 1:
298 c_term_1 = (
299 pi_lm[l, abs(m), particle_idx, sampling_idx]
300 * e_theta[particle_idx, sampling_idx, c]
301 * 1j
302 * m
303 )
304 c_term_2 = (
305 tau_lm[l, abs(m), particle_idx, sampling_idx]
306 * e_phi[particle_idx, sampling_idx, c]
307 )
308 c_term = sph_h[l, particle_idx, sampling_idx, w] * (c_term_1 - c_term_2)
310 term *= c_term
312 # Calculate N
313 else:
314 p_term = (
315 l
316 * (l + 1)
317 / size_parameter[particle_idx, sampling_idx, w]
318 * sph_h[l, particle_idx, sampling_idx, w]
319 )
320 p_term *= (
321 p_lm[l, abs(m), particle_idx, sampling_idx]
322 * e_r[particle_idx, sampling_idx, c]
323 )
325 b_term_1 = (
326 derivative[l, particle_idx, sampling_idx, w]
327 / size_parameter[particle_idx, sampling_idx, w]
328 )
329 b_term_2 = (
330 tau_lm[l, abs(m), particle_idx, sampling_idx]
331 * e_theta[particle_idx, sampling_idx, c]
332 )
333 b_term_3 = (
334 pi_lm[l, abs(m), particle_idx, sampling_idx]
335 * e_phi[particle_idx, sampling_idx, c]
336 * 1j
337 * m
338 )
339 b_term = b_term_1 * (b_term_2 + b_term_3)
341 term *= p_term + b_term
343 cuda.atomic.add(field_real, (w, sampling_idx, c), term.real)
344 cuda.atomic.add(field_imag, (w, sampling_idx, c), term.imag)