Lines Matching defs:slice_id
80 PetscInt i, row, slice_id, row_in_slice;
85 slice_id = row / sliceheight;
88 for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[i] * x[acolidx[i]];
95 PetscInt i, row, slice_id, row_in_slice;
100 slice_id = row / sliceheight;
103 for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[i] * x[acolidx[i]];
115 PetscInt i, row, slice_id = blockIdx.x;
122 row = slice_id * sliceheight + threadIdx.x % sliceheight;
124 for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += WARP_SIZE * BLOCKY) t += aval[i] * x[acolidx[i]];
144 PetscInt i, row, slice_id = blockIdx.x;
151 row = slice_id * sliceheight + threadIdx.x % sliceheight;
153 for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += WARP_SIZE * BLOCKY) t += aval[i] * x[acolidx[i]];
208 PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), totalentries = sliidx[totalslices];
210 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++;
211 if (threadIdx.x % (WARP_SIZE / 2) == 0) flag[threadIdx.y * 2 + threadIdx.x / (WARP_SIZE / 2)] = slice_id;
212 row = slice_id * sliceheight + threadIdx.x % sliceheight;
263 PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), totalentries = sliidx[totalslices];
265 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++;
266 if (threadIdx.x % (WARP_SIZE / 2) == 0) flag[threadIdx.y * 2 + threadIdx.x / (WARP_SIZE / 2)] = slice_id;
267 row = slice_id * sliceheight + threadIdx.x % sliceheight;
300 PetscInt i, row, slice_id;
301 slice_id = blockIdx.x * blockDim.y + threadIdx.y;
302 row = slice_id * sliceheight + threadIdx.x % sliceheight;
305 for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += WARP_SIZE) t += aval[i] * x[acolidx[i]];
315 PetscInt i, row, slice_id;
316 slice_id = blockIdx.x * blockDim.y + threadIdx.y;
317 row = slice_id * sliceheight + threadIdx.x % sliceheight;
320 for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += WARP_SIZE) t += aval[i] * x[acolidx[i]];
334 PetscInt i, row, slice_id, row_in_slice;
338 slice_id = row / sliceheight;
342 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
362 PetscInt i, row, slice_id, row_in_slice;
366 slice_id = row / sliceheight;
370 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
388 PetscInt i, row, slice_id, row_in_slice;
392 slice_id = row / sliceheight;
396 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
412 PetscInt i, row, slice_id, row_in_slice;
416 slice_id = row / sliceheight;
420 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
434 PetscInt i, row, slice_id, row_in_slice;
438 slice_id = row / sliceheight;
442 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
454 PetscInt i, row, slice_id, row_in_slice;
458 slice_id = row / sliceheight;
462 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
482 PetscInt i, row, slice_id, row_in_slice;
486 slice_id = row / sliceheight;
490 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
508 PetscInt i, row, slice_id, row_in_slice;
512 slice_id = row / sliceheight;
516 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
532 PetscInt i, row, slice_id, row_in_slice;
536 slice_id = row / sliceheight;
540 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];
554 PetscInt i, row, slice_id, row_in_slice;
558 slice_id = row / sliceheight;
562 for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i += sliceheight * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]];