1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accurate as possible. 67 If it is underestimated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %" PetscInt_FMT,maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = PetscCeilInt(B->rmap->n,8); 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %" PetscInt_FMT ")\n",B->rmap->n);CHKERRQ(ierr);} 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 ierr = PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);CHKERRQ(ierr); 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz = 0,*cols = NULL; 230 PetscScalar *vals = NULL; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 262 if (reuse == MAT_REUSE_MATRIX) { 263 B = *newmat; 264 } else { 265 if (PetscDefined(USE_DEBUG) || !a->ilen) { 266 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 267 for (i=0; i<m; i++) { 268 rowlengths[i] = ai[i+1] - ai[i]; 269 } 270 } 271 if (PetscDefined(USE_DEBUG) && a->ilen) { 272 PetscBool eq; 273 ierr = PetscMemcmp(rowlengths,a->ilen,m*sizeof(PetscInt),&eq);CHKERRQ(ierr); 274 if (!eq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"SeqAIJ ilen array incorrect"); 275 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 276 rowlengths = a->ilen; 277 } else if (a->ilen) rowlengths = a->ilen; 278 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 279 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 280 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 281 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 282 if (rowlengths != a->ilen) {ierr = PetscFree(rowlengths);CHKERRQ(ierr);} 283 } 284 285 for (row=0; row<m; row++) { 286 ierr = MatGetRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr); 287 ierr = MatSetValues_SeqSELL(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 288 ierr = MatRestoreRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr); 289 } 290 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 291 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 292 B->rmap->bs = A->rmap->bs; 293 294 if (reuse == MAT_INPLACE_MATRIX) { 295 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 296 } else { 297 *newmat = B; 298 } 299 PetscFunctionReturn(0); 300 } 301 302 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 303 { 304 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 305 PetscScalar *y; 306 const PetscScalar *x; 307 const MatScalar *aval=a->val; 308 PetscInt totalslices=a->totalslices; 309 const PetscInt *acolidx=a->colidx; 310 PetscInt i,j; 311 PetscErrorCode ierr; 312 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 313 __m512d vec_x,vec_y,vec_vals; 314 __m256i vec_idx; 315 __mmask8 mask; 316 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 317 __m256i vec_idx2,vec_idx3,vec_idx4; 318 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 319 __m128i vec_idx; 320 __m256d vec_x,vec_y,vec_y2,vec_vals; 321 MatScalar yval; 322 PetscInt r,rows_left,row,nnz_in_row; 323 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 324 __m128d vec_x_tmp; 325 __m256d vec_x,vec_y,vec_y2,vec_vals; 326 MatScalar yval; 327 PetscInt r,rows_left,row,nnz_in_row; 328 #else 329 PetscScalar sum[8]; 330 #endif 331 332 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 333 #pragma disjoint(*x,*y,*aval) 334 #endif 335 336 PetscFunctionBegin; 337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 339 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 340 for (i=0; i<totalslices; i++) { /* loop over slices */ 341 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 342 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 343 344 vec_y = _mm512_setzero_pd(); 345 vec_y2 = _mm512_setzero_pd(); 346 vec_y3 = _mm512_setzero_pd(); 347 vec_y4 = _mm512_setzero_pd(); 348 349 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 350 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 351 case 3: 352 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 353 acolidx += 8; aval += 8; 354 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 355 acolidx += 8; aval += 8; 356 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 357 acolidx += 8; aval += 8; 358 j += 3; 359 break; 360 case 2: 361 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 362 acolidx += 8; aval += 8; 363 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 364 acolidx += 8; aval += 8; 365 j += 2; 366 break; 367 case 1: 368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 369 acolidx += 8; aval += 8; 370 j += 1; 371 break; 372 } 373 #pragma novector 374 for (; j<(a->sliidx[i+1]>>3); j+=4) { 375 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 376 acolidx += 8; aval += 8; 377 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 378 acolidx += 8; aval += 8; 379 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 380 acolidx += 8; aval += 8; 381 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 382 acolidx += 8; aval += 8; 383 } 384 385 vec_y = _mm512_add_pd(vec_y,vec_y2); 386 vec_y = _mm512_add_pd(vec_y,vec_y3); 387 vec_y = _mm512_add_pd(vec_y,vec_y4); 388 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 389 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 390 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 391 } else { 392 _mm512_storeu_pd(&y[8*i],vec_y); 393 } 394 } 395 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 396 for (i=0; i<totalslices; i++) { /* loop over full slices */ 397 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 398 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 399 400 /* last slice may have padding rows. Don't use vectorization. */ 401 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 402 rows_left = A->rmap->n - 8*i; 403 for (r=0; r<rows_left; ++r) { 404 yval = (MatScalar)0; 405 row = 8*i + r; 406 nnz_in_row = a->rlen[row]; 407 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 408 y[row] = yval; 409 } 410 break; 411 } 412 413 vec_y = _mm256_setzero_pd(); 414 vec_y2 = _mm256_setzero_pd(); 415 416 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 417 #pragma novector 418 #pragma unroll(2) 419 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 420 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 421 aval += 4; acolidx += 4; 422 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 423 aval += 4; acolidx += 4; 424 } 425 426 _mm256_storeu_pd(y+i*8,vec_y); 427 _mm256_storeu_pd(y+i*8+4,vec_y2); 428 } 429 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 430 for (i=0; i<totalslices; i++) { /* loop over full slices */ 431 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 432 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 433 434 vec_y = _mm256_setzero_pd(); 435 vec_y2 = _mm256_setzero_pd(); 436 437 /* last slice may have padding rows. Don't use vectorization. */ 438 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 439 rows_left = A->rmap->n - 8*i; 440 for (r=0; r<rows_left; ++r) { 441 yval = (MatScalar)0; 442 row = 8*i + r; 443 nnz_in_row = a->rlen[row]; 444 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 445 y[row] = yval; 446 } 447 break; 448 } 449 450 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 451 #pragma novector 452 #pragma unroll(2) 453 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 454 vec_vals = _mm256_loadu_pd(aval); 455 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 456 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 457 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 458 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 459 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 460 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 461 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 462 aval += 4; 463 464 vec_vals = _mm256_loadu_pd(aval); 465 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 466 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 467 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 468 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 469 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 470 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 471 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 472 aval += 4; 473 } 474 475 _mm256_storeu_pd(y + i*8, vec_y); 476 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 477 } 478 #else 479 for (i=0; i<totalslices; i++) { /* loop over slices */ 480 for (j=0; j<8; j++) sum[j] = 0.0; 481 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 482 sum[0] += aval[j] * x[acolidx[j]]; 483 sum[1] += aval[j+1] * x[acolidx[j+1]]; 484 sum[2] += aval[j+2] * x[acolidx[j+2]]; 485 sum[3] += aval[j+3] * x[acolidx[j+3]]; 486 sum[4] += aval[j+4] * x[acolidx[j+4]]; 487 sum[5] += aval[j+5] * x[acolidx[j+5]]; 488 sum[6] += aval[j+6] * x[acolidx[j+6]]; 489 sum[7] += aval[j+7] * x[acolidx[j+7]]; 490 } 491 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 492 for (j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 493 } else { 494 for (j=0; j<8; j++) y[8*i+j] = sum[j]; 495 } 496 } 497 #endif 498 499 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 500 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 501 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 506 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 507 { 508 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 509 PetscScalar *y,*z; 510 const PetscScalar *x; 511 const MatScalar *aval=a->val; 512 PetscInt totalslices=a->totalslices; 513 const PetscInt *acolidx=a->colidx; 514 PetscInt i,j; 515 PetscErrorCode ierr; 516 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 517 __m512d vec_x,vec_y,vec_vals; 518 __m256i vec_idx; 519 __mmask8 mask; 520 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 521 __m256i vec_idx2,vec_idx3,vec_idx4; 522 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 523 __m128d vec_x_tmp; 524 __m256d vec_x,vec_y,vec_y2,vec_vals; 525 MatScalar yval; 526 PetscInt r,row,nnz_in_row; 527 #else 528 PetscScalar sum[8]; 529 #endif 530 531 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 532 #pragma disjoint(*x,*y,*aval) 533 #endif 534 535 PetscFunctionBegin; 536 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 537 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 538 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 539 for (i=0; i<totalslices; i++) { /* loop over slices */ 540 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 541 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 542 543 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 544 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 545 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 546 } else { 547 vec_y = _mm512_loadu_pd(&y[8*i]); 548 } 549 vec_y2 = _mm512_setzero_pd(); 550 vec_y3 = _mm512_setzero_pd(); 551 vec_y4 = _mm512_setzero_pd(); 552 553 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 554 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 555 case 3: 556 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 557 acolidx += 8; aval += 8; 558 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 559 acolidx += 8; aval += 8; 560 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 561 acolidx += 8; aval += 8; 562 j += 3; 563 break; 564 case 2: 565 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 566 acolidx += 8; aval += 8; 567 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 568 acolidx += 8; aval += 8; 569 j += 2; 570 break; 571 case 1: 572 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 573 acolidx += 8; aval += 8; 574 j += 1; 575 break; 576 } 577 #pragma novector 578 for (; j<(a->sliidx[i+1]>>3); j+=4) { 579 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 580 acolidx += 8; aval += 8; 581 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 582 acolidx += 8; aval += 8; 583 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 584 acolidx += 8; aval += 8; 585 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 586 acolidx += 8; aval += 8; 587 } 588 589 vec_y = _mm512_add_pd(vec_y,vec_y2); 590 vec_y = _mm512_add_pd(vec_y,vec_y3); 591 vec_y = _mm512_add_pd(vec_y,vec_y4); 592 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 593 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 594 } else { 595 _mm512_storeu_pd(&z[8*i],vec_y); 596 } 597 } 598 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 599 for (i=0; i<totalslices; i++) { /* loop over full slices */ 600 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 601 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 602 603 /* last slice may have padding rows. Don't use vectorization. */ 604 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 605 for (r=0; r<(A->rmap->n & 0x07); ++r) { 606 row = 8*i + r; 607 yval = (MatScalar)0.0; 608 nnz_in_row = a->rlen[row]; 609 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 610 z[row] = y[row] + yval; 611 } 612 break; 613 } 614 615 vec_y = _mm256_loadu_pd(y+8*i); 616 vec_y2 = _mm256_loadu_pd(y+8*i+4); 617 618 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 619 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 620 vec_vals = _mm256_loadu_pd(aval); 621 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 622 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 623 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 624 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 625 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 626 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 627 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 628 aval += 4; 629 630 vec_vals = _mm256_loadu_pd(aval); 631 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 632 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 633 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 634 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 635 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 636 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 637 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 638 aval += 4; 639 } 640 641 _mm256_storeu_pd(z+i*8,vec_y); 642 _mm256_storeu_pd(z+i*8+4,vec_y2); 643 } 644 #else 645 for (i=0; i<totalslices; i++) { /* loop over slices */ 646 for (j=0; j<8; j++) sum[j] = 0.0; 647 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 648 sum[0] += aval[j] * x[acolidx[j]]; 649 sum[1] += aval[j+1] * x[acolidx[j+1]]; 650 sum[2] += aval[j+2] * x[acolidx[j+2]]; 651 sum[3] += aval[j+3] * x[acolidx[j+3]]; 652 sum[4] += aval[j+4] * x[acolidx[j+4]]; 653 sum[5] += aval[j+5] * x[acolidx[j+5]]; 654 sum[6] += aval[j+6] * x[acolidx[j+6]]; 655 sum[7] += aval[j+7] * x[acolidx[j+7]]; 656 } 657 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 658 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 659 } else { 660 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 661 } 662 } 663 #endif 664 665 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 666 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 667 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 672 { 673 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 674 PetscScalar *y; 675 const PetscScalar *x; 676 const MatScalar *aval=a->val; 677 const PetscInt *acolidx=a->colidx; 678 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 679 PetscErrorCode ierr; 680 681 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 682 #pragma disjoint(*x,*y,*aval) 683 #endif 684 685 PetscFunctionBegin; 686 if (A->symmetric) { 687 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 691 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 693 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 694 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 695 for (r=0; r<(A->rmap->n & 0x07); ++r) { 696 row = 8*i + r; 697 nnz_in_row = a->rlen[row]; 698 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 699 } 700 break; 701 } 702 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 703 y[acolidx[j]] += aval[j] * x[8*i]; 704 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 705 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 706 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 707 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 708 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 709 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 710 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 711 } 712 } 713 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 714 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 715 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 720 { 721 PetscErrorCode ierr; 722 723 PetscFunctionBegin; 724 if (A->symmetric) { 725 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 726 } else { 727 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 728 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 729 } 730 PetscFunctionReturn(0); 731 } 732 733 /* 734 Checks for missing diagonals 735 */ 736 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 737 { 738 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 739 PetscInt *diag,i; 740 PetscErrorCode ierr; 741 742 PetscFunctionBegin; 743 *missing = PETSC_FALSE; 744 if (A->rmap->n > 0 && !(a->colidx)) { 745 *missing = PETSC_TRUE; 746 if (d) *d = 0; 747 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 748 } else { 749 diag = a->diag; 750 for (i=0; i<A->rmap->n; i++) { 751 if (diag[i] == -1) { 752 *missing = PETSC_TRUE; 753 if (d) *d = i; 754 ierr = PetscInfo1(A,"Matrix is missing diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 755 break; 756 } 757 } 758 } 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 763 { 764 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 765 PetscInt i,j,m=A->rmap->n,shift; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 if (!a->diag) { 770 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 771 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 772 a->free_diag = PETSC_TRUE; 773 } 774 for (i=0; i<m; i++) { /* loop over rows */ 775 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 776 a->diag[i] = -1; 777 for (j=0; j<a->rlen[i]; j++) { 778 if (a->colidx[shift+j*8] == i) { 779 a->diag[i] = shift+j*8; 780 break; 781 } 782 } 783 } 784 PetscFunctionReturn(0); 785 } 786 787 /* 788 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 789 */ 790 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 791 { 792 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 793 PetscInt i,*diag,m = A->rmap->n; 794 MatScalar *val = a->val; 795 PetscScalar *idiag,*mdiag; 796 PetscErrorCode ierr; 797 798 PetscFunctionBegin; 799 if (a->idiagvalid) PetscFunctionReturn(0); 800 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 801 diag = a->diag; 802 if (!a->idiag) { 803 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 804 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 805 val = a->val; 806 } 807 mdiag = a->mdiag; 808 idiag = a->idiag; 809 810 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 811 for (i=0; i<m; i++) { 812 mdiag[i] = val[diag[i]]; 813 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 814 if (PetscRealPart(fshift)) { 815 ierr = PetscInfo1(A,"Zero diagonal on row %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 816 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 817 A->factorerror_zeropivot_value = 0.0; 818 A->factorerror_zeropivot_row = i; 819 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %" PetscInt_FMT,i); 820 } 821 idiag[i] = 1.0/val[diag[i]]; 822 } 823 ierr = PetscLogFlops(m);CHKERRQ(ierr); 824 } else { 825 for (i=0; i<m; i++) { 826 mdiag[i] = val[diag[i]]; 827 idiag[i] = omega/(fshift + val[diag[i]]); 828 } 829 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 830 } 831 a->idiagvalid = PETSC_TRUE; 832 PetscFunctionReturn(0); 833 } 834 835 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 836 { 837 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 842 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 843 PetscFunctionReturn(0); 844 } 845 846 PetscErrorCode MatDestroy_SeqSELL(Mat A) 847 { 848 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 849 PetscErrorCode ierr; 850 851 PetscFunctionBegin; 852 #if defined(PETSC_USE_LOG) 853 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->n,A->cmap->n,a->nz); 854 #endif 855 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 856 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 857 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 858 ierr = PetscFree(a->diag);CHKERRQ(ierr); 859 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 860 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 861 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 862 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 863 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 864 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 865 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 866 867 ierr = PetscFree(A->data);CHKERRQ(ierr); 868 869 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 870 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 871 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 872 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 877 { 878 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 switch (op) { 883 case MAT_ROW_ORIENTED: 884 a->roworiented = flg; 885 break; 886 case MAT_KEEP_NONZERO_PATTERN: 887 a->keepnonzeropattern = flg; 888 break; 889 case MAT_NEW_NONZERO_LOCATIONS: 890 a->nonew = (flg ? 0 : 1); 891 break; 892 case MAT_NEW_NONZERO_LOCATION_ERR: 893 a->nonew = (flg ? -1 : 0); 894 break; 895 case MAT_NEW_NONZERO_ALLOCATION_ERR: 896 a->nonew = (flg ? -2 : 0); 897 break; 898 case MAT_UNUSED_NONZERO_LOCATION_ERR: 899 a->nounused = (flg ? -1 : 0); 900 break; 901 case MAT_FORCE_DIAGONAL_ENTRIES: 902 case MAT_IGNORE_OFF_PROC_ENTRIES: 903 case MAT_USE_HASH_TABLE: 904 case MAT_SORTED_FULL: 905 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 906 break; 907 case MAT_SPD: 908 case MAT_SYMMETRIC: 909 case MAT_STRUCTURALLY_SYMMETRIC: 910 case MAT_HERMITIAN: 911 case MAT_SYMMETRY_ETERNAL: 912 /* These options are handled directly by MatSetOption() */ 913 break; 914 default: 915 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 921 { 922 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 923 PetscInt i,j,n,shift; 924 PetscScalar *x,zero=0.0; 925 PetscErrorCode ierr; 926 927 PetscFunctionBegin; 928 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 929 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 930 931 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 932 PetscInt *diag=a->diag; 933 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 934 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 935 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 ierr = VecSet(v,zero);CHKERRQ(ierr); 940 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 941 for (i=0; i<n; i++) { /* loop over rows */ 942 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 943 x[i] = 0; 944 for (j=0; j<a->rlen[i]; j++) { 945 if (a->colidx[shift+j*8] == i) { 946 x[i] = a->val[shift+j*8]; 947 break; 948 } 949 } 950 } 951 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 956 { 957 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 958 const PetscScalar *l,*r; 959 PetscInt i,j,m,n,row; 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 if (ll) { 964 /* The local size is used so that VecMPI can be passed to this routine 965 by MatDiagonalScale_MPISELL */ 966 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 967 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 968 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 969 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 970 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 971 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 972 if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row]; 973 } 974 } else { 975 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 976 a->val[j] *= l[8*i+row]; 977 } 978 } 979 } 980 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 981 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 982 } 983 if (rr) { 984 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 985 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 986 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 987 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 988 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 989 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 990 if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]]; 991 } 992 } else { 993 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 994 a->val[j] *= r[a->colidx[j]]; 995 } 996 } 997 } 998 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 999 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1000 } 1001 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1006 1007 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1008 { 1009 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1010 PetscInt *cp,i,k,low,high,t,row,col,l; 1011 PetscInt shift; 1012 MatScalar *vp; 1013 1014 PetscFunctionBegin; 1015 for (k=0; k<m; k++) { /* loop over requested rows */ 1016 row = im[k]; 1017 if (row<0) continue; 1018 if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1); 1019 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1020 cp = a->colidx+shift; /* pointer to the row */ 1021 vp = a->val+shift; /* pointer to the row */ 1022 for (l=0; l<n; l++) { /* loop over requested columns */ 1023 col = in[l]; 1024 if (col<0) continue; 1025 if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1); 1026 high = a->rlen[row]; low = 0; /* assume unsorted */ 1027 while (high-low > 5) { 1028 t = (low+high)/2; 1029 if (*(cp+t*8) > col) high = t; 1030 else low = t; 1031 } 1032 for (i=low; i<high; i++) { 1033 if (*(cp+8*i) > col) break; 1034 if (*(cp+8*i) == col) { 1035 *v++ = *(vp+8*i); 1036 goto finished; 1037 } 1038 } 1039 *v++ = 0.0; 1040 finished:; 1041 } 1042 } 1043 PetscFunctionReturn(0); 1044 } 1045 1046 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1047 { 1048 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1049 PetscInt i,j,m=A->rmap->n,shift; 1050 const char *name; 1051 PetscViewerFormat format; 1052 PetscErrorCode ierr; 1053 1054 PetscFunctionBegin; 1055 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1056 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1057 PetscInt nofinalvalue = 0; 1058 /* 1059 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1060 nofinalvalue = 1; 1061 } 1062 */ 1063 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1064 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n",m,A->cmap->n);CHKERRQ(ierr); 1065 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %" PetscInt_FMT " \n",a->nz);CHKERRQ(ierr); 1066 #if defined(PETSC_USE_COMPLEX) 1067 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1068 #else 1069 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1070 #endif 1071 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1072 1073 for (i=0; i<m; i++) { 1074 shift = a->sliidx[i>>3]+(i&0x07); 1075 for (j=0; j<a->rlen[i]; j++) { 1076 #if defined(PETSC_USE_COMPLEX) 1077 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1078 #else 1079 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1080 #endif 1081 } 1082 } 1083 /* 1084 if (nofinalvalue) { 1085 #if defined(PETSC_USE_COMPLEX) 1086 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1087 #else 1088 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1089 #endif 1090 } 1091 */ 1092 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1095 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1096 PetscFunctionReturn(0); 1097 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1098 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1099 for (i=0; i<m; i++) { 1100 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1101 shift = a->sliidx[i>>3]+(i&0x07); 1102 for (j=0; j<a->rlen[i]; j++) { 1103 #if defined(PETSC_USE_COMPLEX) 1104 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1105 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1106 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1107 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1108 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1109 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1110 } 1111 #else 1112 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1113 #endif 1114 } 1115 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1116 } 1117 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1118 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1119 PetscInt cnt=0,jcnt; 1120 PetscScalar value; 1121 #if defined(PETSC_USE_COMPLEX) 1122 PetscBool realonly=PETSC_TRUE; 1123 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1124 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1125 realonly = PETSC_FALSE; 1126 break; 1127 } 1128 } 1129 #endif 1130 1131 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1132 for (i=0; i<m; i++) { 1133 jcnt = 0; 1134 shift = a->sliidx[i>>3]+(i&0x07); 1135 for (j=0; j<A->cmap->n; j++) { 1136 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1137 value = a->val[cnt++]; 1138 jcnt++; 1139 } else { 1140 value = 0.0; 1141 } 1142 #if defined(PETSC_USE_COMPLEX) 1143 if (realonly) { 1144 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1145 } else { 1146 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1147 } 1148 #else 1149 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1150 #endif 1151 } 1152 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1153 } 1154 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1155 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1156 PetscInt fshift=1; 1157 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_COMPLEX) 1159 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1160 #else 1161 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1164 for (i=0; i<m; i++) { 1165 shift = a->sliidx[i>>3]+(i&0x07); 1166 for (j=0; j<a->rlen[i]; j++) { 1167 #if defined(PETSC_USE_COMPLEX) 1168 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1169 #else 1170 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1171 #endif 1172 } 1173 } 1174 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1175 } else if (format == PETSC_VIEWER_NATIVE) { 1176 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1177 PetscInt row; 1178 ierr = PetscViewerASCIIPrintf(viewer,"slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1179 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1180 #if defined(PETSC_USE_COMPLEX) 1181 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1182 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1183 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1184 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1185 } else { 1186 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1187 } 1188 #else 1189 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1190 #endif 1191 } 1192 } 1193 } else { 1194 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1195 if (A->factortype) { 1196 for (i=0; i<m; i++) { 1197 shift = a->sliidx[i>>3]+(i&0x07); 1198 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1199 /* L part */ 1200 for (j=shift; j<a->diag[i]; j+=8) { 1201 #if defined(PETSC_USE_COMPLEX) 1202 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1203 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1204 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1205 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1206 } else { 1207 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1208 } 1209 #else 1210 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1211 #endif 1212 } 1213 /* diagonal */ 1214 j = a->diag[i]; 1215 #if defined(PETSC_USE_COMPLEX) 1216 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1217 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1218 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1219 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1220 } else { 1221 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1222 } 1223 #else 1224 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1225 #endif 1226 1227 /* U part */ 1228 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1229 #if defined(PETSC_USE_COMPLEX) 1230 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1231 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1232 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1233 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1234 } else { 1235 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1236 } 1237 #else 1238 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1239 #endif 1240 } 1241 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1242 } 1243 } else { 1244 for (i=0; i<m; i++) { 1245 shift = a->sliidx[i>>3]+(i&0x07); 1246 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1247 for (j=0; j<a->rlen[i]; j++) { 1248 #if defined(PETSC_USE_COMPLEX) 1249 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1250 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1251 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1252 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1253 } else { 1254 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1255 } 1256 #else 1257 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1258 #endif 1259 } 1260 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1261 } 1262 } 1263 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1264 } 1265 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #include <petscdraw.h> 1270 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1271 { 1272 Mat A=(Mat)Aa; 1273 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1274 PetscInt i,j,m=A->rmap->n,shift; 1275 int color; 1276 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1277 PetscViewer viewer; 1278 PetscViewerFormat format; 1279 PetscErrorCode ierr; 1280 1281 PetscFunctionBegin; 1282 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1283 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1284 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1285 1286 /* loop over matrix elements drawing boxes */ 1287 1288 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1289 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1290 /* Blue for negative, Cyan for zero and Red for positive */ 1291 color = PETSC_DRAW_BLUE; 1292 for (i=0; i<m; i++) { 1293 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1294 y_l = m - i - 1.0; y_r = y_l + 1.0; 1295 for (j=0; j<a->rlen[i]; j++) { 1296 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1297 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1298 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1299 } 1300 } 1301 color = PETSC_DRAW_CYAN; 1302 for (i=0; i<m; i++) { 1303 shift = a->sliidx[i>>3]+(i&0x07); 1304 y_l = m - i - 1.0; y_r = y_l + 1.0; 1305 for (j=0; j<a->rlen[i]; j++) { 1306 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1307 if (a->val[shift+8*j] != 0.) continue; 1308 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1309 } 1310 } 1311 color = PETSC_DRAW_RED; 1312 for (i=0; i<m; i++) { 1313 shift = a->sliidx[i>>3]+(i&0x07); 1314 y_l = m - i - 1.0; y_r = y_l + 1.0; 1315 for (j=0; j<a->rlen[i]; j++) { 1316 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1317 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1318 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1319 } 1320 } 1321 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1322 } else { 1323 /* use contour shading to indicate magnitude of values */ 1324 /* first determine max of all nonzero values */ 1325 PetscReal minv=0.0,maxv=0.0; 1326 PetscInt count=0; 1327 PetscDraw popup; 1328 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1329 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1330 } 1331 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1332 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1333 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1334 1335 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1336 for (i=0; i<m; i++) { 1337 shift = a->sliidx[i>>3]+(i&0x07); 1338 y_l = m - i - 1.0; 1339 y_r = y_l + 1.0; 1340 for (j=0; j<a->rlen[i]; j++) { 1341 x_l = a->colidx[shift+j*8]; 1342 x_r = x_l + 1.0; 1343 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1344 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1345 count++; 1346 } 1347 } 1348 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #include <petscdraw.h> 1354 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1355 { 1356 PetscDraw draw; 1357 PetscReal xr,yr,xl,yl,h,w; 1358 PetscBool isnull; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1363 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1364 if (isnull) PetscFunctionReturn(0); 1365 1366 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1367 xr += w; yr += h; xl = -w; yl = -h; 1368 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1369 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1370 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1371 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1372 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1377 { 1378 PetscBool iascii,isbinary,isdraw; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1385 if (iascii) { 1386 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1387 } else if (isbinary) { 1388 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1389 } else if (isdraw) { 1390 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1396 { 1397 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1398 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1399 MatScalar *vp; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1404 /* To do: compress out the unused elements */ 1405 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1406 ierr = PetscInfo6(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1407 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr); 1408 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",a->rlenmax);CHKERRQ(ierr); 1409 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1410 for (i=0; i<a->totalslices; ++i) { 1411 shift = a->sliidx[i]; /* starting index of the slice */ 1412 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1413 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1414 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1415 row = 8*i + row_in_slice; 1416 nrow = a->rlen[row]; /* number of nonzeros in row */ 1417 /* 1418 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1419 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1420 */ 1421 lastcol = 0; 1422 if (nrow>0) { /* nonempty row */ 1423 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1424 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1425 for (j=1;j<8;j++) { 1426 if (a->rlen[8*i+j]) { 1427 lastcol = cp[j]; 1428 break; 1429 } 1430 } 1431 } else { 1432 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1433 } 1434 1435 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1436 cp[8*k+row_in_slice] = lastcol; 1437 vp[8*k+row_in_slice] = (MatScalar)0; 1438 } 1439 } 1440 } 1441 1442 A->info.mallocs += a->reallocs; 1443 a->reallocs = 0; 1444 1445 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1450 { 1451 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1452 1453 PetscFunctionBegin; 1454 info->block_size = 1.0; 1455 info->nz_allocated = a->maxallocmat; 1456 info->nz_used = a->sliidx[a->totalslices]; /* include padding zeros */ 1457 info->nz_unneeded = (a->maxallocmat-a->sliidx[a->totalslices]); 1458 info->assemblies = A->num_ass; 1459 info->mallocs = A->info.mallocs; 1460 info->memory = ((PetscObject)A)->mem; 1461 if (A->factortype) { 1462 info->fill_ratio_given = A->info.fill_ratio_given; 1463 info->fill_ratio_needed = A->info.fill_ratio_needed; 1464 info->factor_mallocs = A->info.factor_mallocs; 1465 } else { 1466 info->fill_ratio_given = 0; 1467 info->fill_ratio_needed = 0; 1468 info->factor_mallocs = 0; 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1474 { 1475 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1476 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1477 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1478 MatScalar *vp,value; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 for (k=0; k<m; k++) { /* loop over added rows */ 1483 row = im[k]; 1484 if (row < 0) continue; 1485 if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1); 1486 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1487 cp = a->colidx+shift; /* pointer to the row */ 1488 vp = a->val+shift; /* pointer to the row */ 1489 nrow = a->rlen[row]; 1490 low = 0; 1491 high = nrow; 1492 1493 for (l=0; l<n; l++) { /* loop over added columns */ 1494 col = in[l]; 1495 if (col<0) continue; 1496 if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1); 1497 if (a->roworiented) { 1498 value = v[l+k*n]; 1499 } else { 1500 value = v[k+l*m]; 1501 } 1502 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1503 1504 /* search in this row for the specified colmun, i indicates the column to be set */ 1505 if (col <= lastcol) low = 0; 1506 else high = nrow; 1507 lastcol = col; 1508 while (high-low > 5) { 1509 t = (low+high)/2; 1510 if (*(cp+t*8) > col) high = t; 1511 else low = t; 1512 } 1513 for (i=low; i<high; i++) { 1514 if (*(cp+i*8) > col) break; 1515 if (*(cp+i*8) == col) { 1516 if (is == ADD_VALUES) *(vp+i*8) += value; 1517 else *(vp+i*8) = value; 1518 low = i + 1; 1519 goto noinsert; 1520 } 1521 } 1522 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1523 if (nonew == 1) goto noinsert; 1524 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1525 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1526 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1527 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1528 for (ii=nrow-1; ii>=i; ii--) { 1529 *(cp+(ii+1)*8) = *(cp+ii*8); 1530 *(vp+(ii+1)*8) = *(vp+ii*8); 1531 } 1532 a->rlen[row]++; 1533 *(cp+i*8) = col; 1534 *(vp+i*8) = value; 1535 a->nz++; 1536 A->nonzerostate++; 1537 low = i+1; high++; nrow++; 1538 noinsert:; 1539 } 1540 a->rlen[row] = nrow; 1541 } 1542 PetscFunctionReturn(0); 1543 } 1544 1545 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1546 { 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 /* If the two matrices have the same copy implementation, use fast copy. */ 1551 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1552 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1553 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1554 1555 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1556 ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1557 } else { 1558 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1559 } 1560 PetscFunctionReturn(0); 1561 } 1562 1563 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1564 { 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1573 { 1574 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1575 1576 PetscFunctionBegin; 1577 *array = a->val; 1578 PetscFunctionReturn(0); 1579 } 1580 1581 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1582 { 1583 PetscFunctionBegin; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1588 { 1589 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1590 PetscInt i; 1591 MatScalar *aval=a->val; 1592 1593 PetscFunctionBegin; 1594 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1599 { 1600 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1601 PetscInt i; 1602 MatScalar *aval=a->val; 1603 PetscErrorCode ierr; 1604 1605 PetscFunctionBegin; 1606 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1607 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1612 { 1613 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1614 MatScalar *aval=a->val; 1615 PetscScalar oalpha=alpha; 1616 PetscBLASInt one=1,size; 1617 PetscErrorCode ierr; 1618 1619 PetscFunctionBegin; 1620 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1621 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1622 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1623 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1628 { 1629 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1630 PetscErrorCode ierr; 1631 1632 PetscFunctionBegin; 1633 if (!Y->preallocated || !y->nz) { 1634 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1635 } 1636 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1641 { 1642 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1643 PetscScalar *x,sum,*t; 1644 const MatScalar *idiag=NULL,*mdiag; 1645 const PetscScalar *b,*xb; 1646 PetscInt n,m=A->rmap->n,i,j,shift; 1647 const PetscInt *diag; 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 its = its*lits; 1652 1653 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1654 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1655 a->fshift = fshift; 1656 a->omega = omega; 1657 1658 diag = a->diag; 1659 t = a->ssor_work; 1660 idiag = a->idiag; 1661 mdiag = a->mdiag; 1662 1663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1664 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1665 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1666 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1667 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1668 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1669 1670 if (flag & SOR_ZERO_INITIAL_GUESS) { 1671 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1672 for (i=0; i<m; i++) { 1673 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1674 sum = b[i]; 1675 n = (diag[i]-shift)/8; 1676 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1677 t[i] = sum; 1678 x[i] = sum*idiag[i]; 1679 } 1680 xb = t; 1681 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1682 } else xb = b; 1683 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1684 for (i=m-1; i>=0; i--) { 1685 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1686 sum = xb[i]; 1687 n = a->rlen[i]-(diag[i]-shift)/8-1; 1688 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1689 if (xb == b) { 1690 x[i] = sum*idiag[i]; 1691 } else { 1692 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1693 } 1694 } 1695 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1696 } 1697 its--; 1698 } 1699 while (its--) { 1700 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1701 for (i=0; i<m; i++) { 1702 /* lower */ 1703 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1704 sum = b[i]; 1705 n = (diag[i]-shift)/8; 1706 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1707 t[i] = sum; /* save application of the lower-triangular part */ 1708 /* upper */ 1709 n = a->rlen[i]-(diag[i]-shift)/8-1; 1710 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1711 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1712 } 1713 xb = t; 1714 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1715 } else xb = b; 1716 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1717 for (i=m-1; i>=0; i--) { 1718 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1719 sum = xb[i]; 1720 if (xb == b) { 1721 /* whole matrix (no checkpointing available) */ 1722 n = a->rlen[i]; 1723 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1724 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1725 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1726 n = a->rlen[i]-(diag[i]-shift)/8-1; 1727 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1728 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1729 } 1730 } 1731 if (xb == b) { 1732 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1733 } else { 1734 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1735 } 1736 } 1737 } 1738 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1739 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 /* -------------------------------------------------------------------*/ 1744 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1745 MatGetRow_SeqSELL, 1746 MatRestoreRow_SeqSELL, 1747 MatMult_SeqSELL, 1748 /* 4*/ MatMultAdd_SeqSELL, 1749 MatMultTranspose_SeqSELL, 1750 MatMultTransposeAdd_SeqSELL, 1751 NULL, 1752 NULL, 1753 NULL, 1754 /* 10*/ NULL, 1755 NULL, 1756 NULL, 1757 MatSOR_SeqSELL, 1758 NULL, 1759 /* 15*/ MatGetInfo_SeqSELL, 1760 MatEqual_SeqSELL, 1761 MatGetDiagonal_SeqSELL, 1762 MatDiagonalScale_SeqSELL, 1763 NULL, 1764 /* 20*/ NULL, 1765 MatAssemblyEnd_SeqSELL, 1766 MatSetOption_SeqSELL, 1767 MatZeroEntries_SeqSELL, 1768 /* 24*/ NULL, 1769 NULL, 1770 NULL, 1771 NULL, 1772 NULL, 1773 /* 29*/ MatSetUp_SeqSELL, 1774 NULL, 1775 NULL, 1776 NULL, 1777 NULL, 1778 /* 34*/ MatDuplicate_SeqSELL, 1779 NULL, 1780 NULL, 1781 NULL, 1782 NULL, 1783 /* 39*/ NULL, 1784 NULL, 1785 NULL, 1786 MatGetValues_SeqSELL, 1787 MatCopy_SeqSELL, 1788 /* 44*/ NULL, 1789 MatScale_SeqSELL, 1790 MatShift_SeqSELL, 1791 NULL, 1792 NULL, 1793 /* 49*/ NULL, 1794 NULL, 1795 NULL, 1796 NULL, 1797 NULL, 1798 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1799 NULL, 1800 NULL, 1801 NULL, 1802 NULL, 1803 /* 59*/ NULL, 1804 MatDestroy_SeqSELL, 1805 MatView_SeqSELL, 1806 NULL, 1807 NULL, 1808 /* 64*/ NULL, 1809 NULL, 1810 NULL, 1811 NULL, 1812 NULL, 1813 /* 69*/ NULL, 1814 NULL, 1815 NULL, 1816 NULL, 1817 NULL, 1818 /* 74*/ NULL, 1819 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1820 NULL, 1821 NULL, 1822 NULL, 1823 /* 79*/ NULL, 1824 NULL, 1825 NULL, 1826 NULL, 1827 NULL, 1828 /* 84*/ NULL, 1829 NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 /* 89*/ NULL, 1834 NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 /* 94*/ NULL, 1839 NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 /* 99*/ NULL, 1844 NULL, 1845 NULL, 1846 MatConjugate_SeqSELL, 1847 NULL, 1848 /*104*/ NULL, 1849 NULL, 1850 NULL, 1851 NULL, 1852 NULL, 1853 /*109*/ NULL, 1854 NULL, 1855 NULL, 1856 NULL, 1857 MatMissingDiagonal_SeqSELL, 1858 /*114*/ NULL, 1859 NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 /*119*/ NULL, 1864 NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 /*124*/ NULL, 1869 NULL, 1870 NULL, 1871 NULL, 1872 NULL, 1873 /*129*/ NULL, 1874 NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 /*134*/ NULL, 1879 NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 /*139*/ NULL, 1884 NULL, 1885 NULL, 1886 MatFDColoringSetUp_SeqXAIJ, 1887 NULL, 1888 /*144*/ NULL 1889 }; 1890 1891 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1892 { 1893 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1898 1899 /* allocate space for values if not already there */ 1900 if (!a->saved_values) { 1901 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1902 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1903 } 1904 1905 /* copy values over */ 1906 ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1911 { 1912 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1917 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1918 ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 /*@C 1923 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1924 1925 Not Collective 1926 1927 Input Parameters: 1928 . mat - a MATSEQSELL matrix 1929 . array - pointer to the data 1930 1931 Level: intermediate 1932 1933 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1934 @*/ 1935 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1936 { 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1945 { 1946 Mat_SeqSELL *b; 1947 PetscMPIInt size; 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 1952 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1953 1954 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1955 1956 B->data = (void*)b; 1957 1958 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1959 1960 b->row = NULL; 1961 b->col = NULL; 1962 b->icol = NULL; 1963 b->reallocs = 0; 1964 b->ignorezeroentries = PETSC_FALSE; 1965 b->roworiented = PETSC_TRUE; 1966 b->nonew = 0; 1967 b->diag = NULL; 1968 b->solve_work = NULL; 1969 B->spptr = NULL; 1970 b->saved_values = NULL; 1971 b->idiag = NULL; 1972 b->mdiag = NULL; 1973 b->ssor_work = NULL; 1974 b->omega = 1.0; 1975 b->fshift = 0.0; 1976 b->idiagvalid = PETSC_FALSE; 1977 b->keepnonzeropattern = PETSC_FALSE; 1978 1979 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1981 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1982 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1983 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1984 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1985 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 /* 1990 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1991 */ 1992 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1993 { 1994 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1995 PetscInt i,m=A->rmap->n; 1996 PetscInt totalslices=a->totalslices; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 c = (Mat_SeqSELL*)C->data; 2001 2002 C->factortype = A->factortype; 2003 c->row = NULL; 2004 c->col = NULL; 2005 c->icol = NULL; 2006 c->reallocs = 0; 2007 2008 C->assembled = PETSC_TRUE; 2009 2010 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2011 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2012 2013 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2014 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2015 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2016 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2017 2018 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2019 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2020 2021 /* allocate the matrix space */ 2022 if (mallocmatspace) { 2023 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2024 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2025 2026 c->singlemalloc = PETSC_TRUE; 2027 2028 if (m > 0) { 2029 ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr); 2030 if (cpvalues == MAT_COPY_VALUES) { 2031 ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr); 2032 } else { 2033 ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr); 2034 } 2035 } 2036 } 2037 2038 c->ignorezeroentries = a->ignorezeroentries; 2039 c->roworiented = a->roworiented; 2040 c->nonew = a->nonew; 2041 if (a->diag) { 2042 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2043 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2044 for (i=0; i<m; i++) { 2045 c->diag[i] = a->diag[i]; 2046 } 2047 } else c->diag = NULL; 2048 2049 c->solve_work = NULL; 2050 c->saved_values = NULL; 2051 c->idiag = NULL; 2052 c->ssor_work = NULL; 2053 c->keepnonzeropattern = a->keepnonzeropattern; 2054 c->free_val = PETSC_TRUE; 2055 c->free_colidx = PETSC_TRUE; 2056 2057 c->maxallocmat = a->maxallocmat; 2058 c->maxallocrow = a->maxallocrow; 2059 c->rlenmax = a->rlenmax; 2060 c->nz = a->nz; 2061 C->preallocated = PETSC_TRUE; 2062 2063 c->nonzerorowcnt = a->nonzerorowcnt; 2064 C->nonzerostate = A->nonzerostate; 2065 2066 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2071 { 2072 PetscErrorCode ierr; 2073 2074 PetscFunctionBegin; 2075 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2076 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2077 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2078 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2079 } 2080 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2081 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 /*@C 2086 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2087 2088 Collective 2089 2090 Input Parameters: 2091 + comm - MPI communicator, set to PETSC_COMM_SELF 2092 . m - number of rows 2093 . n - number of columns 2094 . rlenmax - maximum number of nonzeros in a row 2095 - rlen - array containing the number of nonzeros in the various rows 2096 (possibly different for each row) or NULL 2097 2098 Output Parameter: 2099 . A - the matrix 2100 2101 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2102 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2103 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2104 2105 Notes: 2106 If nnz is given then nz is ignored 2107 2108 Specify the preallocated storage with either rlenmax or rlen (not both). 2109 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2110 allocation. For large problems you MUST preallocate memory or you 2111 will get TERRIBLE performance, see the users' manual chapter on matrices. 2112 2113 Level: intermediate 2114 2115 .seealso: MatCreate(), MatCreateSELL(), MatSetValues() 2116 2117 @*/ 2118 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2119 { 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2124 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2125 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2126 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2131 { 2132 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2133 PetscInt totalslices=a->totalslices; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 /* If the matrix dimensions are not equal,or no of nonzeros */ 2138 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2139 *flg = PETSC_FALSE; 2140 PetscFunctionReturn(0); 2141 } 2142 /* if the a->colidx are the same */ 2143 ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2144 if (!*flg) PetscFunctionReturn(0); 2145 /* if a->val are the same */ 2146 ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2151 { 2152 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2153 2154 PetscFunctionBegin; 2155 a->idiagvalid = PETSC_FALSE; 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2160 { 2161 #if defined(PETSC_USE_COMPLEX) 2162 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2163 PetscInt i; 2164 PetscScalar *val = a->val; 2165 2166 PetscFunctionBegin; 2167 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2168 val[i] = PetscConj(val[i]); 2169 } 2170 #else 2171 PetscFunctionBegin; 2172 #endif 2173 PetscFunctionReturn(0); 2174 } 2175