1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 9 #include <petscblaslapack.h> 10 #include <petscbt.h> 11 #include <petsc/private/kernels/blocktranspose.h> 12 13 #if defined(PETSC_HAVE_IMMINTRIN_H) 14 #include <immintrin.h> 15 16 #if !defined(_MM_SCALE_8) 17 #define _MM_SCALE_8 8 18 #endif 19 20 #if defined(__AVX512F__) 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 vec_idx = _mm256_load_si256((__m256i const*)aj); \ 23 vec_vals = _mm512_load_pd(aa); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_idx = _mm_load_si128((__m128i const*)aj); /* SSE2 */ \ 29 vec_vals = _mm256_load_pd(aa); \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif 34 35 PetscErrorCode MatSeqAIJSetTypeFromOptions(Mat A) 36 { 37 PetscErrorCode ierr; 38 PetscBool flg; 39 char type[256]; 40 41 PetscFunctionBegin; 42 ierr = PetscObjectOptionsBegin((PetscObject)A); 43 ierr = PetscOptionsFList("-mat_seqaij_type","Matrix SeqAIJ type","MatSeqAIJSetType",MatSeqAIJList,"seqaij",type,256,&flg);CHKERRQ(ierr); 44 if (flg) { 45 ierr = MatSeqAIJSetType(A,type);CHKERRQ(ierr); 46 } 47 ierr = PetscOptionsEnd();CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 PetscErrorCode MatGetColumnNorms_SeqAIJ(Mat A,NormType type,PetscReal *norms) 52 { 53 PetscErrorCode ierr; 54 PetscInt i,m,n; 55 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)A->data; 56 57 PetscFunctionBegin; 58 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 59 ierr = PetscMemzero(norms,n*sizeof(PetscReal));CHKERRQ(ierr); 60 if (type == NORM_2) { 61 for (i=0; i<aij->i[m]; i++) { 62 norms[aij->j[i]] += PetscAbsScalar(aij->a[i]*aij->a[i]); 63 } 64 } else if (type == NORM_1) { 65 for (i=0; i<aij->i[m]; i++) { 66 norms[aij->j[i]] += PetscAbsScalar(aij->a[i]); 67 } 68 } else if (type == NORM_INFINITY) { 69 for (i=0; i<aij->i[m]; i++) { 70 norms[aij->j[i]] = PetscMax(PetscAbsScalar(aij->a[i]),norms[aij->j[i]]); 71 } 72 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 73 74 if (type == NORM_2) { 75 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 76 } 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatFindOffBlockDiagonalEntries_SeqAIJ(Mat A,IS *is) 81 { 82 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 83 PetscInt i,m=A->rmap->n,cnt = 0, bs = A->rmap->bs; 84 const PetscInt *jj = a->j,*ii = a->i; 85 PetscInt *rows; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 for (i=0; i<m; i++) { 90 if ((ii[i] != ii[i+1]) && ((jj[ii[i]] < bs*(i/bs)) || (jj[ii[i+1]-1] > bs*((i+bs)/bs)-1))) { 91 cnt++; 92 } 93 } 94 ierr = PetscMalloc1(cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 if ((ii[i] != ii[i+1]) && ((jj[ii[i]] < bs*(i/bs)) || (jj[ii[i+1]-1] > bs*((i+bs)/bs)-1))) { 98 rows[cnt] = i; 99 cnt++; 100 } 101 } 102 ierr = ISCreateGeneral(PETSC_COMM_SELF,cnt,rows,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 103 PetscFunctionReturn(0); 104 } 105 106 PetscErrorCode MatFindZeroDiagonals_SeqAIJ_Private(Mat A,PetscInt *nrows,PetscInt **zrows) 107 { 108 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 109 const MatScalar *aa = a->a; 110 PetscInt i,m=A->rmap->n,cnt = 0; 111 const PetscInt *ii = a->i,*jj = a->j,*diag; 112 PetscInt *rows; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 117 diag = a->diag; 118 for (i=0; i<m; i++) { 119 if ((diag[i] >= ii[i+1]) || (jj[diag[i]] != i) || (aa[diag[i]] == 0.0)) { 120 cnt++; 121 } 122 } 123 ierr = PetscMalloc1(cnt,&rows);CHKERRQ(ierr); 124 cnt = 0; 125 for (i=0; i<m; i++) { 126 if ((diag[i] >= ii[i+1]) || (jj[diag[i]] != i) || (aa[diag[i]] == 0.0)) { 127 rows[cnt++] = i; 128 } 129 } 130 *nrows = cnt; 131 *zrows = rows; 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatFindZeroDiagonals_SeqAIJ(Mat A,IS *zrows) 136 { 137 PetscInt nrows,*rows; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 PetscErrorCode MatFindNonzeroRows_SeqAIJ(Mat A,IS *keptrows) 148 { 149 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 150 const MatScalar *aa; 151 PetscInt m=A->rmap->n,cnt = 0; 152 const PetscInt *ii; 153 PetscInt n,i,j,*rows; 154 PetscErrorCode ierr; 155 156 PetscFunctionBegin; 157 *keptrows = 0; 158 ii = a->i; 159 for (i=0; i<m; i++) { 160 n = ii[i+1] - ii[i]; 161 if (!n) { 162 cnt++; 163 goto ok1; 164 } 165 aa = a->a + ii[i]; 166 for (j=0; j<n; j++) { 167 if (aa[j] != 0.0) goto ok1; 168 } 169 cnt++; 170 ok1:; 171 } 172 if (!cnt) PetscFunctionReturn(0); 173 ierr = PetscMalloc1(A->rmap->n-cnt,&rows);CHKERRQ(ierr); 174 cnt = 0; 175 for (i=0; i<m; i++) { 176 n = ii[i+1] - ii[i]; 177 if (!n) continue; 178 aa = a->a + ii[i]; 179 for (j=0; j<n; j++) { 180 if (aa[j] != 0.0) { 181 rows[cnt++] = i; 182 break; 183 } 184 } 185 } 186 ierr = ISCreateGeneral(PETSC_COMM_SELF,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 PetscErrorCode MatDiagonalSet_SeqAIJ(Mat Y,Vec D,InsertMode is) 191 { 192 PetscErrorCode ierr; 193 Mat_SeqAIJ *aij = (Mat_SeqAIJ*) Y->data; 194 PetscInt i,m = Y->rmap->n; 195 const PetscInt *diag; 196 MatScalar *aa = aij->a; 197 const PetscScalar *v; 198 PetscBool missing; 199 200 PetscFunctionBegin; 201 if (Y->assembled) { 202 ierr = MatMissingDiagonal_SeqAIJ(Y,&missing,NULL);CHKERRQ(ierr); 203 if (!missing) { 204 diag = aij->diag; 205 ierr = VecGetArrayRead(D,&v);CHKERRQ(ierr); 206 if (is == INSERT_VALUES) { 207 for (i=0; i<m; i++) { 208 aa[diag[i]] = v[i]; 209 } 210 } else { 211 for (i=0; i<m; i++) { 212 aa[diag[i]] += v[i]; 213 } 214 } 215 ierr = VecRestoreArrayRead(D,&v);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 ierr = MatSeqAIJInvalidateDiagonal(Y);CHKERRQ(ierr); 219 } 220 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 PetscErrorCode MatGetRowIJ_SeqAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 225 { 226 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 227 PetscErrorCode ierr; 228 PetscInt i,ishift; 229 230 PetscFunctionBegin; 231 *m = A->rmap->n; 232 if (!ia) PetscFunctionReturn(0); 233 ishift = 0; 234 if (symmetric && !A->structurally_symmetric) { 235 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,PETSC_TRUE,ishift,oshift,(PetscInt**)ia,(PetscInt**)ja);CHKERRQ(ierr); 236 } else if (oshift == 1) { 237 PetscInt *tia; 238 PetscInt nz = a->i[A->rmap->n]; 239 /* malloc space and add 1 to i and j indices */ 240 ierr = PetscMalloc1(A->rmap->n+1,&tia);CHKERRQ(ierr); 241 for (i=0; i<A->rmap->n+1; i++) tia[i] = a->i[i] + 1; 242 *ia = tia; 243 if (ja) { 244 PetscInt *tja; 245 ierr = PetscMalloc1(nz+1,&tja);CHKERRQ(ierr); 246 for (i=0; i<nz; i++) tja[i] = a->j[i] + 1; 247 *ja = tja; 248 } 249 } else { 250 *ia = a->i; 251 if (ja) *ja = a->j; 252 } 253 PetscFunctionReturn(0); 254 } 255 256 PetscErrorCode MatRestoreRowIJ_SeqAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 257 { 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 if (!ia) PetscFunctionReturn(0); 262 if ((symmetric && !A->structurally_symmetric) || oshift == 1) { 263 ierr = PetscFree(*ia);CHKERRQ(ierr); 264 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 265 } 266 PetscFunctionReturn(0); 267 } 268 269 PetscErrorCode MatGetColumnIJ_SeqAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 272 PetscErrorCode ierr; 273 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 274 PetscInt nz = a->i[m],row,*jj,mr,col; 275 276 PetscFunctionBegin; 277 *nn = n; 278 if (!ia) PetscFunctionReturn(0); 279 if (symmetric) { 280 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,PETSC_TRUE,0,oshift,(PetscInt**)ia,(PetscInt**)ja);CHKERRQ(ierr); 281 } else { 282 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 283 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 284 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 285 jj = a->j; 286 for (i=0; i<nz; i++) { 287 collengths[jj[i]]++; 288 } 289 cia[0] = oshift; 290 for (i=0; i<n; i++) { 291 cia[i+1] = cia[i] + collengths[i]; 292 } 293 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 294 jj = a->j; 295 for (row=0; row<m; row++) { 296 mr = a->i[row+1] - a->i[row]; 297 for (i=0; i<mr; i++) { 298 col = *jj++; 299 300 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 301 } 302 } 303 ierr = PetscFree(collengths);CHKERRQ(ierr); 304 *ia = cia; *ja = cja; 305 } 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatRestoreColumnIJ_SeqAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 310 { 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 if (!ia) PetscFunctionReturn(0); 315 316 ierr = PetscFree(*ia);CHKERRQ(ierr); 317 ierr = PetscFree(*ja);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* 322 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 323 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 324 spidx[], index of a->a, to be used in MatTransposeColoringCreate_SeqAIJ() and MatFDColoringCreate_SeqXAIJ() 325 */ 326 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 327 { 328 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 329 PetscErrorCode ierr; 330 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 331 PetscInt nz = a->i[m],row,*jj,mr,col; 332 PetscInt *cspidx; 333 334 PetscFunctionBegin; 335 *nn = n; 336 if (!ia) PetscFunctionReturn(0); 337 338 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 339 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 340 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 341 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 342 jj = a->j; 343 for (i=0; i<nz; i++) { 344 collengths[jj[i]]++; 345 } 346 cia[0] = oshift; 347 for (i=0; i<n; i++) { 348 cia[i+1] = cia[i] + collengths[i]; 349 } 350 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 351 jj = a->j; 352 for (row=0; row<m; row++) { 353 mr = a->i[row+1] - a->i[row]; 354 for (i=0; i<mr; i++) { 355 col = *jj++; 356 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 357 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 358 } 359 } 360 ierr = PetscFree(collengths);CHKERRQ(ierr); 361 *ia = cia; *ja = cja; 362 *spidx = cspidx; 363 PetscFunctionReturn(0); 364 } 365 366 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 367 { 368 PetscErrorCode ierr; 369 370 PetscFunctionBegin; 371 ierr = MatRestoreColumnIJ_SeqAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 372 ierr = PetscFree(*spidx);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 376 PetscErrorCode MatSetValuesRow_SeqAIJ(Mat A,PetscInt row,const PetscScalar v[]) 377 { 378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 379 PetscInt *ai = a->i; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PetscMemcpy(a->a+ai[row],v,(ai[row+1]-ai[row])*sizeof(PetscScalar));CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 /* 388 MatSeqAIJSetValuesLocalFast - An optimized version of MatSetValuesLocal() for SeqAIJ matrices with several assumptions 389 390 - a single row of values is set with each call 391 - no row or column indices are negative or (in error) larger than the number of rows or columns 392 - the values are always added to the matrix, not set 393 - no new locations are introduced in the nonzero structure of the matrix 394 395 This does NOT assume the global column indices are sorted 396 397 */ 398 399 #include <petsc/private/isimpl.h> 400 PetscErrorCode MatSeqAIJSetValuesLocalFast(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 401 { 402 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 403 PetscInt low,high,t,row,nrow,i,col,l; 404 const PetscInt *rp,*ai = a->i,*ailen = a->ilen,*aj = a->j; 405 PetscInt lastcol = -1; 406 MatScalar *ap,value,*aa = a->a; 407 const PetscInt *ridx = A->rmap->mapping->indices,*cidx = A->cmap->mapping->indices; 408 409 row = ridx[im[0]]; 410 rp = aj + ai[row]; 411 ap = aa + ai[row]; 412 nrow = ailen[row]; 413 low = 0; 414 high = nrow; 415 for (l=0; l<n; l++) { /* loop over added columns */ 416 col = cidx[in[l]]; 417 value = v[l]; 418 419 if (col <= lastcol) low = 0; 420 else high = nrow; 421 lastcol = col; 422 while (high-low > 5) { 423 t = (low+high)/2; 424 if (rp[t] > col) high = t; 425 else low = t; 426 } 427 for (i=low; i<high; i++) { 428 if (rp[i] == col) { 429 ap[i] += value; 430 low = i + 1; 431 break; 432 } 433 } 434 } 435 return 0; 436 } 437 438 PetscErrorCode MatSetValues_SeqAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 439 { 440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 441 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N; 442 PetscInt *imax = a->imax,*ai = a->i,*ailen = a->ilen; 443 PetscErrorCode ierr; 444 PetscInt *aj = a->j,nonew = a->nonew,lastcol = -1; 445 MatScalar *ap=NULL,value=0.0,*aa = a->a; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 PetscBool roworiented = a->roworiented; 448 449 PetscFunctionBegin; 450 for (k=0; k<m; k++) { /* loop over added rows */ 451 row = im[k]; 452 if (row < 0) continue; 453 #if defined(PETSC_USE_DEBUG) 454 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 455 #endif 456 rp = aj + ai[row]; 457 if (!A->structure_only) ap = aa + ai[row]; 458 rmax = imax[row]; nrow = ailen[row]; 459 low = 0; 460 high = nrow; 461 for (l=0; l<n; l++) { /* loop over added columns */ 462 if (in[l] < 0) continue; 463 #if defined(PETSC_USE_DEBUG) 464 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 465 #endif 466 col = in[l]; 467 if (!A->structure_only) { 468 if (roworiented) { 469 value = v[l + k*n]; 470 } else { 471 value = v[k + l*m]; 472 } 473 } else { /* A->structure_only */ 474 value = 1; /* avoid 'continue' below? */ 475 } 476 if ((value == 0.0 && ignorezeroentries) && (is == ADD_VALUES) && row != col) continue; 477 478 if (col <= lastcol) low = 0; 479 else high = nrow; 480 lastcol = col; 481 while (high-low > 5) { 482 t = (low+high)/2; 483 if (rp[t] > col) high = t; 484 else low = t; 485 } 486 for (i=low; i<high; i++) { 487 if (rp[i] > col) break; 488 if (rp[i] == col) { 489 if (!A->structure_only) { 490 if (is == ADD_VALUES) ap[i] += value; 491 else ap[i] = value; 492 } 493 low = i + 1; 494 goto noinsert; 495 } 496 } 497 if (value == 0.0 && ignorezeroentries && row != col) goto noinsert; 498 if (nonew == 1) goto noinsert; 499 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at (%D,%D) in the matrix",row,col); 500 if (A->structure_only) { 501 MatSeqXAIJReallocateAIJ_structure_only(A,A->rmap->n,1,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 502 } else { 503 MatSeqXAIJReallocateAIJ(A,A->rmap->n,1,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 504 } 505 N = nrow++ - 1; a->nz++; high++; 506 /* shift up all the later entries in this row */ 507 for (ii=N; ii>=i; ii--) { 508 rp[ii+1] = rp[ii]; 509 if (!A->structure_only) ap[ii+1] = ap[ii]; 510 } 511 rp[i] = col; 512 if (!A->structure_only) ap[i] = value; 513 low = i + 1; 514 A->nonzerostate++; 515 noinsert:; 516 } 517 ailen[row] = nrow; 518 } 519 PetscFunctionReturn(0); 520 } 521 522 523 PetscErrorCode MatGetValues_SeqAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 524 { 525 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 526 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 527 PetscInt *ai = a->i,*ailen = a->ilen; 528 MatScalar *ap,*aa = a->a; 529 530 PetscFunctionBegin; 531 for (k=0; k<m; k++) { /* loop over rows */ 532 row = im[k]; 533 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 534 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 535 rp = aj + ai[row]; ap = aa + ai[row]; 536 nrow = ailen[row]; 537 for (l=0; l<n; l++) { /* loop over columns */ 538 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 539 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 540 col = in[l]; 541 high = nrow; low = 0; /* assume unsorted */ 542 while (high-low > 5) { 543 t = (low+high)/2; 544 if (rp[t] > col) high = t; 545 else low = t; 546 } 547 for (i=low; i<high; i++) { 548 if (rp[i] > col) break; 549 if (rp[i] == col) { 550 *v++ = ap[i]; 551 goto finished; 552 } 553 } 554 *v++ = 0.0; 555 finished:; 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 562 PetscErrorCode MatView_SeqAIJ_Binary(Mat A,PetscViewer viewer) 563 { 564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscInt i,*col_lens; 567 int fd; 568 FILE *file; 569 570 PetscFunctionBegin; 571 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 572 ierr = PetscMalloc1(4+A->rmap->n,&col_lens);CHKERRQ(ierr); 573 574 col_lens[0] = MAT_FILE_CLASSID; 575 col_lens[1] = A->rmap->n; 576 col_lens[2] = A->cmap->n; 577 col_lens[3] = a->nz; 578 579 /* store lengths of each row and write (including header) to file */ 580 for (i=0; i<A->rmap->n; i++) { 581 col_lens[4+i] = a->i[i+1] - a->i[i]; 582 } 583 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 584 ierr = PetscFree(col_lens);CHKERRQ(ierr); 585 586 /* store column indices (zero start index) */ 587 ierr = PetscBinaryWrite(fd,a->j,a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 588 589 /* store nonzero values */ 590 ierr = PetscBinaryWrite(fd,a->a,a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 591 592 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 593 if (file) { 594 fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(A->rmap->bs)); 595 } 596 PetscFunctionReturn(0); 597 } 598 599 static PetscErrorCode MatView_SeqAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 600 { 601 PetscErrorCode ierr; 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 603 PetscInt i,k,m=A->rmap->N; 604 605 PetscFunctionBegin; 606 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 607 for (i=0; i<m; i++) { 608 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 609 for (k=a->i[i]; k<a->i[i+1]; k++) { 610 ierr = PetscViewerASCIIPrintf(viewer," (%D) ",a->j[k]);CHKERRQ(ierr); 611 } 612 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 613 } 614 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 extern PetscErrorCode MatSeqAIJFactorInfo_Matlab(Mat,PetscViewer); 619 620 PetscErrorCode MatView_SeqAIJ_ASCII(Mat A,PetscViewer viewer) 621 { 622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 623 PetscErrorCode ierr; 624 PetscInt i,j,m = A->rmap->n; 625 const char *name; 626 PetscViewerFormat format; 627 628 PetscFunctionBegin; 629 if (A->structure_only) { 630 ierr = MatView_SeqAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 635 if (format == PETSC_VIEWER_ASCII_MATLAB) { 636 PetscInt nofinalvalue = 0; 637 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 638 /* Need a dummy value to ensure the dimension of the matrix. */ 639 nofinalvalue = 1; 640 } 641 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 642 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 643 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 644 #if defined(PETSC_USE_COMPLEX) 645 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 646 #else 647 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 648 #endif 649 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 650 651 for (i=0; i<m; i++) { 652 for (j=a->i[i]; j<a->i[i+1]; j++) { 653 #if defined(PETSC_USE_COMPLEX) 654 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->j[j]+1,(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 655 #else 656 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->j[j]+1,(double)a->a[j]);CHKERRQ(ierr); 657 #endif 658 } 659 } 660 if (nofinalvalue) { 661 #if defined(PETSC_USE_COMPLEX) 662 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 663 #else 664 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 665 #endif 666 } 667 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 668 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 669 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 670 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 671 PetscFunctionReturn(0); 672 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 673 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 674 for (i=0; i<m; i++) { 675 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 676 for (j=a->i[i]; j<a->i[i+1]; j++) { 677 #if defined(PETSC_USE_COMPLEX) 678 if (PetscImaginaryPart(a->a[j]) > 0.0 && PetscRealPart(a->a[j]) != 0.0) { 679 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 680 } else if (PetscImaginaryPart(a->a[j]) < 0.0 && PetscRealPart(a->a[j]) != 0.0) { 681 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)-PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 682 } else if (PetscRealPart(a->a[j]) != 0.0) { 683 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)PetscRealPart(a->a[j]));CHKERRQ(ierr); 684 } 685 #else 686 if (a->a[j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)a->a[j]);CHKERRQ(ierr);} 687 #endif 688 } 689 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 690 } 691 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 692 } else if (format == PETSC_VIEWER_ASCII_SYMMODU) { 693 PetscInt nzd=0,fshift=1,*sptr; 694 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 695 ierr = PetscMalloc1(m+1,&sptr);CHKERRQ(ierr); 696 for (i=0; i<m; i++) { 697 sptr[i] = nzd+1; 698 for (j=a->i[i]; j<a->i[i+1]; j++) { 699 if (a->j[j] >= i) { 700 #if defined(PETSC_USE_COMPLEX) 701 if (PetscImaginaryPart(a->a[j]) != 0.0 || PetscRealPart(a->a[j]) != 0.0) nzd++; 702 #else 703 if (a->a[j] != 0.0) nzd++; 704 #endif 705 } 706 } 707 } 708 sptr[m] = nzd+1; 709 ierr = PetscViewerASCIIPrintf(viewer," %D %D\n\n",m,nzd);CHKERRQ(ierr); 710 for (i=0; i<m+1; i+=6) { 711 if (i+4<m) { 712 ierr = PetscViewerASCIIPrintf(viewer," %D %D %D %D %D %D\n",sptr[i],sptr[i+1],sptr[i+2],sptr[i+3],sptr[i+4],sptr[i+5]);CHKERRQ(ierr); 713 } else if (i+3<m) { 714 ierr = PetscViewerASCIIPrintf(viewer," %D %D %D %D %D\n",sptr[i],sptr[i+1],sptr[i+2],sptr[i+3],sptr[i+4]);CHKERRQ(ierr); 715 } else if (i+2<m) { 716 ierr = PetscViewerASCIIPrintf(viewer," %D %D %D %D\n",sptr[i],sptr[i+1],sptr[i+2],sptr[i+3]);CHKERRQ(ierr); 717 } else if (i+1<m) { 718 ierr = PetscViewerASCIIPrintf(viewer," %D %D %D\n",sptr[i],sptr[i+1],sptr[i+2]);CHKERRQ(ierr); 719 } else if (i<m) { 720 ierr = PetscViewerASCIIPrintf(viewer," %D %D\n",sptr[i],sptr[i+1]);CHKERRQ(ierr); 721 } else { 722 ierr = PetscViewerASCIIPrintf(viewer," %D\n",sptr[i]);CHKERRQ(ierr); 723 } 724 } 725 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 726 ierr = PetscFree(sptr);CHKERRQ(ierr); 727 for (i=0; i<m; i++) { 728 for (j=a->i[i]; j<a->i[i+1]; j++) { 729 if (a->j[j] >= i) {ierr = PetscViewerASCIIPrintf(viewer," %D ",a->j[j]+fshift);CHKERRQ(ierr);} 730 } 731 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 732 } 733 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 734 for (i=0; i<m; i++) { 735 for (j=a->i[i]; j<a->i[i+1]; j++) { 736 if (a->j[j] >= i) { 737 #if defined(PETSC_USE_COMPLEX) 738 if (PetscImaginaryPart(a->a[j]) != 0.0 || PetscRealPart(a->a[j]) != 0.0) { 739 ierr = PetscViewerASCIIPrintf(viewer," %18.16e %18.16e ",(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 740 } 741 #else 742 if (a->a[j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," %18.16e ",(double)a->a[j]);CHKERRQ(ierr);} 743 #endif 744 } 745 } 746 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 747 } 748 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 749 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 750 PetscInt cnt = 0,jcnt; 751 PetscScalar value; 752 #if defined(PETSC_USE_COMPLEX) 753 PetscBool realonly = PETSC_TRUE; 754 755 for (i=0; i<a->i[m]; i++) { 756 if (PetscImaginaryPart(a->a[i]) != 0.0) { 757 realonly = PETSC_FALSE; 758 break; 759 } 760 } 761 #endif 762 763 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 764 for (i=0; i<m; i++) { 765 jcnt = 0; 766 for (j=0; j<A->cmap->n; j++) { 767 if (jcnt < a->i[i+1]-a->i[i] && j == a->j[cnt]) { 768 value = a->a[cnt++]; 769 jcnt++; 770 } else { 771 value = 0.0; 772 } 773 #if defined(PETSC_USE_COMPLEX) 774 if (realonly) { 775 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 776 } else { 777 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 778 } 779 #else 780 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 781 #endif 782 } 783 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 784 } 785 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 786 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 787 PetscInt fshift=1; 788 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 789 #if defined(PETSC_USE_COMPLEX) 790 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 791 #else 792 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 793 #endif 794 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 795 for (i=0; i<m; i++) { 796 for (j=a->i[i]; j<a->i[i+1]; j++) { 797 #if defined(PETSC_USE_COMPLEX) 798 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n", i+fshift,a->j[j]+fshift,(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 799 #else 800 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n", i+fshift, a->j[j]+fshift, (double)a->a[j]);CHKERRQ(ierr); 801 #endif 802 } 803 } 804 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 805 } else { 806 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 807 if (A->factortype) { 808 for (i=0; i<m; i++) { 809 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 810 /* L part */ 811 for (j=a->i[i]; j<a->i[i+1]; j++) { 812 #if defined(PETSC_USE_COMPLEX) 813 if (PetscImaginaryPart(a->a[j]) > 0.0) { 814 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 815 } else if (PetscImaginaryPart(a->a[j]) < 0.0) { 816 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)(-PetscImaginaryPart(a->a[j])));CHKERRQ(ierr); 817 } else { 818 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)PetscRealPart(a->a[j]));CHKERRQ(ierr); 819 } 820 #else 821 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)a->a[j]);CHKERRQ(ierr); 822 #endif 823 } 824 /* diagonal */ 825 j = a->diag[i]; 826 #if defined(PETSC_USE_COMPLEX) 827 if (PetscImaginaryPart(a->a[j]) > 0.0) { 828 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->j[j],(double)PetscRealPart(1.0/a->a[j]),(double)PetscImaginaryPart(1.0/a->a[j]));CHKERRQ(ierr); 829 } else if (PetscImaginaryPart(a->a[j]) < 0.0) { 830 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->j[j],(double)PetscRealPart(1.0/a->a[j]),(double)(-PetscImaginaryPart(1.0/a->a[j])));CHKERRQ(ierr); 831 } else { 832 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)PetscRealPart(1.0/a->a[j]));CHKERRQ(ierr); 833 } 834 #else 835 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)(1.0/a->a[j]));CHKERRQ(ierr); 836 #endif 837 838 /* U part */ 839 for (j=a->diag[i+1]+1; j<a->diag[i]; j++) { 840 #if defined(PETSC_USE_COMPLEX) 841 if (PetscImaginaryPart(a->a[j]) > 0.0) { 842 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 843 } else if (PetscImaginaryPart(a->a[j]) < 0.0) { 844 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)(-PetscImaginaryPart(a->a[j])));CHKERRQ(ierr); 845 } else { 846 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)PetscRealPart(a->a[j]));CHKERRQ(ierr); 847 } 848 #else 849 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)a->a[j]);CHKERRQ(ierr); 850 #endif 851 } 852 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 853 } 854 } else { 855 for (i=0; i<m; i++) { 856 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 857 for (j=a->i[i]; j<a->i[i+1]; j++) { 858 #if defined(PETSC_USE_COMPLEX) 859 if (PetscImaginaryPart(a->a[j]) > 0.0) { 860 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 861 } else if (PetscImaginaryPart(a->a[j]) < 0.0) { 862 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->j[j],(double)PetscRealPart(a->a[j]),(double)-PetscImaginaryPart(a->a[j]));CHKERRQ(ierr); 863 } else { 864 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)PetscRealPart(a->a[j]));CHKERRQ(ierr); 865 } 866 #else 867 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->j[j],(double)a->a[j]);CHKERRQ(ierr); 868 #endif 869 } 870 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 871 } 872 } 873 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 874 } 875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #include <petscdraw.h> 880 PetscErrorCode MatView_SeqAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 881 { 882 Mat A = (Mat) Aa; 883 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 884 PetscErrorCode ierr; 885 PetscInt i,j,m = A->rmap->n; 886 int color; 887 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 888 PetscViewer viewer; 889 PetscViewerFormat format; 890 891 PetscFunctionBegin; 892 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 893 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 894 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 895 896 /* loop over matrix elements drawing boxes */ 897 898 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 899 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 900 /* Blue for negative, Cyan for zero and Red for positive */ 901 color = PETSC_DRAW_BLUE; 902 for (i=0; i<m; i++) { 903 y_l = m - i - 1.0; y_r = y_l + 1.0; 904 for (j=a->i[i]; j<a->i[i+1]; j++) { 905 x_l = a->j[j]; x_r = x_l + 1.0; 906 if (PetscRealPart(a->a[j]) >= 0.) continue; 907 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 908 } 909 } 910 color = PETSC_DRAW_CYAN; 911 for (i=0; i<m; i++) { 912 y_l = m - i - 1.0; y_r = y_l + 1.0; 913 for (j=a->i[i]; j<a->i[i+1]; j++) { 914 x_l = a->j[j]; x_r = x_l + 1.0; 915 if (a->a[j] != 0.) continue; 916 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 917 } 918 } 919 color = PETSC_DRAW_RED; 920 for (i=0; i<m; i++) { 921 y_l = m - i - 1.0; y_r = y_l + 1.0; 922 for (j=a->i[i]; j<a->i[i+1]; j++) { 923 x_l = a->j[j]; x_r = x_l + 1.0; 924 if (PetscRealPart(a->a[j]) <= 0.) continue; 925 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 926 } 927 } 928 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 929 } else { 930 /* use contour shading to indicate magnitude of values */ 931 /* first determine max of all nonzero values */ 932 PetscReal minv = 0.0, maxv = 0.0; 933 PetscInt nz = a->nz, count = 0; 934 PetscDraw popup; 935 936 for (i=0; i<nz; i++) { 937 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 938 } 939 if (minv >= maxv) maxv = minv + PETSC_SMALL; 940 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 941 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 942 943 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 944 for (i=0; i<m; i++) { 945 y_l = m - i - 1.0; 946 y_r = y_l + 1.0; 947 for (j=a->i[i]; j<a->i[i+1]; j++) { 948 x_l = a->j[j]; 949 x_r = x_l + 1.0; 950 color = PetscDrawRealToColor(PetscAbsScalar(a->a[count]),minv,maxv); 951 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 952 count++; 953 } 954 } 955 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 #include <petscdraw.h> 961 PetscErrorCode MatView_SeqAIJ_Draw(Mat A,PetscViewer viewer) 962 { 963 PetscErrorCode ierr; 964 PetscDraw draw; 965 PetscReal xr,yr,xl,yl,h,w; 966 PetscBool isnull; 967 968 PetscFunctionBegin; 969 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 970 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 971 if (isnull) PetscFunctionReturn(0); 972 973 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 974 xr += w; yr += h; xl = -w; yl = -h; 975 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 976 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 977 ierr = PetscDrawZoom(draw,MatView_SeqAIJ_Draw_Zoom,A);CHKERRQ(ierr); 978 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 979 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatView_SeqAIJ(Mat A,PetscViewer viewer) 984 { 985 PetscErrorCode ierr; 986 PetscBool iascii,isbinary,isdraw; 987 988 PetscFunctionBegin; 989 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 992 if (iascii) { 993 ierr = MatView_SeqAIJ_ASCII(A,viewer);CHKERRQ(ierr); 994 } else if (isbinary) { 995 ierr = MatView_SeqAIJ_Binary(A,viewer);CHKERRQ(ierr); 996 } else if (isdraw) { 997 ierr = MatView_SeqAIJ_Draw(A,viewer);CHKERRQ(ierr); 998 } 999 ierr = MatView_SeqAIJ_Inode(A,viewer);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat A,MatAssemblyType mode) 1004 { 1005 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1006 PetscErrorCode ierr; 1007 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1008 PetscInt m = A->rmap->n,*ip,N,*ailen = a->ilen,rmax = 0; 1009 MatScalar *aa = a->a,*ap; 1010 PetscReal ratio = 0.6; 1011 1012 PetscFunctionBegin; 1013 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1014 1015 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 1016 for (i=1; i<m; i++) { 1017 /* move each row back by the amount of empty slots (fshift) before it*/ 1018 fshift += imax[i-1] - ailen[i-1]; 1019 rmax = PetscMax(rmax,ailen[i]); 1020 if (fshift) { 1021 ip = aj + ai[i]; 1022 ap = aa + ai[i]; 1023 N = ailen[i]; 1024 for (j=0; j<N; j++) { 1025 ip[j-fshift] = ip[j]; 1026 if (!A->structure_only) ap[j-fshift] = ap[j]; 1027 } 1028 } 1029 ai[i] = ai[i-1] + ailen[i-1]; 1030 } 1031 if (m) { 1032 fshift += imax[m-1] - ailen[m-1]; 1033 ai[m] = ai[m-1] + ailen[m-1]; 1034 } 1035 1036 /* reset ilen and imax for each row */ 1037 a->nonzerorowcnt = 0; 1038 if (A->structure_only) { 1039 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1040 } else { /* !A->structure_only */ 1041 for (i=0; i<m; i++) { 1042 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1043 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1044 } 1045 } 1046 a->nz = ai[m]; 1047 if (fshift && a->nounused == -1) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D, %D unneeded", m, A->cmap->n, fshift); 1048 1049 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1050 ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n,fshift,a->nz);CHKERRQ(ierr); 1051 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1052 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);CHKERRQ(ierr); 1053 1054 A->info.mallocs += a->reallocs; 1055 a->reallocs = 0; 1056 A->info.nz_unneeded = (PetscReal)fshift; 1057 a->rmax = rmax; 1058 1059 if (!A->structure_only) { 1060 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,m,ratio);CHKERRQ(ierr); 1061 } 1062 ierr = MatAssemblyEnd_SeqAIJ_Inode(A,mode);CHKERRQ(ierr); 1063 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatRealPart_SeqAIJ(Mat A) 1068 { 1069 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1070 PetscInt i,nz = a->nz; 1071 MatScalar *aa = a->a; 1072 PetscErrorCode ierr; 1073 1074 PetscFunctionBegin; 1075 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1076 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 PetscErrorCode MatImaginaryPart_SeqAIJ(Mat A) 1081 { 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1083 PetscInt i,nz = a->nz; 1084 MatScalar *aa = a->a; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1089 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 PetscErrorCode MatZeroEntries_SeqAIJ(Mat A) 1094 { 1095 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = PetscMemzero(a->a,(a->i[A->rmap->n])*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 PetscErrorCode MatDestroy_SeqAIJ(Mat A) 1105 { 1106 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 #if defined(PETSC_USE_LOG) 1111 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 1112 #endif 1113 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1114 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1115 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1116 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1117 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 1118 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1119 ierr = PetscFree(a->ipre);CHKERRQ(ierr); 1120 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 1121 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1122 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1123 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1124 ierr = ISColoringDestroy(&a->coloring);CHKERRQ(ierr); 1125 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1126 ierr = PetscFree(a->matmult_abdense);CHKERRQ(ierr); 1127 1128 ierr = MatDestroy_SeqAIJ_Inode(A);CHKERRQ(ierr); 1129 ierr = PetscFree(A->data);CHKERRQ(ierr); 1130 1131 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqbaij_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijperm_C",NULL);CHKERRQ(ierr); 1138 #if defined(PETSC_HAVE_ELEMENTAL) 1139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_elemental_C",NULL);CHKERRQ(ierr); 1140 #endif 1141 #if defined(PETSC_HAVE_HYPRE) 1142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_hypre_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMatMatMult_transpose_seqaij_seqaij_C",NULL);CHKERRQ(ierr); 1144 #endif 1145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqdense_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)A,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)A,"MatReorderForNonzeroDiagonal_C",NULL);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 PetscErrorCode MatSetOption_SeqAIJ(Mat A,MatOption op,PetscBool flg) 1155 { 1156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 switch (op) { 1161 case MAT_ROW_ORIENTED: 1162 a->roworiented = flg; 1163 break; 1164 case MAT_KEEP_NONZERO_PATTERN: 1165 a->keepnonzeropattern = flg; 1166 break; 1167 case MAT_NEW_NONZERO_LOCATIONS: 1168 a->nonew = (flg ? 0 : 1); 1169 break; 1170 case MAT_NEW_NONZERO_LOCATION_ERR: 1171 a->nonew = (flg ? -1 : 0); 1172 break; 1173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1174 a->nonew = (flg ? -2 : 0); 1175 break; 1176 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1177 a->nounused = (flg ? -1 : 0); 1178 break; 1179 case MAT_IGNORE_ZERO_ENTRIES: 1180 a->ignorezeroentries = flg; 1181 break; 1182 case MAT_SPD: 1183 case MAT_SYMMETRIC: 1184 case MAT_STRUCTURALLY_SYMMETRIC: 1185 case MAT_HERMITIAN: 1186 case MAT_SYMMETRY_ETERNAL: 1187 case MAT_STRUCTURE_ONLY: 1188 /* These options are handled directly by MatSetOption() */ 1189 break; 1190 case MAT_NEW_DIAGONALS: 1191 case MAT_IGNORE_OFF_PROC_ENTRIES: 1192 case MAT_USE_HASH_TABLE: 1193 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1194 break; 1195 case MAT_USE_INODES: 1196 /* Not an error because MatSetOption_SeqAIJ_Inode handles this one */ 1197 break; 1198 case MAT_SUBMAT_SINGLEIS: 1199 A->submat_singleis = flg; 1200 break; 1201 default: 1202 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1203 } 1204 ierr = MatSetOption_SeqAIJ_Inode(A,op,flg);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 PetscErrorCode MatGetDiagonal_SeqAIJ(Mat A,Vec v) 1209 { 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 PetscInt i,j,n,*ai=a->i,*aj=a->j,nz; 1213 PetscScalar *aa=a->a,*x,zero=0.0; 1214 1215 PetscFunctionBegin; 1216 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1217 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1218 1219 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 1220 PetscInt *diag=a->diag; 1221 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1222 for (i=0; i<n; i++) x[i] = 1.0/aa[diag[i]]; 1223 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 ierr = VecSet(v,zero);CHKERRQ(ierr); 1228 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1229 for (i=0; i<n; i++) { 1230 nz = ai[i+1] - ai[i]; 1231 if (!nz) x[i] = 0.0; 1232 for (j=ai[i]; j<ai[i+1]; j++) { 1233 if (aj[j] == i) { 1234 x[i] = aa[j]; 1235 break; 1236 } 1237 } 1238 } 1239 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #include <../src/mat/impls/aij/seq/ftn-kernels/fmult.h> 1244 PetscErrorCode MatMultTransposeAdd_SeqAIJ(Mat A,Vec xx,Vec zz,Vec yy) 1245 { 1246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1247 PetscScalar *y; 1248 const PetscScalar *x; 1249 PetscErrorCode ierr; 1250 PetscInt m = A->rmap->n; 1251 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ) 1252 const MatScalar *v; 1253 PetscScalar alpha; 1254 PetscInt n,i,j; 1255 const PetscInt *idx,*ii,*ridx=NULL; 1256 Mat_CompressedRow cprow = a->compressedrow; 1257 PetscBool usecprow = cprow.use; 1258 #endif 1259 1260 PetscFunctionBegin; 1261 if (zz != yy) {ierr = VecCopy(zz,yy);CHKERRQ(ierr);} 1262 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1263 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1264 1265 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ) 1266 fortranmulttransposeaddaij_(&m,x,a->i,a->j,a->a,y); 1267 #else 1268 if (usecprow) { 1269 m = cprow.nrows; 1270 ii = cprow.i; 1271 ridx = cprow.rindex; 1272 } else { 1273 ii = a->i; 1274 } 1275 for (i=0; i<m; i++) { 1276 idx = a->j + ii[i]; 1277 v = a->a + ii[i]; 1278 n = ii[i+1] - ii[i]; 1279 if (usecprow) { 1280 alpha = x[ridx[i]]; 1281 } else { 1282 alpha = x[i]; 1283 } 1284 for (j=0; j<n; j++) y[idx[j]] += alpha*v[j]; 1285 } 1286 #endif 1287 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1288 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1289 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 PetscErrorCode MatMultTranspose_SeqAIJ(Mat A,Vec xx,Vec yy) 1294 { 1295 PetscErrorCode ierr; 1296 1297 PetscFunctionBegin; 1298 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1299 ierr = MatMultTransposeAdd_SeqAIJ(A,xx,yy,yy);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #include <../src/mat/impls/aij/seq/ftn-kernels/fmult.h> 1304 1305 PetscErrorCode MatMult_SeqAIJ(Mat A,Vec xx,Vec yy) 1306 { 1307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1308 PetscScalar *y; 1309 const PetscScalar *x; 1310 const MatScalar *aa; 1311 PetscErrorCode ierr; 1312 PetscInt m=A->rmap->n; 1313 const PetscInt *aj,*ii,*ridx=NULL; 1314 PetscInt n,i; 1315 PetscScalar sum; 1316 PetscBool usecprow=a->compressedrow.use; 1317 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 1318 __m512d vec_x,vec_y,vec_vals; 1319 __m256i vec_idx; 1320 __mmask8 mask; 1321 PetscInt j; 1322 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 1323 __m128i vec_idx; 1324 __m256d vec_x,vec_y,vec_y2,vec_vals; 1325 PetscInt j; 1326 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 1327 __m128d vec_x_tmp; 1328 __m256d vec_x,vec_y,vec_y2,vec_vals; 1329 PetscInt j; 1330 #endif 1331 1332 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 1333 #pragma disjoint(*x,*y,*aa) 1334 #endif 1335 1336 PetscFunctionBegin; 1337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1338 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1339 ii = a->i; 1340 if (usecprow) { /* use compressed row format */ 1341 ierr = PetscMemzero(y,m*sizeof(PetscScalar));CHKERRQ(ierr); 1342 m = a->compressedrow.nrows; 1343 ii = a->compressedrow.i; 1344 ridx = a->compressedrow.rindex; 1345 for (i=0; i<m; i++) { 1346 n = ii[i+1] - ii[i]; 1347 aj = a->j + ii[i]; 1348 aa = a->a + ii[i]; 1349 sum = 0.0; 1350 PetscSparseDensePlusDot(sum,x,aa,aj,n); 1351 /* for (j=0; j<n; j++) sum += (*aa++)*x[*aj++]; */ 1352 y[*ridx++] = sum; 1353 } 1354 } else { /* do not use compressed row format */ 1355 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ) 1356 aj = a->j; 1357 aa = a->a; 1358 fortranmultaij_(&m,x,ii,aj,aa,y); 1359 #else 1360 for (i=0; i<m; i++) { 1361 n = ii[i+1] - ii[i]; 1362 aj = a->j + ii[i]; 1363 aa = a->a + ii[i]; 1364 sum = 0.0; 1365 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 1366 vec_y = _mm512_setzero_pd(); 1367 for (j=0; j<(n>>3); j++) { 1368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 1369 aj += 8; aa += 8; 1370 } 1371 /* masked load does not work on KNL, it requires avx512vl */ 1372 if ((n&0x07)>2) { 1373 mask = (__mmask8)(0xff >> (8-(n&0x07))); 1374 vec_idx = _mm256_load_si256((__m256i const*)aj); 1375 vec_vals = _mm512_load_pd(aa); 1376 vec_x = _mm512_mask_i32gather_pd(vec_x,mask,vec_idx,x,_MM_SCALE_8); 1377 vec_y = _mm512_mask3_fmadd_pd(vec_x,vec_vals,vec_y,mask); 1378 } else if ((n&0x07)==2) { 1379 sum += aa[0]*x[aj[0]]; 1380 sum += aa[1]*x[aj[1]]; 1381 } else if ((n&0x07)==1) { 1382 sum += aa[0]*x[aj[0]]; 1383 } 1384 if (n>2) sum += _mm512_reduce_add_pd(vec_y); 1385 /* 1386 for(j=0;j<(n&0x07);j++) sum += aa[j]*x[aj[j]]; 1387 */ 1388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 1389 vec_y = _mm256_setzero_pd(); 1390 for (j=0; j<(n>>2); j++) { 1391 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 1392 aj += 4; aa += 4; 1393 } 1394 vec_y2 = _mm256_add_pd(vec_y, _mm256_permute2f128_pd(vec_y,vec_y,0x1) ); 1395 _mm_store_sd(&sum, _mm_hadd_pd( _mm256_castpd256_pd128(vec_y2), _mm256_castpd256_pd128(vec_y2) ) ); 1396 if ((n&0x03)==3) { 1397 sum += aa[0]*x[aj[0]]; 1398 sum += aa[1]*x[aj[1]]; 1399 sum += aa[2]*x[aj[2]]; 1400 } else if ((n&0x03)==2) { 1401 sum += aa[0]*x[aj[0]]; 1402 sum += aa[1]*x[aj[1]]; 1403 } else if ((n&0x03)==1) { 1404 sum += aa[0]*x[aj[0]]; 1405 } 1406 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 1407 vec_y = _mm256_setzero_pd(); 1408 for (j=0; j<(n>>2); j++) { 1409 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *aj++); 1410 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *aj++); 1411 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 1412 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *aj++); 1413 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *aj++); 1414 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 1415 vec_vals = _mm256_loadu_pd(aa); 1416 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 1417 aa += 4; 1418 } 1419 vec_y2 = _mm256_add_pd(vec_y, _mm256_permute2f128_pd(vec_y,vec_y,0x1) ); 1420 _mm_store_sd(&sum, _mm_hadd_pd( _mm256_castpd256_pd128(vec_y2), _mm256_castpd256_pd128(vec_y2) ) ); 1421 if ((n&0x03)==3) { 1422 sum += aa[0]*x[aj[0]]; 1423 sum += aa[1]*x[aj[1]]; 1424 sum += aa[2]*x[aj[2]]; 1425 } else if ((n&0x03)==2) { 1426 sum += aa[0]*x[aj[0]]; 1427 sum += aa[1]*x[aj[1]]; 1428 } else if ((n&0x03)==1) { 1429 sum += aa[0]*x[aj[0]]; 1430 } 1431 #else 1432 PetscSparseDensePlusDot(sum,x,aa,aj,n); 1433 #endif 1434 y[i] = sum; 1435 } 1436 #endif 1437 } 1438 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 1439 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1440 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 PetscErrorCode MatMultMax_SeqAIJ(Mat A,Vec xx,Vec yy) 1445 { 1446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1447 PetscScalar *y; 1448 const PetscScalar *x; 1449 const MatScalar *aa; 1450 PetscErrorCode ierr; 1451 PetscInt m=A->rmap->n; 1452 const PetscInt *aj,*ii,*ridx=NULL; 1453 PetscInt n,i,nonzerorow=0; 1454 PetscScalar sum; 1455 PetscBool usecprow=a->compressedrow.use; 1456 1457 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 1458 #pragma disjoint(*x,*y,*aa) 1459 #endif 1460 1461 PetscFunctionBegin; 1462 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1464 if (usecprow) { /* use compressed row format */ 1465 m = a->compressedrow.nrows; 1466 ii = a->compressedrow.i; 1467 ridx = a->compressedrow.rindex; 1468 for (i=0; i<m; i++) { 1469 n = ii[i+1] - ii[i]; 1470 aj = a->j + ii[i]; 1471 aa = a->a + ii[i]; 1472 sum = 0.0; 1473 nonzerorow += (n>0); 1474 PetscSparseDenseMaxDot(sum,x,aa,aj,n); 1475 /* for (j=0; j<n; j++) sum += (*aa++)*x[*aj++]; */ 1476 y[*ridx++] = sum; 1477 } 1478 } else { /* do not use compressed row format */ 1479 ii = a->i; 1480 for (i=0; i<m; i++) { 1481 n = ii[i+1] - ii[i]; 1482 aj = a->j + ii[i]; 1483 aa = a->a + ii[i]; 1484 sum = 0.0; 1485 nonzerorow += (n>0); 1486 PetscSparseDenseMaxDot(sum,x,aa,aj,n); 1487 y[i] = sum; 1488 } 1489 } 1490 ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr); 1491 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1492 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 PetscErrorCode MatMultAddMax_SeqAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1497 { 1498 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1499 PetscScalar *y,*z; 1500 const PetscScalar *x; 1501 const MatScalar *aa; 1502 PetscErrorCode ierr; 1503 PetscInt m = A->rmap->n,*aj,*ii; 1504 PetscInt n,i,*ridx=NULL; 1505 PetscScalar sum; 1506 PetscBool usecprow=a->compressedrow.use; 1507 1508 PetscFunctionBegin; 1509 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1510 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1511 if (usecprow) { /* use compressed row format */ 1512 if (zz != yy) { 1513 ierr = PetscMemcpy(z,y,m*sizeof(PetscScalar));CHKERRQ(ierr); 1514 } 1515 m = a->compressedrow.nrows; 1516 ii = a->compressedrow.i; 1517 ridx = a->compressedrow.rindex; 1518 for (i=0; i<m; i++) { 1519 n = ii[i+1] - ii[i]; 1520 aj = a->j + ii[i]; 1521 aa = a->a + ii[i]; 1522 sum = y[*ridx]; 1523 PetscSparseDenseMaxDot(sum,x,aa,aj,n); 1524 z[*ridx++] = sum; 1525 } 1526 } else { /* do not use compressed row format */ 1527 ii = a->i; 1528 for (i=0; i<m; i++) { 1529 n = ii[i+1] - ii[i]; 1530 aj = a->j + ii[i]; 1531 aa = a->a + ii[i]; 1532 sum = y[i]; 1533 PetscSparseDenseMaxDot(sum,x,aa,aj,n); 1534 z[i] = sum; 1535 } 1536 } 1537 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1538 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1539 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 1544 PetscErrorCode MatMultAdd_SeqAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1545 { 1546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1547 PetscScalar *y,*z; 1548 const PetscScalar *x; 1549 const MatScalar *aa; 1550 PetscErrorCode ierr; 1551 const PetscInt *aj,*ii,*ridx=NULL; 1552 PetscInt m = A->rmap->n,n,i; 1553 PetscScalar sum; 1554 PetscBool usecprow=a->compressedrow.use; 1555 1556 PetscFunctionBegin; 1557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1558 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1559 if (usecprow) { /* use compressed row format */ 1560 if (zz != yy) { 1561 ierr = PetscMemcpy(z,y,m*sizeof(PetscScalar));CHKERRQ(ierr); 1562 } 1563 m = a->compressedrow.nrows; 1564 ii = a->compressedrow.i; 1565 ridx = a->compressedrow.rindex; 1566 for (i=0; i<m; i++) { 1567 n = ii[i+1] - ii[i]; 1568 aj = a->j + ii[i]; 1569 aa = a->a + ii[i]; 1570 sum = y[*ridx]; 1571 PetscSparseDensePlusDot(sum,x,aa,aj,n); 1572 z[*ridx++] = sum; 1573 } 1574 } else { /* do not use compressed row format */ 1575 ii = a->i; 1576 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ) 1577 aj = a->j; 1578 aa = a->a; 1579 fortranmultaddaij_(&m,x,ii,aj,aa,y,z); 1580 #else 1581 for (i=0; i<m; i++) { 1582 n = ii[i+1] - ii[i]; 1583 aj = a->j + ii[i]; 1584 aa = a->a + ii[i]; 1585 sum = y[i]; 1586 PetscSparseDensePlusDot(sum,x,aa,aj,n); 1587 z[i] = sum; 1588 } 1589 #endif 1590 } 1591 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1592 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 1594 #if defined(PETSC_HAVE_CUSP) 1595 /* 1596 ierr = VecView(xx,0);CHKERRQ(ierr); 1597 ierr = VecView(zz,0);CHKERRQ(ierr); 1598 ierr = MatView(A,0);CHKERRQ(ierr); 1599 */ 1600 #endif 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* 1605 Adds diagonal pointers to sparse matrix structure. 1606 */ 1607 PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat A) 1608 { 1609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1610 PetscErrorCode ierr; 1611 PetscInt i,j,m = A->rmap->n; 1612 1613 PetscFunctionBegin; 1614 if (!a->diag) { 1615 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1616 ierr = PetscLogObjectMemory((PetscObject)A, m*sizeof(PetscInt));CHKERRQ(ierr); 1617 } 1618 for (i=0; i<A->rmap->n; i++) { 1619 a->diag[i] = a->i[i+1]; 1620 for (j=a->i[i]; j<a->i[i+1]; j++) { 1621 if (a->j[j] == i) { 1622 a->diag[i] = j; 1623 break; 1624 } 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 /* 1631 Checks for missing diagonals 1632 */ 1633 PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat A,PetscBool *missing,PetscInt *d) 1634 { 1635 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1636 PetscInt *diag,*ii = a->i,i; 1637 1638 PetscFunctionBegin; 1639 *missing = PETSC_FALSE; 1640 if (A->rmap->n > 0 && !ii) { 1641 *missing = PETSC_TRUE; 1642 if (d) *d = 0; 1643 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1644 } else { 1645 diag = a->diag; 1646 for (i=0; i<A->rmap->n; i++) { 1647 if (diag[i] >= ii[i+1]) { 1648 *missing = PETSC_TRUE; 1649 if (d) *d = i; 1650 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 1651 break; 1652 } 1653 } 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 /* 1659 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 1660 */ 1661 PetscErrorCode MatInvertDiagonal_SeqAIJ(Mat A,PetscScalar omega,PetscScalar fshift) 1662 { 1663 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 1664 PetscErrorCode ierr; 1665 PetscInt i,*diag,m = A->rmap->n; 1666 MatScalar *v = a->a; 1667 PetscScalar *idiag,*mdiag; 1668 1669 PetscFunctionBegin; 1670 if (a->idiagvalid) PetscFunctionReturn(0); 1671 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1672 diag = a->diag; 1673 if (!a->idiag) { 1674 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 1675 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 1676 v = a->a; 1677 } 1678 mdiag = a->mdiag; 1679 idiag = a->idiag; 1680 1681 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 1682 for (i=0; i<m; i++) { 1683 mdiag[i] = v[diag[i]]; 1684 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 1685 if (PetscRealPart(fshift)) { 1686 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 1687 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1688 A->factorerror_zeropivot_value = 0.0; 1689 A->factorerror_zeropivot_row = i; 1690 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 1691 } 1692 idiag[i] = 1.0/v[diag[i]]; 1693 } 1694 ierr = PetscLogFlops(m);CHKERRQ(ierr); 1695 } else { 1696 for (i=0; i<m; i++) { 1697 mdiag[i] = v[diag[i]]; 1698 idiag[i] = omega/(fshift + v[diag[i]]); 1699 } 1700 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 1701 } 1702 a->idiagvalid = PETSC_TRUE; 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #include <../src/mat/impls/aij/seq/ftn-kernels/frelax.h> 1707 PetscErrorCode MatSOR_SeqAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1708 { 1709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1710 PetscScalar *x,d,sum,*t,scale; 1711 const MatScalar *v,*idiag=0,*mdiag; 1712 const PetscScalar *b, *bs,*xb, *ts; 1713 PetscErrorCode ierr; 1714 PetscInt n,m = A->rmap->n,i; 1715 const PetscInt *idx,*diag; 1716 1717 PetscFunctionBegin; 1718 its = its*lits; 1719 1720 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1721 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqAIJ(A,omega,fshift);CHKERRQ(ierr);} 1722 a->fshift = fshift; 1723 a->omega = omega; 1724 1725 diag = a->diag; 1726 t = a->ssor_work; 1727 idiag = a->idiag; 1728 mdiag = a->mdiag; 1729 1730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1731 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1732 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1733 if (flag == SOR_APPLY_UPPER) { 1734 /* apply (U + D/omega) to the vector */ 1735 bs = b; 1736 for (i=0; i<m; i++) { 1737 d = fshift + mdiag[i]; 1738 n = a->i[i+1] - diag[i] - 1; 1739 idx = a->j + diag[i] + 1; 1740 v = a->a + diag[i] + 1; 1741 sum = b[i]*d/omega; 1742 PetscSparseDensePlusDot(sum,bs,v,idx,n); 1743 x[i] = sum; 1744 } 1745 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1746 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1747 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1752 else if (flag & SOR_EISENSTAT) { 1753 /* Let A = L + U + D; where L is lower trianglar, 1754 U is upper triangular, E = D/omega; This routine applies 1755 1756 (L + E)^{-1} A (U + E)^{-1} 1757 1758 to a vector efficiently using Eisenstat's trick. 1759 */ 1760 scale = (2.0/omega) - 1.0; 1761 1762 /* x = (E + U)^{-1} b */ 1763 for (i=m-1; i>=0; i--) { 1764 n = a->i[i+1] - diag[i] - 1; 1765 idx = a->j + diag[i] + 1; 1766 v = a->a + diag[i] + 1; 1767 sum = b[i]; 1768 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1769 x[i] = sum*idiag[i]; 1770 } 1771 1772 /* t = b - (2*E - D)x */ 1773 v = a->a; 1774 for (i=0; i<m; i++) t[i] = b[i] - scale*(v[*diag++])*x[i]; 1775 1776 /* t = (E + L)^{-1}t */ 1777 ts = t; 1778 diag = a->diag; 1779 for (i=0; i<m; i++) { 1780 n = diag[i] - a->i[i]; 1781 idx = a->j + a->i[i]; 1782 v = a->a + a->i[i]; 1783 sum = t[i]; 1784 PetscSparseDenseMinusDot(sum,ts,v,idx,n); 1785 t[i] = sum*idiag[i]; 1786 /* x = x + t */ 1787 x[i] += t[i]; 1788 } 1789 1790 ierr = PetscLogFlops(6.0*m-1 + 2.0*a->nz);CHKERRQ(ierr); 1791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1792 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1793 PetscFunctionReturn(0); 1794 } 1795 if (flag & SOR_ZERO_INITIAL_GUESS) { 1796 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1797 for (i=0; i<m; i++) { 1798 n = diag[i] - a->i[i]; 1799 idx = a->j + a->i[i]; 1800 v = a->a + a->i[i]; 1801 sum = b[i]; 1802 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1803 t[i] = sum; 1804 x[i] = sum*idiag[i]; 1805 } 1806 xb = t; 1807 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1808 } else xb = b; 1809 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1810 for (i=m-1; i>=0; i--) { 1811 n = a->i[i+1] - diag[i] - 1; 1812 idx = a->j + diag[i] + 1; 1813 v = a->a + diag[i] + 1; 1814 sum = xb[i]; 1815 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1816 if (xb == b) { 1817 x[i] = sum*idiag[i]; 1818 } else { 1819 x[i] = (1-omega)*x[i] + sum*idiag[i]; /* omega in idiag */ 1820 } 1821 } 1822 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1823 } 1824 its--; 1825 } 1826 while (its--) { 1827 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1828 for (i=0; i<m; i++) { 1829 /* lower */ 1830 n = diag[i] - a->i[i]; 1831 idx = a->j + a->i[i]; 1832 v = a->a + a->i[i]; 1833 sum = b[i]; 1834 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1835 t[i] = sum; /* save application of the lower-triangular part */ 1836 /* upper */ 1837 n = a->i[i+1] - diag[i] - 1; 1838 idx = a->j + diag[i] + 1; 1839 v = a->a + diag[i] + 1; 1840 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1841 x[i] = (1. - omega)*x[i] + sum*idiag[i]; /* omega in idiag */ 1842 } 1843 xb = t; 1844 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1845 } else xb = b; 1846 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1847 for (i=m-1; i>=0; i--) { 1848 sum = xb[i]; 1849 if (xb == b) { 1850 /* whole matrix (no checkpointing available) */ 1851 n = a->i[i+1] - a->i[i]; 1852 idx = a->j + a->i[i]; 1853 v = a->a + a->i[i]; 1854 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1855 x[i] = (1. - omega)*x[i] + (sum + mdiag[i]*x[i])*idiag[i]; 1856 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1857 n = a->i[i+1] - diag[i] - 1; 1858 idx = a->j + diag[i] + 1; 1859 v = a->a + diag[i] + 1; 1860 PetscSparseDenseMinusDot(sum,x,v,idx,n); 1861 x[i] = (1. - omega)*x[i] + sum*idiag[i]; /* omega in idiag */ 1862 } 1863 } 1864 if (xb == b) { 1865 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1866 } else { 1867 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1868 } 1869 } 1870 } 1871 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1872 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 1877 PetscErrorCode MatGetInfo_SeqAIJ(Mat A,MatInfoType flag,MatInfo *info) 1878 { 1879 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1880 1881 PetscFunctionBegin; 1882 info->block_size = 1.0; 1883 info->nz_allocated = (double)a->maxnz; 1884 info->nz_used = (double)a->nz; 1885 info->nz_unneeded = (double)(a->maxnz - a->nz); 1886 info->assemblies = (double)A->num_ass; 1887 info->mallocs = (double)A->info.mallocs; 1888 info->memory = ((PetscObject)A)->mem; 1889 if (A->factortype) { 1890 info->fill_ratio_given = A->info.fill_ratio_given; 1891 info->fill_ratio_needed = A->info.fill_ratio_needed; 1892 info->factor_mallocs = A->info.factor_mallocs; 1893 } else { 1894 info->fill_ratio_given = 0; 1895 info->fill_ratio_needed = 0; 1896 info->factor_mallocs = 0; 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode MatZeroRows_SeqAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1902 { 1903 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1904 PetscInt i,m = A->rmap->n - 1; 1905 PetscErrorCode ierr; 1906 const PetscScalar *xx; 1907 PetscScalar *bb; 1908 PetscInt d = 0; 1909 1910 PetscFunctionBegin; 1911 if (x && b) { 1912 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1913 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1914 for (i=0; i<N; i++) { 1915 if (rows[i] < 0 || rows[i] > m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range", rows[i]); 1916 bb[rows[i]] = diag*xx[rows[i]]; 1917 } 1918 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1920 } 1921 1922 if (a->keepnonzeropattern) { 1923 for (i=0; i<N; i++) { 1924 if (rows[i] < 0 || rows[i] > m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range", rows[i]); 1925 ierr = PetscMemzero(&a->a[a->i[rows[i]]],a->ilen[rows[i]]*sizeof(PetscScalar));CHKERRQ(ierr); 1926 } 1927 if (diag != 0.0) { 1928 for (i=0; i<N; i++) { 1929 d = rows[i]; 1930 if (a->diag[d] >= a->i[d+1]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry in the zeroed row %D",d); 1931 } 1932 for (i=0; i<N; i++) { 1933 a->a[a->diag[rows[i]]] = diag; 1934 } 1935 } 1936 } else { 1937 if (diag != 0.0) { 1938 for (i=0; i<N; i++) { 1939 if (rows[i] < 0 || rows[i] > m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range", rows[i]); 1940 if (a->ilen[rows[i]] > 0) { 1941 a->ilen[rows[i]] = 1; 1942 a->a[a->i[rows[i]]] = diag; 1943 a->j[a->i[rows[i]]] = rows[i]; 1944 } else { /* in case row was completely empty */ 1945 ierr = MatSetValues_SeqAIJ(A,1,&rows[i],1,&rows[i],&diag,INSERT_VALUES);CHKERRQ(ierr); 1946 } 1947 } 1948 } else { 1949 for (i=0; i<N; i++) { 1950 if (rows[i] < 0 || rows[i] > m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range", rows[i]); 1951 a->ilen[rows[i]] = 0; 1952 } 1953 } 1954 A->nonzerostate++; 1955 } 1956 ierr = (*A->ops->assemblyend)(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatZeroRowsColumns_SeqAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1961 { 1962 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1963 PetscInt i,j,m = A->rmap->n - 1,d = 0; 1964 PetscErrorCode ierr; 1965 PetscBool missing,*zeroed,vecs = PETSC_FALSE; 1966 const PetscScalar *xx; 1967 PetscScalar *bb; 1968 1969 PetscFunctionBegin; 1970 if (x && b) { 1971 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1972 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1973 vecs = PETSC_TRUE; 1974 } 1975 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 1976 for (i=0; i<N; i++) { 1977 if (rows[i] < 0 || rows[i] > m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range", rows[i]); 1978 ierr = PetscMemzero(&a->a[a->i[rows[i]]],a->ilen[rows[i]]*sizeof(PetscScalar));CHKERRQ(ierr); 1979 1980 zeroed[rows[i]] = PETSC_TRUE; 1981 } 1982 for (i=0; i<A->rmap->n; i++) { 1983 if (!zeroed[i]) { 1984 for (j=a->i[i]; j<a->i[i+1]; j++) { 1985 if (zeroed[a->j[j]]) { 1986 if (vecs) bb[i] -= a->a[j]*xx[a->j[j]]; 1987 a->a[j] = 0.0; 1988 } 1989 } 1990 } else if (vecs) bb[i] = diag*xx[i]; 1991 } 1992 if (x && b) { 1993 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1994 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1995 } 1996 ierr = PetscFree(zeroed);CHKERRQ(ierr); 1997 if (diag != 0.0) { 1998 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&d);CHKERRQ(ierr); 1999 if (missing) { 2000 if (a->nonew) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry in row %D",d); 2001 else { 2002 for (i=0; i<N; i++) { 2003 ierr = MatSetValues_SeqAIJ(A,1,&rows[i],1,&rows[i],&diag,INSERT_VALUES);CHKERRQ(ierr); 2004 } 2005 } 2006 } else { 2007 for (i=0; i<N; i++) { 2008 a->a[a->diag[rows[i]]] = diag; 2009 } 2010 } 2011 } 2012 ierr = (*A->ops->assemblyend)(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatGetRow_SeqAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2017 { 2018 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2019 PetscInt *itmp; 2020 2021 PetscFunctionBegin; 2022 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 2023 2024 *nz = a->i[row+1] - a->i[row]; 2025 if (v) *v = a->a + a->i[row]; 2026 if (idx) { 2027 itmp = a->j + a->i[row]; 2028 if (*nz) *idx = itmp; 2029 else *idx = 0; 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /* remove this function? */ 2035 PetscErrorCode MatRestoreRow_SeqAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2036 { 2037 PetscFunctionBegin; 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode MatNorm_SeqAIJ(Mat A,NormType type,PetscReal *nrm) 2042 { 2043 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2044 MatScalar *v = a->a; 2045 PetscReal sum = 0.0; 2046 PetscErrorCode ierr; 2047 PetscInt i,j; 2048 2049 PetscFunctionBegin; 2050 if (type == NORM_FROBENIUS) { 2051 #if defined(PETSC_USE_REAL___FP16) 2052 PetscBLASInt one = 1,nz = a->nz; 2053 *nrm = BLASnrm2_(&nz,v,&one); 2054 #else 2055 for (i=0; i<a->nz; i++) { 2056 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2057 } 2058 *nrm = PetscSqrtReal(sum); 2059 #endif 2060 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 2061 } else if (type == NORM_1) { 2062 PetscReal *tmp; 2063 PetscInt *jj = a->j; 2064 ierr = PetscCalloc1(A->cmap->n+1,&tmp);CHKERRQ(ierr); 2065 *nrm = 0.0; 2066 for (j=0; j<a->nz; j++) { 2067 tmp[*jj++] += PetscAbsScalar(*v); v++; 2068 } 2069 for (j=0; j<A->cmap->n; j++) { 2070 if (tmp[j] > *nrm) *nrm = tmp[j]; 2071 } 2072 ierr = PetscFree(tmp);CHKERRQ(ierr); 2073 ierr = PetscLogFlops(PetscMax(a->nz-1,0));CHKERRQ(ierr); 2074 } else if (type == NORM_INFINITY) { 2075 *nrm = 0.0; 2076 for (j=0; j<A->rmap->n; j++) { 2077 v = a->a + a->i[j]; 2078 sum = 0.0; 2079 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 2080 sum += PetscAbsScalar(*v); v++; 2081 } 2082 if (sum > *nrm) *nrm = sum; 2083 } 2084 ierr = PetscLogFlops(PetscMax(a->nz-1,0));CHKERRQ(ierr); 2085 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for two norm"); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 /* Merged from MatGetSymbolicTranspose_SeqAIJ() - replace MatGetSymbolicTranspose_SeqAIJ()? */ 2090 PetscErrorCode MatTransposeSymbolic_SeqAIJ(Mat A,Mat *B) 2091 { 2092 PetscErrorCode ierr; 2093 PetscInt i,j,anzj; 2094 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 2095 PetscInt an=A->cmap->N,am=A->rmap->N; 2096 PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 2097 2098 PetscFunctionBegin; 2099 /* Allocate space for symbolic transpose info and work array */ 2100 ierr = PetscCalloc1(an+1,&ati);CHKERRQ(ierr); 2101 ierr = PetscMalloc1(ai[am],&atj);CHKERRQ(ierr); 2102 ierr = PetscMalloc1(an,&atfill);CHKERRQ(ierr); 2103 2104 /* Walk through aj and count ## of non-zeros in each row of A^T. */ 2105 /* Note: offset by 1 for fast conversion into csr format. */ 2106 for (i=0;i<ai[am];i++) ati[aj[i]+1] += 1; 2107 /* Form ati for csr format of A^T. */ 2108 for (i=0;i<an;i++) ati[i+1] += ati[i]; 2109 2110 /* Copy ati into atfill so we have locations of the next free space in atj */ 2111 ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 2112 2113 /* Walk through A row-wise and mark nonzero entries of A^T. */ 2114 for (i=0;i<am;i++) { 2115 anzj = ai[i+1] - ai[i]; 2116 for (j=0;j<anzj;j++) { 2117 atj[atfill[*aj]] = i; 2118 atfill[*aj++] += 1; 2119 } 2120 } 2121 2122 /* Clean up temporary space and complete requests. */ 2123 ierr = PetscFree(atfill);CHKERRQ(ierr); 2124 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),an,am,ati,atj,NULL,B);CHKERRQ(ierr); 2125 ierr = MatSetBlockSizes(*B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2126 2127 b = (Mat_SeqAIJ*)((*B)->data); 2128 b->free_a = PETSC_FALSE; 2129 b->free_ij = PETSC_TRUE; 2130 b->nonew = 0; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 PetscErrorCode MatTranspose_SeqAIJ(Mat A,MatReuse reuse,Mat *B) 2135 { 2136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2137 Mat C; 2138 PetscErrorCode ierr; 2139 PetscInt i,*aj = a->j,*ai = a->i,m = A->rmap->n,len,*col; 2140 MatScalar *array = a->a; 2141 2142 PetscFunctionBegin; 2143 if (reuse == MAT_INPLACE_MATRIX && m != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2144 2145 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 2146 ierr = PetscCalloc1(1+A->cmap->n,&col);CHKERRQ(ierr); 2147 2148 for (i=0; i<ai[m]; i++) col[aj[i]] += 1; 2149 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 2150 ierr = MatSetSizes(C,A->cmap->n,m,A->cmap->n,m);CHKERRQ(ierr); 2151 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2152 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2153 ierr = MatSeqAIJSetPreallocation_SeqAIJ(C,0,col);CHKERRQ(ierr); 2154 ierr = PetscFree(col);CHKERRQ(ierr); 2155 } else { 2156 C = *B; 2157 } 2158 2159 for (i=0; i<m; i++) { 2160 len = ai[i+1]-ai[i]; 2161 ierr = MatSetValues_SeqAIJ(C,len,aj,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2162 array += len; 2163 aj += len; 2164 } 2165 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 2168 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2169 *B = C; 2170 } else { 2171 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode MatIsTranspose_SeqAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 2177 { 2178 Mat_SeqAIJ *aij = (Mat_SeqAIJ*) A->data,*bij = (Mat_SeqAIJ*) B->data; 2179 PetscInt *adx,*bdx,*aii,*bii,*aptr,*bptr; 2180 MatScalar *va,*vb; 2181 PetscErrorCode ierr; 2182 PetscInt ma,na,mb,nb, i; 2183 2184 PetscFunctionBegin; 2185 ierr = MatGetSize(A,&ma,&na);CHKERRQ(ierr); 2186 ierr = MatGetSize(B,&mb,&nb);CHKERRQ(ierr); 2187 if (ma!=nb || na!=mb) { 2188 *f = PETSC_FALSE; 2189 PetscFunctionReturn(0); 2190 } 2191 aii = aij->i; bii = bij->i; 2192 adx = aij->j; bdx = bij->j; 2193 va = aij->a; vb = bij->a; 2194 ierr = PetscMalloc1(ma,&aptr);CHKERRQ(ierr); 2195 ierr = PetscMalloc1(mb,&bptr);CHKERRQ(ierr); 2196 for (i=0; i<ma; i++) aptr[i] = aii[i]; 2197 for (i=0; i<mb; i++) bptr[i] = bii[i]; 2198 2199 *f = PETSC_TRUE; 2200 for (i=0; i<ma; i++) { 2201 while (aptr[i]<aii[i+1]) { 2202 PetscInt idc,idr; 2203 PetscScalar vc,vr; 2204 /* column/row index/value */ 2205 idc = adx[aptr[i]]; 2206 idr = bdx[bptr[idc]]; 2207 vc = va[aptr[i]]; 2208 vr = vb[bptr[idc]]; 2209 if (i!=idr || PetscAbsScalar(vc-vr) > tol) { 2210 *f = PETSC_FALSE; 2211 goto done; 2212 } else { 2213 aptr[i]++; 2214 if (B || i!=idc) bptr[idc]++; 2215 } 2216 } 2217 } 2218 done: 2219 ierr = PetscFree(aptr);CHKERRQ(ierr); 2220 ierr = PetscFree(bptr);CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 PetscErrorCode MatIsHermitianTranspose_SeqAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 2225 { 2226 Mat_SeqAIJ *aij = (Mat_SeqAIJ*) A->data,*bij = (Mat_SeqAIJ*) B->data; 2227 PetscInt *adx,*bdx,*aii,*bii,*aptr,*bptr; 2228 MatScalar *va,*vb; 2229 PetscErrorCode ierr; 2230 PetscInt ma,na,mb,nb, i; 2231 2232 PetscFunctionBegin; 2233 ierr = MatGetSize(A,&ma,&na);CHKERRQ(ierr); 2234 ierr = MatGetSize(B,&mb,&nb);CHKERRQ(ierr); 2235 if (ma!=nb || na!=mb) { 2236 *f = PETSC_FALSE; 2237 PetscFunctionReturn(0); 2238 } 2239 aii = aij->i; bii = bij->i; 2240 adx = aij->j; bdx = bij->j; 2241 va = aij->a; vb = bij->a; 2242 ierr = PetscMalloc1(ma,&aptr);CHKERRQ(ierr); 2243 ierr = PetscMalloc1(mb,&bptr);CHKERRQ(ierr); 2244 for (i=0; i<ma; i++) aptr[i] = aii[i]; 2245 for (i=0; i<mb; i++) bptr[i] = bii[i]; 2246 2247 *f = PETSC_TRUE; 2248 for (i=0; i<ma; i++) { 2249 while (aptr[i]<aii[i+1]) { 2250 PetscInt idc,idr; 2251 PetscScalar vc,vr; 2252 /* column/row index/value */ 2253 idc = adx[aptr[i]]; 2254 idr = bdx[bptr[idc]]; 2255 vc = va[aptr[i]]; 2256 vr = vb[bptr[idc]]; 2257 if (i!=idr || PetscAbsScalar(vc-PetscConj(vr)) > tol) { 2258 *f = PETSC_FALSE; 2259 goto done; 2260 } else { 2261 aptr[i]++; 2262 if (B || i!=idc) bptr[idc]++; 2263 } 2264 } 2265 } 2266 done: 2267 ierr = PetscFree(aptr);CHKERRQ(ierr); 2268 ierr = PetscFree(bptr);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 PetscErrorCode MatIsSymmetric_SeqAIJ(Mat A,PetscReal tol,PetscBool *f) 2273 { 2274 PetscErrorCode ierr; 2275 2276 PetscFunctionBegin; 2277 ierr = MatIsTranspose_SeqAIJ(A,A,tol,f);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 2281 PetscErrorCode MatIsHermitian_SeqAIJ(Mat A,PetscReal tol,PetscBool *f) 2282 { 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatIsHermitianTranspose_SeqAIJ(A,A,tol,f);CHKERRQ(ierr); 2287 PetscFunctionReturn(0); 2288 } 2289 2290 PetscErrorCode MatDiagonalScale_SeqAIJ(Mat A,Vec ll,Vec rr) 2291 { 2292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2293 const PetscScalar *l,*r; 2294 PetscScalar x; 2295 MatScalar *v; 2296 PetscErrorCode ierr; 2297 PetscInt i,j,m = A->rmap->n,n = A->cmap->n,M,nz = a->nz; 2298 const PetscInt *jj; 2299 2300 PetscFunctionBegin; 2301 if (ll) { 2302 /* The local size is used so that VecMPI can be passed to this routine 2303 by MatDiagonalScale_MPIAIJ */ 2304 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 2305 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 2306 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 2307 v = a->a; 2308 for (i=0; i<m; i++) { 2309 x = l[i]; 2310 M = a->i[i+1] - a->i[i]; 2311 for (j=0; j<M; j++) (*v++) *= x; 2312 } 2313 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 2314 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 2315 } 2316 if (rr) { 2317 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 2318 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 2319 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 2320 v = a->a; jj = a->j; 2321 for (i=0; i<nz; i++) (*v++) *= r[*jj++]; 2322 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 2323 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 2324 } 2325 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatCreateSubMatrix_SeqAIJ(Mat A,IS isrow,IS iscol,PetscInt csize,MatReuse scall,Mat *B) 2330 { 2331 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*c; 2332 PetscErrorCode ierr; 2333 PetscInt *smap,i,k,kstart,kend,oldcols = A->cmap->n,*lens; 2334 PetscInt row,mat_i,*mat_j,tcol,first,step,*mat_ilen,sum,lensi; 2335 const PetscInt *irow,*icol; 2336 PetscInt nrows,ncols; 2337 PetscInt *starts,*j_new,*i_new,*aj = a->j,*ai = a->i,ii,*ailen = a->ilen; 2338 MatScalar *a_new,*mat_a; 2339 Mat C; 2340 PetscBool stride; 2341 2342 PetscFunctionBegin; 2343 2344 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2345 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 2346 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2347 2348 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);CHKERRQ(ierr); 2349 if (stride) { 2350 ierr = ISStrideGetInfo(iscol,&first,&step);CHKERRQ(ierr); 2351 } else { 2352 first = 0; 2353 step = 0; 2354 } 2355 if (stride && step == 1) { 2356 /* special case of contiguous rows */ 2357 ierr = PetscMalloc2(nrows,&lens,nrows,&starts);CHKERRQ(ierr); 2358 /* loop over new rows determining lens and starting points */ 2359 for (i=0; i<nrows; i++) { 2360 kstart = ai[irow[i]]; 2361 kend = kstart + ailen[irow[i]]; 2362 starts[i] = kstart; 2363 for (k=kstart; k<kend; k++) { 2364 if (aj[k] >= first) { 2365 starts[i] = k; 2366 break; 2367 } 2368 } 2369 sum = 0; 2370 while (k < kend) { 2371 if (aj[k++] >= first+ncols) break; 2372 sum++; 2373 } 2374 lens[i] = sum; 2375 } 2376 /* create submatrix */ 2377 if (scall == MAT_REUSE_MATRIX) { 2378 PetscInt n_cols,n_rows; 2379 ierr = MatGetSize(*B,&n_rows,&n_cols);CHKERRQ(ierr); 2380 if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); 2381 ierr = MatZeroEntries(*B);CHKERRQ(ierr); 2382 C = *B; 2383 } else { 2384 PetscInt rbs,cbs; 2385 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 2386 ierr = MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2387 ierr = ISGetBlockSize(isrow,&rbs);CHKERRQ(ierr); 2388 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2389 ierr = MatSetBlockSizes(C,rbs,cbs);CHKERRQ(ierr); 2390 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2391 ierr = MatSeqAIJSetPreallocation_SeqAIJ(C,0,lens);CHKERRQ(ierr); 2392 } 2393 c = (Mat_SeqAIJ*)C->data; 2394 2395 /* loop over rows inserting into submatrix */ 2396 a_new = c->a; 2397 j_new = c->j; 2398 i_new = c->i; 2399 2400 for (i=0; i<nrows; i++) { 2401 ii = starts[i]; 2402 lensi = lens[i]; 2403 for (k=0; k<lensi; k++) { 2404 *j_new++ = aj[ii+k] - first; 2405 } 2406 ierr = PetscMemcpy(a_new,a->a + starts[i],lensi*sizeof(PetscScalar));CHKERRQ(ierr); 2407 a_new += lensi; 2408 i_new[i+1] = i_new[i] + lensi; 2409 c->ilen[i] = lensi; 2410 } 2411 ierr = PetscFree2(lens,starts);CHKERRQ(ierr); 2412 } else { 2413 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 2414 ierr = PetscCalloc1(oldcols,&smap);CHKERRQ(ierr); 2415 ierr = PetscMalloc1(1+nrows,&lens);CHKERRQ(ierr); 2416 for (i=0; i<ncols; i++) { 2417 #if defined(PETSC_USE_DEBUG) 2418 if (icol[i] >= oldcols) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Requesting column beyond largest column icol[%D] %D <= A->cmap->n %D",i,icol[i],oldcols); 2419 #endif 2420 smap[icol[i]] = i+1; 2421 } 2422 2423 /* determine lens of each row */ 2424 for (i=0; i<nrows; i++) { 2425 kstart = ai[irow[i]]; 2426 kend = kstart + a->ilen[irow[i]]; 2427 lens[i] = 0; 2428 for (k=kstart; k<kend; k++) { 2429 if (smap[aj[k]]) { 2430 lens[i]++; 2431 } 2432 } 2433 } 2434 /* Create and fill new matrix */ 2435 if (scall == MAT_REUSE_MATRIX) { 2436 PetscBool equal; 2437 2438 c = (Mat_SeqAIJ*)((*B)->data); 2439 if ((*B)->rmap->n != nrows || (*B)->cmap->n != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 2440 ierr = PetscMemcmp(c->ilen,lens,(*B)->rmap->n*sizeof(PetscInt),&equal);CHKERRQ(ierr); 2441 if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 2442 ierr = PetscMemzero(c->ilen,(*B)->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 2443 C = *B; 2444 } else { 2445 PetscInt rbs,cbs; 2446 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 2447 ierr = MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2448 ierr = ISGetBlockSize(isrow,&rbs);CHKERRQ(ierr); 2449 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2450 ierr = MatSetBlockSizes(C,rbs,cbs);CHKERRQ(ierr); 2451 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2452 ierr = MatSeqAIJSetPreallocation_SeqAIJ(C,0,lens);CHKERRQ(ierr); 2453 } 2454 c = (Mat_SeqAIJ*)(C->data); 2455 for (i=0; i<nrows; i++) { 2456 row = irow[i]; 2457 kstart = ai[row]; 2458 kend = kstart + a->ilen[row]; 2459 mat_i = c->i[i]; 2460 mat_j = c->j + mat_i; 2461 mat_a = c->a + mat_i; 2462 mat_ilen = c->ilen + i; 2463 for (k=kstart; k<kend; k++) { 2464 if ((tcol=smap[a->j[k]])) { 2465 *mat_j++ = tcol - 1; 2466 *mat_a++ = a->a[k]; 2467 (*mat_ilen)++; 2468 2469 } 2470 } 2471 } 2472 /* Free work space */ 2473 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 2474 ierr = PetscFree(smap);CHKERRQ(ierr); 2475 ierr = PetscFree(lens);CHKERRQ(ierr); 2476 /* sort */ 2477 for (i = 0; i < nrows; i++) { 2478 PetscInt ilen; 2479 2480 mat_i = c->i[i]; 2481 mat_j = c->j + mat_i; 2482 mat_a = c->a + mat_i; 2483 ilen = c->ilen[i]; 2484 ierr = PetscSortIntWithScalarArray(ilen,mat_j,mat_a);CHKERRQ(ierr); 2485 } 2486 } 2487 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 2490 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 2491 *B = C; 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatGetMultiProcBlock_SeqAIJ(Mat mat,MPI_Comm subComm,MatReuse scall,Mat *subMat) 2496 { 2497 PetscErrorCode ierr; 2498 Mat B; 2499 2500 PetscFunctionBegin; 2501 if (scall == MAT_INITIAL_MATRIX) { 2502 ierr = MatCreate(subComm,&B);CHKERRQ(ierr); 2503 ierr = MatSetSizes(B,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr); 2504 ierr = MatSetBlockSizesFromMats(B,mat,mat);CHKERRQ(ierr); 2505 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2506 ierr = MatDuplicateNoCreate_SeqAIJ(B,mat,MAT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 2507 *subMat = B; 2508 } else { 2509 ierr = MatCopy_SeqAIJ(mat,*subMat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2510 } 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatILUFactor_SeqAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2515 { 2516 Mat_SeqAIJ *a = (Mat_SeqAIJ*)inA->data; 2517 PetscErrorCode ierr; 2518 Mat outA; 2519 PetscBool row_identity,col_identity; 2520 2521 PetscFunctionBegin; 2522 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels=0 supported for in-place ilu"); 2523 2524 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2525 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2526 2527 outA = inA; 2528 outA->factortype = MAT_FACTOR_LU; 2529 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2530 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2531 2532 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2533 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2534 2535 a->row = row; 2536 2537 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2538 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2539 2540 a->col = col; 2541 2542 /* Create the inverse permutation so that it can be used in MatLUFactorNumeric() */ 2543 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2544 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2545 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2546 2547 if (!a->solve_work) { /* this matrix may have been factored before */ 2548 ierr = PetscMalloc1(inA->rmap->n+1,&a->solve_work);CHKERRQ(ierr); 2549 ierr = PetscLogObjectMemory((PetscObject)inA, (inA->rmap->n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2550 } 2551 2552 ierr = MatMarkDiagonal_SeqAIJ(inA);CHKERRQ(ierr); 2553 if (row_identity && col_identity) { 2554 ierr = MatLUFactorNumeric_SeqAIJ_inplace(outA,inA,info);CHKERRQ(ierr); 2555 } else { 2556 ierr = MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(outA,inA,info);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatScale_SeqAIJ(Mat inA,PetscScalar alpha) 2562 { 2563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)inA->data; 2564 PetscScalar oalpha = alpha; 2565 PetscErrorCode ierr; 2566 PetscBLASInt one = 1,bnz; 2567 2568 PetscFunctionBegin; 2569 ierr = PetscBLASIntCast(a->nz,&bnz);CHKERRQ(ierr); 2570 PetscStackCallBLAS("BLASscal",BLASscal_(&bnz,&oalpha,a->a,&one)); 2571 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 2572 ierr = MatSeqAIJInvalidateDiagonal(inA);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatDestroySubMatrix_Private(Mat_SubSppt *submatj) 2577 { 2578 PetscErrorCode ierr; 2579 PetscInt i; 2580 2581 PetscFunctionBegin; 2582 if (!submatj->id) { /* delete data that are linked only to submats[id=0] */ 2583 ierr = PetscFree4(submatj->sbuf1,submatj->ptr,submatj->tmp,submatj->ctr);CHKERRQ(ierr); 2584 2585 for (i=0; i<submatj->nrqr; ++i) { 2586 ierr = PetscFree(submatj->sbuf2[i]);CHKERRQ(ierr); 2587 } 2588 ierr = PetscFree3(submatj->sbuf2,submatj->req_size,submatj->req_source1);CHKERRQ(ierr); 2589 2590 if (submatj->rbuf1) { 2591 ierr = PetscFree(submatj->rbuf1[0]);CHKERRQ(ierr); 2592 ierr = PetscFree(submatj->rbuf1);CHKERRQ(ierr); 2593 } 2594 2595 for (i=0; i<submatj->nrqs; ++i) { 2596 ierr = PetscFree(submatj->rbuf3[i]);CHKERRQ(ierr); 2597 } 2598 ierr = PetscFree3(submatj->req_source2,submatj->rbuf2,submatj->rbuf3);CHKERRQ(ierr); 2599 ierr = PetscFree(submatj->pa);CHKERRQ(ierr); 2600 } 2601 2602 #if defined(PETSC_USE_CTABLE) 2603 ierr = PetscTableDestroy((PetscTable*)&submatj->rmap);CHKERRQ(ierr); 2604 if (submatj->cmap_loc) {ierr = PetscFree(submatj->cmap_loc);CHKERRQ(ierr);} 2605 ierr = PetscFree(submatj->rmap_loc);CHKERRQ(ierr); 2606 #else 2607 ierr = PetscFree(submatj->rmap);CHKERRQ(ierr); 2608 #endif 2609 2610 if (!submatj->allcolumns) { 2611 #if defined(PETSC_USE_CTABLE) 2612 ierr = PetscTableDestroy((PetscTable*)&submatj->cmap);CHKERRQ(ierr); 2613 #else 2614 ierr = PetscFree(submatj->cmap);CHKERRQ(ierr); 2615 #endif 2616 } 2617 ierr = PetscFree(submatj->row2proc);CHKERRQ(ierr); 2618 2619 ierr = PetscFree(submatj);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 PetscErrorCode MatDestroySubMatrix_SeqAIJ(Mat C) 2624 { 2625 PetscErrorCode ierr; 2626 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2627 Mat_SubSppt *submatj = c->submatis1; 2628 2629 PetscFunctionBegin; 2630 ierr = submatj->destroy(C);CHKERRQ(ierr); 2631 ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatDestroySubMatrices_SeqAIJ(PetscInt n,Mat *mat[]) 2636 { 2637 PetscErrorCode ierr; 2638 PetscInt i; 2639 Mat C; 2640 Mat_SeqAIJ *c; 2641 Mat_SubSppt *submatj; 2642 2643 PetscFunctionBegin; 2644 for (i=0; i<n; i++) { 2645 C = (*mat)[i]; 2646 c = (Mat_SeqAIJ*)C->data; 2647 submatj = c->submatis1; 2648 if (submatj) { 2649 if (--((PetscObject)C)->refct <= 0) { 2650 ierr = (submatj->destroy)(C);CHKERRQ(ierr); 2651 ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr); 2652 ierr = PetscLayoutDestroy(&C->rmap);CHKERRQ(ierr); 2653 ierr = PetscLayoutDestroy(&C->cmap);CHKERRQ(ierr); 2654 ierr = PetscHeaderDestroy(&C);CHKERRQ(ierr); 2655 } 2656 } else { 2657 ierr = MatDestroy(&C);CHKERRQ(ierr); 2658 } 2659 } 2660 2661 ierr = PetscFree(*mat);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 PetscErrorCode MatCreateSubMatrices_SeqAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 2666 { 2667 PetscErrorCode ierr; 2668 PetscInt i; 2669 2670 PetscFunctionBegin; 2671 if (scall == MAT_INITIAL_MATRIX) { 2672 ierr = PetscCalloc1(n+1,B);CHKERRQ(ierr); 2673 } 2674 2675 for (i=0; i<n; i++) { 2676 ierr = MatCreateSubMatrix_SeqAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 2677 } 2678 PetscFunctionReturn(0); 2679 } 2680 2681 PetscErrorCode MatIncreaseOverlap_SeqAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 2682 { 2683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2684 PetscErrorCode ierr; 2685 PetscInt row,i,j,k,l,m,n,*nidx,isz,val; 2686 const PetscInt *idx; 2687 PetscInt start,end,*ai,*aj; 2688 PetscBT table; 2689 2690 PetscFunctionBegin; 2691 m = A->rmap->n; 2692 ai = a->i; 2693 aj = a->j; 2694 2695 if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"illegal negative overlap value used"); 2696 2697 ierr = PetscMalloc1(m+1,&nidx);CHKERRQ(ierr); 2698 ierr = PetscBTCreate(m,&table);CHKERRQ(ierr); 2699 2700 for (i=0; i<is_max; i++) { 2701 /* Initialize the two local arrays */ 2702 isz = 0; 2703 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 2704 2705 /* Extract the indices, assume there can be duplicate entries */ 2706 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 2707 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 2708 2709 /* Enter these into the temp arrays. I.e., mark table[row], enter row into new index */ 2710 for (j=0; j<n; ++j) { 2711 if (!PetscBTLookupSet(table,idx[j])) nidx[isz++] = idx[j]; 2712 } 2713 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 2714 ierr = ISDestroy(&is[i]);CHKERRQ(ierr); 2715 2716 k = 0; 2717 for (j=0; j<ov; j++) { /* for each overlap */ 2718 n = isz; 2719 for (; k<n; k++) { /* do only those rows in nidx[k], which are not done yet */ 2720 row = nidx[k]; 2721 start = ai[row]; 2722 end = ai[row+1]; 2723 for (l = start; l<end; l++) { 2724 val = aj[l]; 2725 if (!PetscBTLookupSet(table,val)) nidx[isz++] = val; 2726 } 2727 } 2728 } 2729 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_COPY_VALUES,(is+i));CHKERRQ(ierr); 2730 } 2731 ierr = PetscBTDestroy(&table);CHKERRQ(ierr); 2732 ierr = PetscFree(nidx);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 /* -------------------------------------------------------------- */ 2737 PetscErrorCode MatPermute_SeqAIJ(Mat A,IS rowp,IS colp,Mat *B) 2738 { 2739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2740 PetscErrorCode ierr; 2741 PetscInt i,nz = 0,m = A->rmap->n,n = A->cmap->n; 2742 const PetscInt *row,*col; 2743 PetscInt *cnew,j,*lens; 2744 IS icolp,irowp; 2745 PetscInt *cwork = NULL; 2746 PetscScalar *vwork = NULL; 2747 2748 PetscFunctionBegin; 2749 ierr = ISInvertPermutation(rowp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2750 ierr = ISGetIndices(irowp,&row);CHKERRQ(ierr); 2751 ierr = ISInvertPermutation(colp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2752 ierr = ISGetIndices(icolp,&col);CHKERRQ(ierr); 2753 2754 /* determine lengths of permuted rows */ 2755 ierr = PetscMalloc1(m+1,&lens);CHKERRQ(ierr); 2756 for (i=0; i<m; i++) lens[row[i]] = a->i[i+1] - a->i[i]; 2757 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2758 ierr = MatSetSizes(*B,m,n,m,n);CHKERRQ(ierr); 2759 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2760 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2761 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,0,lens);CHKERRQ(ierr); 2762 ierr = PetscFree(lens);CHKERRQ(ierr); 2763 2764 ierr = PetscMalloc1(n,&cnew);CHKERRQ(ierr); 2765 for (i=0; i<m; i++) { 2766 ierr = MatGetRow_SeqAIJ(A,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 2767 for (j=0; j<nz; j++) cnew[j] = col[cwork[j]]; 2768 ierr = MatSetValues_SeqAIJ(*B,1,&row[i],nz,cnew,vwork,INSERT_VALUES);CHKERRQ(ierr); 2769 ierr = MatRestoreRow_SeqAIJ(A,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 2770 } 2771 ierr = PetscFree(cnew);CHKERRQ(ierr); 2772 2773 (*B)->assembled = PETSC_FALSE; 2774 2775 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2776 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777 ierr = ISRestoreIndices(irowp,&row);CHKERRQ(ierr); 2778 ierr = ISRestoreIndices(icolp,&col);CHKERRQ(ierr); 2779 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2780 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 PetscErrorCode MatCopy_SeqAIJ(Mat A,Mat B,MatStructure str) 2785 { 2786 PetscErrorCode ierr; 2787 2788 PetscFunctionBegin; 2789 /* If the two matrices have the same copy implementation, use fast copy. */ 2790 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2791 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2792 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2793 2794 if (a->i[A->rmap->n] != b->i[B->rmap->n]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2795 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->n])*sizeof(PetscScalar));CHKERRQ(ierr); 2796 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2797 } else { 2798 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2799 } 2800 PetscFunctionReturn(0); 2801 } 2802 2803 PetscErrorCode MatSetUp_SeqAIJ(Mat A) 2804 { 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 ierr = MatSeqAIJSetPreallocation_SeqAIJ(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PetscErrorCode MatSeqAIJGetArray_SeqAIJ(Mat A,PetscScalar *array[]) 2813 { 2814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2815 2816 PetscFunctionBegin; 2817 *array = a->a; 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatSeqAIJRestoreArray_SeqAIJ(Mat A,PetscScalar *array[]) 2822 { 2823 PetscFunctionBegin; 2824 PetscFunctionReturn(0); 2825 } 2826 2827 /* 2828 Computes the number of nonzeros per row needed for preallocation when X and Y 2829 have different nonzero structure. 2830 */ 2831 PetscErrorCode MatAXPYGetPreallocation_SeqX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *yi,const PetscInt *yj,PetscInt *nnz) 2832 { 2833 PetscInt i,j,k,nzx,nzy; 2834 2835 PetscFunctionBegin; 2836 /* Set the number of nonzeros in the new matrix */ 2837 for (i=0; i<m; i++) { 2838 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2839 nzx = xi[i+1] - xi[i]; 2840 nzy = yi[i+1] - yi[i]; 2841 nnz[i] = 0; 2842 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2843 for (; k<nzy && yjj[k]<xjj[j]; k++) nnz[i]++; /* Catch up to X */ 2844 if (k<nzy && yjj[k]==xjj[j]) k++; /* Skip duplicate */ 2845 nnz[i]++; 2846 } 2847 for (; k<nzy; k++) nnz[i]++; 2848 } 2849 PetscFunctionReturn(0); 2850 } 2851 2852 PetscErrorCode MatAXPYGetPreallocation_SeqAIJ(Mat Y,Mat X,PetscInt *nnz) 2853 { 2854 PetscInt m = Y->rmap->N; 2855 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2856 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 /* Set the number of nonzeros in the new matrix */ 2861 ierr = MatAXPYGetPreallocation_SeqX_private(m,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 PetscErrorCode MatAXPY_SeqAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2866 { 2867 PetscErrorCode ierr; 2868 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data,*y = (Mat_SeqAIJ*)Y->data; 2869 PetscBLASInt one=1,bnz; 2870 2871 PetscFunctionBegin; 2872 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2873 if (str == SAME_NONZERO_PATTERN) { 2874 PetscScalar alpha = a; 2875 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2876 ierr = MatSeqAIJInvalidateDiagonal(Y);CHKERRQ(ierr); 2877 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2878 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2879 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2880 } else { 2881 Mat B; 2882 PetscInt *nnz; 2883 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2884 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2885 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2886 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2887 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2888 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2889 ierr = MatAXPYGetPreallocation_SeqAIJ(Y,X,nnz);CHKERRQ(ierr); 2890 ierr = MatSeqAIJSetPreallocation(B,0,nnz);CHKERRQ(ierr); 2891 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2892 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2893 ierr = PetscFree(nnz);CHKERRQ(ierr); 2894 } 2895 PetscFunctionReturn(0); 2896 } 2897 2898 PetscErrorCode MatConjugate_SeqAIJ(Mat mat) 2899 { 2900 #if defined(PETSC_USE_COMPLEX) 2901 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)mat->data; 2902 PetscInt i,nz; 2903 PetscScalar *a; 2904 2905 PetscFunctionBegin; 2906 nz = aij->nz; 2907 a = aij->a; 2908 for (i=0; i<nz; i++) a[i] = PetscConj(a[i]); 2909 #else 2910 PetscFunctionBegin; 2911 #endif 2912 PetscFunctionReturn(0); 2913 } 2914 2915 PetscErrorCode MatGetRowMaxAbs_SeqAIJ(Mat A,Vec v,PetscInt idx[]) 2916 { 2917 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2918 PetscErrorCode ierr; 2919 PetscInt i,j,m = A->rmap->n,*ai,*aj,ncols,n; 2920 PetscReal atmp; 2921 PetscScalar *x; 2922 MatScalar *aa; 2923 2924 PetscFunctionBegin; 2925 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2926 aa = a->a; 2927 ai = a->i; 2928 aj = a->j; 2929 2930 ierr = VecSet(v,0.0);CHKERRQ(ierr); 2931 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2932 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2933 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2934 for (i=0; i<m; i++) { 2935 ncols = ai[1] - ai[0]; ai++; 2936 x[i] = 0.0; 2937 for (j=0; j<ncols; j++) { 2938 atmp = PetscAbsScalar(*aa); 2939 if (PetscAbsScalar(x[i]) < atmp) {x[i] = atmp; if (idx) idx[i] = *aj;} 2940 aa++; aj++; 2941 } 2942 } 2943 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PetscErrorCode MatGetRowMax_SeqAIJ(Mat A,Vec v,PetscInt idx[]) 2948 { 2949 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2950 PetscErrorCode ierr; 2951 PetscInt i,j,m = A->rmap->n,*ai,*aj,ncols,n; 2952 PetscScalar *x; 2953 MatScalar *aa; 2954 2955 PetscFunctionBegin; 2956 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2957 aa = a->a; 2958 ai = a->i; 2959 aj = a->j; 2960 2961 ierr = VecSet(v,0.0);CHKERRQ(ierr); 2962 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2963 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2964 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2965 for (i=0; i<m; i++) { 2966 ncols = ai[1] - ai[0]; ai++; 2967 if (ncols == A->cmap->n) { /* row is dense */ 2968 x[i] = *aa; if (idx) idx[i] = 0; 2969 } else { /* row is sparse so already KNOW maximum is 0.0 or higher */ 2970 x[i] = 0.0; 2971 if (idx) { 2972 idx[i] = 0; /* in case ncols is zero */ 2973 for (j=0;j<ncols;j++) { /* find first implicit 0.0 in the row */ 2974 if (aj[j] > j) { 2975 idx[i] = j; 2976 break; 2977 } 2978 } 2979 } 2980 } 2981 for (j=0; j<ncols; j++) { 2982 if (PetscRealPart(x[i]) < PetscRealPart(*aa)) {x[i] = *aa; if (idx) idx[i] = *aj;} 2983 aa++; aj++; 2984 } 2985 } 2986 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 PetscErrorCode MatGetRowMinAbs_SeqAIJ(Mat A,Vec v,PetscInt idx[]) 2991 { 2992 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2993 PetscErrorCode ierr; 2994 PetscInt i,j,m = A->rmap->n,*ai,*aj,ncols,n; 2995 PetscReal atmp; 2996 PetscScalar *x; 2997 MatScalar *aa; 2998 2999 PetscFunctionBegin; 3000 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3001 aa = a->a; 3002 ai = a->i; 3003 aj = a->j; 3004 3005 ierr = VecSet(v,0.0);CHKERRQ(ierr); 3006 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 3007 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 3008 if (n != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector, %D vs. %D rows", A->rmap->n, n); 3009 for (i=0; i<m; i++) { 3010 ncols = ai[1] - ai[0]; ai++; 3011 if (ncols) { 3012 /* Get first nonzero */ 3013 for (j = 0; j < ncols; j++) { 3014 atmp = PetscAbsScalar(aa[j]); 3015 if (atmp > 1.0e-12) { 3016 x[i] = atmp; 3017 if (idx) idx[i] = aj[j]; 3018 break; 3019 } 3020 } 3021 if (j == ncols) {x[i] = PetscAbsScalar(*aa); if (idx) idx[i] = *aj;} 3022 } else { 3023 x[i] = 0.0; if (idx) idx[i] = 0; 3024 } 3025 for (j = 0; j < ncols; j++) { 3026 atmp = PetscAbsScalar(*aa); 3027 if (atmp > 1.0e-12 && PetscAbsScalar(x[i]) > atmp) {x[i] = atmp; if (idx) idx[i] = *aj;} 3028 aa++; aj++; 3029 } 3030 } 3031 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 3032 PetscFunctionReturn(0); 3033 } 3034 3035 PetscErrorCode MatGetRowMin_SeqAIJ(Mat A,Vec v,PetscInt idx[]) 3036 { 3037 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3038 PetscErrorCode ierr; 3039 PetscInt i,j,m = A->rmap->n,ncols,n; 3040 const PetscInt *ai,*aj; 3041 PetscScalar *x; 3042 const MatScalar *aa; 3043 3044 PetscFunctionBegin; 3045 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3046 aa = a->a; 3047 ai = a->i; 3048 aj = a->j; 3049 3050 ierr = VecSet(v,0.0);CHKERRQ(ierr); 3051 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 3052 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 3053 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 3054 for (i=0; i<m; i++) { 3055 ncols = ai[1] - ai[0]; ai++; 3056 if (ncols == A->cmap->n) { /* row is dense */ 3057 x[i] = *aa; if (idx) idx[i] = 0; 3058 } else { /* row is sparse so already KNOW minimum is 0.0 or lower */ 3059 x[i] = 0.0; 3060 if (idx) { /* find first implicit 0.0 in the row */ 3061 idx[i] = 0; /* in case ncols is zero */ 3062 for (j=0; j<ncols; j++) { 3063 if (aj[j] > j) { 3064 idx[i] = j; 3065 break; 3066 } 3067 } 3068 } 3069 } 3070 for (j=0; j<ncols; j++) { 3071 if (PetscRealPart(x[i]) > PetscRealPart(*aa)) {x[i] = *aa; if (idx) idx[i] = *aj;} 3072 aa++; aj++; 3073 } 3074 } 3075 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 #include <petscblaslapack.h> 3080 #include <petsc/private/kernels/blockinvert.h> 3081 3082 PetscErrorCode MatInvertBlockDiagonal_SeqAIJ(Mat A,const PetscScalar **values) 3083 { 3084 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3085 PetscErrorCode ierr; 3086 PetscInt i,bs = PetscAbs(A->rmap->bs),mbs = A->rmap->n/bs,ipvt[5],bs2 = bs*bs,*v_pivots,ij[7],*IJ,j; 3087 MatScalar *diag,work[25],*v_work; 3088 PetscReal shift = 0.0; 3089 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 3090 3091 PetscFunctionBegin; 3092 allowzeropivot = PetscNot(A->erroriffailure); 3093 if (a->ibdiagvalid) { 3094 if (values) *values = a->ibdiag; 3095 PetscFunctionReturn(0); 3096 } 3097 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 3098 if (!a->ibdiag) { 3099 ierr = PetscMalloc1(bs2*mbs,&a->ibdiag);CHKERRQ(ierr); 3100 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 3101 } 3102 diag = a->ibdiag; 3103 if (values) *values = a->ibdiag; 3104 /* factor and invert each block */ 3105 switch (bs) { 3106 case 1: 3107 for (i=0; i<mbs; i++) { 3108 ierr = MatGetValues(A,1,&i,1,&i,diag+i);CHKERRQ(ierr); 3109 if (PetscAbsScalar(diag[i] + shift) < PETSC_MACHINE_EPSILON) { 3110 if (allowzeropivot) { 3111 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3112 A->factorerror_zeropivot_value = PetscAbsScalar(diag[i]); 3113 A->factorerror_zeropivot_row = i; 3114 ierr = PetscInfo3(A,"Zero pivot, row %D pivot %g tolerance %g\n",i,(double)PetscAbsScalar(diag[i]),(double)PETSC_MACHINE_EPSILON);CHKERRQ(ierr); 3115 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot %g tolerance %g",i,(double)PetscAbsScalar(diag[i]),(double)PETSC_MACHINE_EPSILON); 3116 } 3117 diag[i] = (PetscScalar)1.0 / (diag[i] + shift); 3118 } 3119 break; 3120 case 2: 3121 for (i=0; i<mbs; i++) { 3122 ij[0] = 2*i; ij[1] = 2*i + 1; 3123 ierr = MatGetValues(A,2,ij,2,ij,diag);CHKERRQ(ierr); 3124 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3126 ierr = PetscKernel_A_gets_transpose_A_2(diag);CHKERRQ(ierr); 3127 diag += 4; 3128 } 3129 break; 3130 case 3: 3131 for (i=0; i<mbs; i++) { 3132 ij[0] = 3*i; ij[1] = 3*i + 1; ij[2] = 3*i + 2; 3133 ierr = MatGetValues(A,3,ij,3,ij,diag);CHKERRQ(ierr); 3134 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3135 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3136 ierr = PetscKernel_A_gets_transpose_A_3(diag);CHKERRQ(ierr); 3137 diag += 9; 3138 } 3139 break; 3140 case 4: 3141 for (i=0; i<mbs; i++) { 3142 ij[0] = 4*i; ij[1] = 4*i + 1; ij[2] = 4*i + 2; ij[3] = 4*i + 3; 3143 ierr = MatGetValues(A,4,ij,4,ij,diag);CHKERRQ(ierr); 3144 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3146 ierr = PetscKernel_A_gets_transpose_A_4(diag);CHKERRQ(ierr); 3147 diag += 16; 3148 } 3149 break; 3150 case 5: 3151 for (i=0; i<mbs; i++) { 3152 ij[0] = 5*i; ij[1] = 5*i + 1; ij[2] = 5*i + 2; ij[3] = 5*i + 3; ij[4] = 5*i + 4; 3153 ierr = MatGetValues(A,5,ij,5,ij,diag);CHKERRQ(ierr); 3154 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3155 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3156 ierr = PetscKernel_A_gets_transpose_A_5(diag);CHKERRQ(ierr); 3157 diag += 25; 3158 } 3159 break; 3160 case 6: 3161 for (i=0; i<mbs; i++) { 3162 ij[0] = 6*i; ij[1] = 6*i + 1; ij[2] = 6*i + 2; ij[3] = 6*i + 3; ij[4] = 6*i + 4; ij[5] = 6*i + 5; 3163 ierr = MatGetValues(A,6,ij,6,ij,diag);CHKERRQ(ierr); 3164 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3165 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3166 ierr = PetscKernel_A_gets_transpose_A_6(diag);CHKERRQ(ierr); 3167 diag += 36; 3168 } 3169 break; 3170 case 7: 3171 for (i=0; i<mbs; i++) { 3172 ij[0] = 7*i; ij[1] = 7*i + 1; ij[2] = 7*i + 2; ij[3] = 7*i + 3; ij[4] = 7*i + 4; ij[5] = 7*i + 5; ij[5] = 7*i + 6; 3173 ierr = MatGetValues(A,7,ij,7,ij,diag);CHKERRQ(ierr); 3174 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3175 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3176 ierr = PetscKernel_A_gets_transpose_A_7(diag);CHKERRQ(ierr); 3177 diag += 49; 3178 } 3179 break; 3180 default: 3181 ierr = PetscMalloc3(bs,&v_work,bs,&v_pivots,bs,&IJ);CHKERRQ(ierr); 3182 for (i=0; i<mbs; i++) { 3183 for (j=0; j<bs; j++) { 3184 IJ[j] = bs*i + j; 3185 } 3186 ierr = MatGetValues(A,bs,IJ,bs,IJ,diag);CHKERRQ(ierr); 3187 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 3188 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3189 ierr = PetscKernel_A_gets_transpose_A_N(diag,bs);CHKERRQ(ierr); 3190 diag += bs2; 3191 } 3192 ierr = PetscFree3(v_work,v_pivots,IJ);CHKERRQ(ierr); 3193 } 3194 a->ibdiagvalid = PETSC_TRUE; 3195 PetscFunctionReturn(0); 3196 } 3197 3198 static PetscErrorCode MatSetRandom_SeqAIJ(Mat x,PetscRandom rctx) 3199 { 3200 PetscErrorCode ierr; 3201 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)x->data; 3202 PetscScalar a; 3203 PetscInt m,n,i,j,col; 3204 3205 PetscFunctionBegin; 3206 if (!x->assembled) { 3207 ierr = MatGetSize(x,&m,&n);CHKERRQ(ierr); 3208 for (i=0; i<m; i++) { 3209 for (j=0; j<aij->imax[i]; j++) { 3210 ierr = PetscRandomGetValue(rctx,&a);CHKERRQ(ierr); 3211 col = (PetscInt)(n*PetscRealPart(a)); 3212 ierr = MatSetValues(x,1,&i,1,&col,&a,ADD_VALUES);CHKERRQ(ierr); 3213 } 3214 } 3215 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet coded"); 3216 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 PetscErrorCode MatShift_SeqAIJ(Mat Y,PetscScalar a) 3222 { 3223 PetscErrorCode ierr; 3224 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)Y->data; 3225 3226 PetscFunctionBegin; 3227 if (!Y->preallocated || !aij->nz) { 3228 ierr = MatSeqAIJSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 3229 } 3230 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 3231 PetscFunctionReturn(0); 3232 } 3233 3234 /* -------------------------------------------------------------------*/ 3235 static struct _MatOps MatOps_Values = { MatSetValues_SeqAIJ, 3236 MatGetRow_SeqAIJ, 3237 MatRestoreRow_SeqAIJ, 3238 MatMult_SeqAIJ, 3239 /* 4*/ MatMultAdd_SeqAIJ, 3240 MatMultTranspose_SeqAIJ, 3241 MatMultTransposeAdd_SeqAIJ, 3242 0, 3243 0, 3244 0, 3245 /* 10*/ 0, 3246 MatLUFactor_SeqAIJ, 3247 0, 3248 MatSOR_SeqAIJ, 3249 MatTranspose_SeqAIJ, 3250 /*1 5*/ MatGetInfo_SeqAIJ, 3251 MatEqual_SeqAIJ, 3252 MatGetDiagonal_SeqAIJ, 3253 MatDiagonalScale_SeqAIJ, 3254 MatNorm_SeqAIJ, 3255 /* 20*/ 0, 3256 MatAssemblyEnd_SeqAIJ, 3257 MatSetOption_SeqAIJ, 3258 MatZeroEntries_SeqAIJ, 3259 /* 24*/ MatZeroRows_SeqAIJ, 3260 0, 3261 0, 3262 0, 3263 0, 3264 /* 29*/ MatSetUp_SeqAIJ, 3265 0, 3266 0, 3267 0, 3268 0, 3269 /* 34*/ MatDuplicate_SeqAIJ, 3270 0, 3271 0, 3272 MatILUFactor_SeqAIJ, 3273 0, 3274 /* 39*/ MatAXPY_SeqAIJ, 3275 MatCreateSubMatrices_SeqAIJ, 3276 MatIncreaseOverlap_SeqAIJ, 3277 MatGetValues_SeqAIJ, 3278 MatCopy_SeqAIJ, 3279 /* 44*/ MatGetRowMax_SeqAIJ, 3280 MatScale_SeqAIJ, 3281 MatShift_SeqAIJ, 3282 MatDiagonalSet_SeqAIJ, 3283 MatZeroRowsColumns_SeqAIJ, 3284 /* 49*/ MatSetRandom_SeqAIJ, 3285 MatGetRowIJ_SeqAIJ, 3286 MatRestoreRowIJ_SeqAIJ, 3287 MatGetColumnIJ_SeqAIJ, 3288 MatRestoreColumnIJ_SeqAIJ, 3289 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3290 0, 3291 0, 3292 MatPermute_SeqAIJ, 3293 0, 3294 /* 59*/ 0, 3295 MatDestroy_SeqAIJ, 3296 MatView_SeqAIJ, 3297 0, 3298 MatMatMatMult_SeqAIJ_SeqAIJ_SeqAIJ, 3299 /* 64*/ MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ, 3300 MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ, 3301 0, 3302 0, 3303 0, 3304 /* 69*/ MatGetRowMaxAbs_SeqAIJ, 3305 MatGetRowMinAbs_SeqAIJ, 3306 0, 3307 0, 3308 0, 3309 /* 74*/ 0, 3310 MatFDColoringApply_AIJ, 3311 0, 3312 0, 3313 0, 3314 /* 79*/ MatFindZeroDiagonals_SeqAIJ, 3315 0, 3316 0, 3317 0, 3318 MatLoad_SeqAIJ, 3319 /* 84*/ MatIsSymmetric_SeqAIJ, 3320 MatIsHermitian_SeqAIJ, 3321 0, 3322 0, 3323 0, 3324 /* 89*/ MatMatMult_SeqAIJ_SeqAIJ, 3325 MatMatMultSymbolic_SeqAIJ_SeqAIJ, 3326 MatMatMultNumeric_SeqAIJ_SeqAIJ, 3327 MatPtAP_SeqAIJ_SeqAIJ, 3328 MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy, 3329 /* 94*/ MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy, 3330 MatMatTransposeMult_SeqAIJ_SeqAIJ, 3331 MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ, 3332 MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ, 3333 0, 3334 /* 99*/ 0, 3335 0, 3336 0, 3337 MatConjugate_SeqAIJ, 3338 0, 3339 /*104*/ MatSetValuesRow_SeqAIJ, 3340 MatRealPart_SeqAIJ, 3341 MatImaginaryPart_SeqAIJ, 3342 0, 3343 0, 3344 /*109*/ MatMatSolve_SeqAIJ, 3345 0, 3346 MatGetRowMin_SeqAIJ, 3347 0, 3348 MatMissingDiagonal_SeqAIJ, 3349 /*114*/ 0, 3350 0, 3351 0, 3352 0, 3353 0, 3354 /*119*/ 0, 3355 0, 3356 0, 3357 0, 3358 MatGetMultiProcBlock_SeqAIJ, 3359 /*124*/ MatFindNonzeroRows_SeqAIJ, 3360 MatGetColumnNorms_SeqAIJ, 3361 MatInvertBlockDiagonal_SeqAIJ, 3362 0, 3363 0, 3364 /*129*/ 0, 3365 MatTransposeMatMult_SeqAIJ_SeqAIJ, 3366 MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ, 3367 MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ, 3368 MatTransposeColoringCreate_SeqAIJ, 3369 /*134*/ MatTransColoringApplySpToDen_SeqAIJ, 3370 MatTransColoringApplyDenToSp_SeqAIJ, 3371 MatRARt_SeqAIJ_SeqAIJ, 3372 MatRARtSymbolic_SeqAIJ_SeqAIJ, 3373 MatRARtNumeric_SeqAIJ_SeqAIJ, 3374 /*139*/0, 3375 0, 3376 0, 3377 MatFDColoringSetUp_SeqXAIJ, 3378 MatFindOffBlockDiagonalEntries_SeqAIJ, 3379 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqAIJ, 3380 MatDestroySubMatrices_SeqAIJ 3381 }; 3382 3383 PetscErrorCode MatSeqAIJSetColumnIndices_SeqAIJ(Mat mat,PetscInt *indices) 3384 { 3385 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)mat->data; 3386 PetscInt i,nz,n; 3387 3388 PetscFunctionBegin; 3389 nz = aij->maxnz; 3390 n = mat->rmap->n; 3391 for (i=0; i<nz; i++) { 3392 aij->j[i] = indices[i]; 3393 } 3394 aij->nz = nz; 3395 for (i=0; i<n; i++) { 3396 aij->ilen[i] = aij->imax[i]; 3397 } 3398 PetscFunctionReturn(0); 3399 } 3400 3401 /*@ 3402 MatSeqAIJSetColumnIndices - Set the column indices for all the rows 3403 in the matrix. 3404 3405 Input Parameters: 3406 + mat - the SeqAIJ matrix 3407 - indices - the column indices 3408 3409 Level: advanced 3410 3411 Notes: 3412 This can be called if you have precomputed the nonzero structure of the 3413 matrix and want to provide it to the matrix object to improve the performance 3414 of the MatSetValues() operation. 3415 3416 You MUST have set the correct numbers of nonzeros per row in the call to 3417 MatCreateSeqAIJ(), and the columns indices MUST be sorted. 3418 3419 MUST be called before any calls to MatSetValues(); 3420 3421 The indices should start with zero, not one. 3422 3423 @*/ 3424 PetscErrorCode MatSeqAIJSetColumnIndices(Mat mat,PetscInt *indices) 3425 { 3426 PetscErrorCode ierr; 3427 3428 PetscFunctionBegin; 3429 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 3430 PetscValidPointer(indices,2); 3431 ierr = PetscUseMethod(mat,"MatSeqAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 3432 PetscFunctionReturn(0); 3433 } 3434 3435 /* ----------------------------------------------------------------------------------------*/ 3436 3437 PetscErrorCode MatStoreValues_SeqAIJ(Mat mat) 3438 { 3439 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)mat->data; 3440 PetscErrorCode ierr; 3441 size_t nz = aij->i[mat->rmap->n]; 3442 3443 PetscFunctionBegin; 3444 if (!aij->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3445 3446 /* allocate space for values if not already there */ 3447 if (!aij->saved_values) { 3448 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 3449 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3450 } 3451 3452 /* copy values over */ 3453 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 3457 /*@ 3458 MatStoreValues - Stashes a copy of the matrix values; this allows, for 3459 example, reuse of the linear part of a Jacobian, while recomputing the 3460 nonlinear portion. 3461 3462 Collect on Mat 3463 3464 Input Parameters: 3465 . mat - the matrix (currently only AIJ matrices support this option) 3466 3467 Level: advanced 3468 3469 Common Usage, with SNESSolve(): 3470 $ Create Jacobian matrix 3471 $ Set linear terms into matrix 3472 $ Apply boundary conditions to matrix, at this time matrix must have 3473 $ final nonzero structure (i.e. setting the nonlinear terms and applying 3474 $ boundary conditions again will not change the nonzero structure 3475 $ ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE); 3476 $ ierr = MatStoreValues(mat); 3477 $ Call SNESSetJacobian() with matrix 3478 $ In your Jacobian routine 3479 $ ierr = MatRetrieveValues(mat); 3480 $ Set nonlinear terms in matrix 3481 3482 Common Usage without SNESSolve(), i.e. when you handle nonlinear solve yourself: 3483 $ // build linear portion of Jacobian 3484 $ ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE); 3485 $ ierr = MatStoreValues(mat); 3486 $ loop over nonlinear iterations 3487 $ ierr = MatRetrieveValues(mat); 3488 $ // call MatSetValues(mat,...) to set nonliner portion of Jacobian 3489 $ // call MatAssemblyBegin/End() on matrix 3490 $ Solve linear system with Jacobian 3491 $ endloop 3492 3493 Notes: 3494 Matrix must already be assemblied before calling this routine 3495 Must set the matrix option MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE); before 3496 calling this routine. 3497 3498 When this is called multiple times it overwrites the previous set of stored values 3499 and does not allocated additional space. 3500 3501 .seealso: MatRetrieveValues() 3502 3503 @*/ 3504 PetscErrorCode MatStoreValues(Mat mat) 3505 { 3506 PetscErrorCode ierr; 3507 3508 PetscFunctionBegin; 3509 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 3510 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3511 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3512 ierr = PetscUseMethod(mat,"MatStoreValues_C",(Mat),(mat));CHKERRQ(ierr); 3513 PetscFunctionReturn(0); 3514 } 3515 3516 PetscErrorCode MatRetrieveValues_SeqAIJ(Mat mat) 3517 { 3518 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)mat->data; 3519 PetscErrorCode ierr; 3520 PetscInt nz = aij->i[mat->rmap->n]; 3521 3522 PetscFunctionBegin; 3523 if (!aij->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3524 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3525 /* copy values over */ 3526 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3527 PetscFunctionReturn(0); 3528 } 3529 3530 /*@ 3531 MatRetrieveValues - Retrieves the copy of the matrix values; this allows, for 3532 example, reuse of the linear part of a Jacobian, while recomputing the 3533 nonlinear portion. 3534 3535 Collect on Mat 3536 3537 Input Parameters: 3538 . mat - the matrix (currently only AIJ matrices support this option) 3539 3540 Level: advanced 3541 3542 .seealso: MatStoreValues() 3543 3544 @*/ 3545 PetscErrorCode MatRetrieveValues(Mat mat) 3546 { 3547 PetscErrorCode ierr; 3548 3549 PetscFunctionBegin; 3550 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 3551 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3552 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3553 ierr = PetscUseMethod(mat,"MatRetrieveValues_C",(Mat),(mat));CHKERRQ(ierr); 3554 PetscFunctionReturn(0); 3555 } 3556 3557 3558 /* --------------------------------------------------------------------------------*/ 3559 /*@C 3560 MatCreateSeqAIJ - Creates a sparse matrix in AIJ (compressed row) format 3561 (the default parallel PETSc format). For good matrix assembly performance 3562 the user should preallocate the matrix storage by setting the parameter nz 3563 (or the array nnz). By setting these parameters accurately, performance 3564 during matrix assembly can be increased by more than a factor of 50. 3565 3566 Collective on MPI_Comm 3567 3568 Input Parameters: 3569 + comm - MPI communicator, set to PETSC_COMM_SELF 3570 . m - number of rows 3571 . n - number of columns 3572 . nz - number of nonzeros per row (same for all rows) 3573 - nnz - array containing the number of nonzeros in the various rows 3574 (possibly different for each row) or NULL 3575 3576 Output Parameter: 3577 . A - the matrix 3578 3579 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3580 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3581 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3582 3583 Notes: 3584 If nnz is given then nz is ignored 3585 3586 The AIJ format (also called the Yale sparse matrix format or 3587 compressed row storage), is fully compatible with standard Fortran 77 3588 storage. That is, the stored row and column indices can begin at 3589 either one (as in Fortran) or zero. See the users' manual for details. 3590 3591 Specify the preallocated storage with either nz or nnz (not both). 3592 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3593 allocation. For large problems you MUST preallocate memory or you 3594 will get TERRIBLE performance, see the users' manual chapter on matrices. 3595 3596 By default, this format uses inodes (identical nodes) when possible, to 3597 improve numerical efficiency of matrix-vector products and solves. We 3598 search for consecutive rows with the same nonzero structure, thereby 3599 reusing matrix information to achieve increased efficiency. 3600 3601 Options Database Keys: 3602 + -mat_no_inode - Do not use inodes 3603 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3604 3605 Level: intermediate 3606 3607 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays() 3608 3609 @*/ 3610 PetscErrorCode MatCreateSeqAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3611 { 3612 PetscErrorCode ierr; 3613 3614 PetscFunctionBegin; 3615 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3616 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3617 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3618 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 3619 PetscFunctionReturn(0); 3620 } 3621 3622 /*@C 3623 MatSeqAIJSetPreallocation - For good matrix assembly performance 3624 the user should preallocate the matrix storage by setting the parameter nz 3625 (or the array nnz). By setting these parameters accurately, performance 3626 during matrix assembly can be increased by more than a factor of 50. 3627 3628 Collective on MPI_Comm 3629 3630 Input Parameters: 3631 + B - The matrix 3632 . nz - number of nonzeros per row (same for all rows) 3633 - nnz - array containing the number of nonzeros in the various rows 3634 (possibly different for each row) or NULL 3635 3636 Notes: 3637 If nnz is given then nz is ignored 3638 3639 The AIJ format (also called the Yale sparse matrix format or 3640 compressed row storage), is fully compatible with standard Fortran 77 3641 storage. That is, the stored row and column indices can begin at 3642 either one (as in Fortran) or zero. See the users' manual for details. 3643 3644 Specify the preallocated storage with either nz or nnz (not both). 3645 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3646 allocation. For large problems you MUST preallocate memory or you 3647 will get TERRIBLE performance, see the users' manual chapter on matrices. 3648 3649 You can call MatGetInfo() to get information on how effective the preallocation was; 3650 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3651 You can also run with the option -info and look for messages with the string 3652 malloc in them to see if additional memory allocation was needed. 3653 3654 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 3655 entries or columns indices 3656 3657 By default, this format uses inodes (identical nodes) when possible, to 3658 improve numerical efficiency of matrix-vector products and solves. We 3659 search for consecutive rows with the same nonzero structure, thereby 3660 reusing matrix information to achieve increased efficiency. 3661 3662 Options Database Keys: 3663 + -mat_no_inode - Do not use inodes 3664 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3665 3666 Level: intermediate 3667 3668 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatGetInfo() 3669 3670 @*/ 3671 PetscErrorCode MatSeqAIJSetPreallocation(Mat B,PetscInt nz,const PetscInt nnz[]) 3672 { 3673 PetscErrorCode ierr; 3674 3675 PetscFunctionBegin; 3676 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3677 PetscValidType(B,1); 3678 ierr = PetscTryMethod(B,"MatSeqAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,nz,nnz));CHKERRQ(ierr); 3679 PetscFunctionReturn(0); 3680 } 3681 3682 PetscErrorCode MatSeqAIJSetPreallocation_SeqAIJ(Mat B,PetscInt nz,const PetscInt *nnz) 3683 { 3684 Mat_SeqAIJ *b; 3685 PetscBool skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3686 PetscErrorCode ierr; 3687 PetscInt i; 3688 3689 PetscFunctionBegin; 3690 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3691 if (nz == MAT_SKIP_ALLOCATION) { 3692 skipallocation = PETSC_TRUE; 3693 nz = 0; 3694 } 3695 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3696 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3697 3698 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3699 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3700 if (nnz) { 3701 for (i=0; i<B->rmap->n; i++) { 3702 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3703 if (nnz[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %D value %d rowlength %D",i,nnz[i],B->cmap->n); 3704 } 3705 } 3706 3707 B->preallocated = PETSC_TRUE; 3708 3709 b = (Mat_SeqAIJ*)B->data; 3710 3711 if (!skipallocation) { 3712 if (!b->imax) { 3713 ierr = PetscMalloc2(B->rmap->n,&b->imax,B->rmap->n,&b->ilen);CHKERRQ(ierr); 3714 ierr = PetscLogObjectMemory((PetscObject)B,2*B->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 3715 } 3716 if (!b->ipre) { 3717 ierr = PetscMalloc1(B->rmap->n,&b->ipre);CHKERRQ(ierr); 3718 ierr = PetscLogObjectMemory((PetscObject)B,B->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 3719 } 3720 if (!nnz) { 3721 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 10; 3722 else if (nz < 0) nz = 1; 3723 for (i=0; i<B->rmap->n; i++) b->imax[i] = nz; 3724 nz = nz*B->rmap->n; 3725 } else { 3726 nz = 0; 3727 for (i=0; i<B->rmap->n; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3728 } 3729 /* b->ilen will count nonzeros in each row so far. */ 3730 for (i=0; i<B->rmap->n; i++) b->ilen[i] = 0; 3731 3732 /* allocate the matrix space */ 3733 /* FIXME: should B's old memory be unlogged? */ 3734 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3735 if (B->structure_only) { 3736 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3737 ierr = PetscMalloc1(B->rmap->n+1,&b->i);CHKERRQ(ierr); 3738 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->n+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3739 } else { 3740 ierr = PetscMalloc3(nz,&b->a,nz,&b->j,B->rmap->n+1,&b->i);CHKERRQ(ierr); 3741 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3742 } 3743 b->i[0] = 0; 3744 for (i=1; i<B->rmap->n+1; i++) { 3745 b->i[i] = b->i[i-1] + b->imax[i-1]; 3746 } 3747 if (B->structure_only) { 3748 b->singlemalloc = PETSC_FALSE; 3749 b->free_a = PETSC_FALSE; 3750 } else { 3751 b->singlemalloc = PETSC_TRUE; 3752 b->free_a = PETSC_TRUE; 3753 } 3754 b->free_ij = PETSC_TRUE; 3755 } else { 3756 b->free_a = PETSC_FALSE; 3757 b->free_ij = PETSC_FALSE; 3758 } 3759 3760 if (b->ipre && nnz != b->ipre && b->imax) { 3761 /* reserve user-requested sparsity */ 3762 ierr = PetscMemcpy(b->ipre,b->imax,B->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 3763 } 3764 3765 3766 b->nz = 0; 3767 b->maxnz = nz; 3768 B->info.nz_unneeded = (double)b->maxnz; 3769 if (realalloc) { 3770 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3771 } 3772 B->was_assembled = PETSC_FALSE; 3773 B->assembled = PETSC_FALSE; 3774 PetscFunctionReturn(0); 3775 } 3776 3777 3778 PetscErrorCode MatResetPreallocation_SeqAIJ(Mat A) 3779 { 3780 Mat_SeqAIJ *a; 3781 PetscInt i; 3782 PetscErrorCode ierr; 3783 3784 PetscFunctionBegin; 3785 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3786 a = (Mat_SeqAIJ*)A->data; 3787 /* if no saved info, we error out */ 3788 if (!a->ipre) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_NULL,"No saved preallocation info \n"); 3789 3790 if (!a->i || !a->j || !a->a || !a->imax || !a->ilen) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_NULL,"Memory info is incomplete, and can not reset preallocation \n"); 3791 3792 ierr = PetscMemcpy(a->imax,a->ipre,A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 3793 ierr = PetscMemzero(a->ilen,A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 3794 a->i[0] = 0; 3795 for (i=1; i<A->rmap->n+1; i++) { 3796 a->i[i] = a->i[i-1] + a->imax[i-1]; 3797 } 3798 A->preallocated = PETSC_TRUE; 3799 a->nz = 0; 3800 a->maxnz = a->i[A->rmap->n]; 3801 A->info.nz_unneeded = (double)a->maxnz; 3802 A->was_assembled = PETSC_FALSE; 3803 A->assembled = PETSC_FALSE; 3804 PetscFunctionReturn(0); 3805 } 3806 3807 /*@ 3808 MatSeqAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format. 3809 3810 Input Parameters: 3811 + B - the matrix 3812 . i - the indices into j for the start of each row (starts with zero) 3813 . j - the column indices for each row (starts with zero) these must be sorted for each row 3814 - v - optional values in the matrix 3815 3816 Level: developer 3817 3818 The i,j,v values are COPIED with this routine; to avoid the copy use MatCreateSeqAIJWithArrays() 3819 3820 .keywords: matrix, aij, compressed row, sparse, sequential 3821 3822 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatSeqAIJSetPreallocation(), MatCreateSeqAIJ(), SeqAIJ 3823 @*/ 3824 PetscErrorCode MatSeqAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[]) 3825 { 3826 PetscErrorCode ierr; 3827 3828 PetscFunctionBegin; 3829 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3830 PetscValidType(B,1); 3831 ierr = PetscTryMethod(B,"MatSeqAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834 3835 PetscErrorCode MatSeqAIJSetPreallocationCSR_SeqAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3836 { 3837 PetscInt i; 3838 PetscInt m,n; 3839 PetscInt nz; 3840 PetscInt *nnz, nz_max = 0; 3841 PetscScalar *values; 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %D", Ii[0]); 3846 3847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3849 3850 ierr = MatGetSize(B, &m, &n);CHKERRQ(ierr); 3851 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3852 for (i = 0; i < m; i++) { 3853 nz = Ii[i+1]- Ii[i]; 3854 nz_max = PetscMax(nz_max, nz); 3855 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D", i, nnz); 3856 nnz[i] = nz; 3857 } 3858 ierr = MatSeqAIJSetPreallocation(B, 0, nnz);CHKERRQ(ierr); 3859 ierr = PetscFree(nnz);CHKERRQ(ierr); 3860 3861 if (v) { 3862 values = (PetscScalar*) v; 3863 } else { 3864 ierr = PetscCalloc1(nz_max, &values);CHKERRQ(ierr); 3865 } 3866 3867 for (i = 0; i < m; i++) { 3868 nz = Ii[i+1] - Ii[i]; 3869 ierr = MatSetValues_SeqAIJ(B, 1, &i, nz, J+Ii[i], values + (v ? Ii[i] : 0), INSERT_VALUES);CHKERRQ(ierr); 3870 } 3871 3872 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 3875 if (!v) { 3876 ierr = PetscFree(values);CHKERRQ(ierr); 3877 } 3878 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3879 PetscFunctionReturn(0); 3880 } 3881 3882 #include <../src/mat/impls/dense/seq/dense.h> 3883 #include <petsc/private/kernels/petscaxpy.h> 3884 3885 /* 3886 Computes (B'*A')' since computing B*A directly is untenable 3887 3888 n p p 3889 ( ) ( ) ( ) 3890 m ( A ) * n ( B ) = m ( C ) 3891 ( ) ( ) ( ) 3892 3893 */ 3894 PetscErrorCode MatMatMultNumeric_SeqDense_SeqAIJ(Mat A,Mat B,Mat C) 3895 { 3896 PetscErrorCode ierr; 3897 Mat_SeqDense *sub_a = (Mat_SeqDense*)A->data; 3898 Mat_SeqAIJ *sub_b = (Mat_SeqAIJ*)B->data; 3899 Mat_SeqDense *sub_c = (Mat_SeqDense*)C->data; 3900 PetscInt i,n,m,q,p; 3901 const PetscInt *ii,*idx; 3902 const PetscScalar *b,*a,*a_q; 3903 PetscScalar *c,*c_q; 3904 3905 PetscFunctionBegin; 3906 m = A->rmap->n; 3907 n = A->cmap->n; 3908 p = B->cmap->n; 3909 a = sub_a->v; 3910 b = sub_b->a; 3911 c = sub_c->v; 3912 ierr = PetscMemzero(c,m*p*sizeof(PetscScalar));CHKERRQ(ierr); 3913 3914 ii = sub_b->i; 3915 idx = sub_b->j; 3916 for (i=0; i<n; i++) { 3917 q = ii[i+1] - ii[i]; 3918 while (q-->0) { 3919 c_q = c + m*(*idx); 3920 a_q = a + m*i; 3921 PetscKernelAXPY(c_q,*b,a_q,m); 3922 idx++; 3923 b++; 3924 } 3925 } 3926 PetscFunctionReturn(0); 3927 } 3928 3929 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 3930 { 3931 PetscErrorCode ierr; 3932 PetscInt m=A->rmap->n,n=B->cmap->n; 3933 Mat Cmat; 3934 3935 PetscFunctionBegin; 3936 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n); 3937 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 3938 ierr = MatSetSizes(Cmat,m,n,m,n);CHKERRQ(ierr); 3939 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 3940 ierr = MatSetType(Cmat,MATSEQDENSE);CHKERRQ(ierr); 3941 ierr = MatSeqDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 3942 3943 Cmat->ops->matmultnumeric = MatMatMultNumeric_SeqDense_SeqAIJ; 3944 3945 *C = Cmat; 3946 PetscFunctionReturn(0); 3947 } 3948 3949 /* ----------------------------------------------------------------*/ 3950 PETSC_INTERN PetscErrorCode MatMatMult_SeqDense_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 3951 { 3952 PetscErrorCode ierr; 3953 3954 PetscFunctionBegin; 3955 if (scall == MAT_INITIAL_MATRIX) { 3956 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 3957 ierr = MatMatMultSymbolic_SeqDense_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 3958 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 3959 } 3960 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3961 ierr = MatMatMultNumeric_SeqDense_SeqAIJ(A,B,*C);CHKERRQ(ierr); 3962 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 3967 /*MC 3968 MATSEQAIJ - MATSEQAIJ = "seqaij" - A matrix type to be used for sequential sparse matrices, 3969 based on compressed sparse row format. 3970 3971 Options Database Keys: 3972 . -mat_type seqaij - sets the matrix type to "seqaij" during a call to MatSetFromOptions() 3973 3974 Level: beginner 3975 3976 .seealso: MatCreateSeqAIJ(), MatSetFromOptions(), MatSetType(), MatCreate(), MatType 3977 M*/ 3978 3979 /*MC 3980 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 3981 3982 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 3983 and MATMPIAIJ otherwise. As a result, for single process communicators, 3984 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 3985 for communicators controlling multiple processes. It is recommended that you call both of 3986 the above preallocation routines for simplicity. 3987 3988 Options Database Keys: 3989 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 3990 3991 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 3992 enough exist. 3993 3994 Level: beginner 3995 3996 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 3997 M*/ 3998 3999 /*MC 4000 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 4001 4002 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 4003 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 4004 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 4005 for communicators controlling multiple processes. It is recommended that you call both of 4006 the above preallocation routines for simplicity. 4007 4008 Options Database Keys: 4009 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 4010 4011 Level: beginner 4012 4013 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 4014 M*/ 4015 4016 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat,MatType,MatReuse,Mat*); 4017 #if defined(PETSC_HAVE_ELEMENTAL) 4018 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4019 #endif 4020 #if defined(PETSC_HAVE_HYPRE) 4021 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat A,MatType,MatReuse,Mat*); 4022 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 4023 #endif 4024 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat,MatType,MatReuse,Mat*); 4025 4026 #if defined(PETSC_HAVE_MATLAB_ENGINE) 4027 PETSC_EXTERN PetscErrorCode MatlabEnginePut_SeqAIJ(PetscObject,void*); 4028 PETSC_EXTERN PetscErrorCode MatlabEngineGet_SeqAIJ(PetscObject,void*); 4029 #endif 4030 4031 PETSC_EXTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat,MatType,MatReuse,Mat*); 4032 4033 /*@C 4034 MatSeqAIJGetArray - gives access to the array where the data for a MATSEQAIJ matrix is stored 4035 4036 Not Collective 4037 4038 Input Parameter: 4039 . mat - a MATSEQAIJ matrix 4040 4041 Output Parameter: 4042 . array - pointer to the data 4043 4044 Level: intermediate 4045 4046 .seealso: MatSeqAIJRestoreArray(), MatSeqAIJGetArrayF90() 4047 @*/ 4048 PetscErrorCode MatSeqAIJGetArray(Mat A,PetscScalar **array) 4049 { 4050 PetscErrorCode ierr; 4051 4052 PetscFunctionBegin; 4053 ierr = PetscUseMethod(A,"MatSeqAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 4054 PetscFunctionReturn(0); 4055 } 4056 4057 /*@C 4058 MatSeqAIJGetMaxRowNonzeros - returns the maximum number of nonzeros in any row 4059 4060 Not Collective 4061 4062 Input Parameter: 4063 . mat - a MATSEQAIJ matrix 4064 4065 Output Parameter: 4066 . nz - the maximum number of nonzeros in any row 4067 4068 Level: intermediate 4069 4070 .seealso: MatSeqAIJRestoreArray(), MatSeqAIJGetArrayF90() 4071 @*/ 4072 PetscErrorCode MatSeqAIJGetMaxRowNonzeros(Mat A,PetscInt *nz) 4073 { 4074 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)A->data; 4075 4076 PetscFunctionBegin; 4077 *nz = aij->rmax; 4078 PetscFunctionReturn(0); 4079 } 4080 4081 /*@C 4082 MatSeqAIJRestoreArray - returns access to the array where the data for a MATSEQAIJ matrix is stored obtained by MatSeqAIJGetArray() 4083 4084 Not Collective 4085 4086 Input Parameters: 4087 . mat - a MATSEQAIJ matrix 4088 . array - pointer to the data 4089 4090 Level: intermediate 4091 4092 .seealso: MatSeqAIJGetArray(), MatSeqAIJRestoreArrayF90() 4093 @*/ 4094 PetscErrorCode MatSeqAIJRestoreArray(Mat A,PetscScalar **array) 4095 { 4096 PetscErrorCode ierr; 4097 4098 PetscFunctionBegin; 4099 ierr = PetscUseMethod(A,"MatSeqAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 4100 PetscFunctionReturn(0); 4101 } 4102 4103 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJ(Mat B) 4104 { 4105 Mat_SeqAIJ *b; 4106 PetscErrorCode ierr; 4107 PetscMPIInt size; 4108 4109 PetscFunctionBegin; 4110 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4111 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 4112 4113 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4114 4115 B->data = (void*)b; 4116 4117 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4118 4119 b->row = 0; 4120 b->col = 0; 4121 b->icol = 0; 4122 b->reallocs = 0; 4123 b->ignorezeroentries = PETSC_FALSE; 4124 b->roworiented = PETSC_TRUE; 4125 b->nonew = 0; 4126 b->diag = 0; 4127 b->solve_work = 0; 4128 B->spptr = 0; 4129 b->saved_values = 0; 4130 b->idiag = 0; 4131 b->mdiag = 0; 4132 b->ssor_work = 0; 4133 b->omega = 1.0; 4134 b->fshift = 0.0; 4135 b->idiagvalid = PETSC_FALSE; 4136 b->ibdiagvalid = PETSC_FALSE; 4137 b->keepnonzeropattern = PETSC_FALSE; 4138 4139 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 4140 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr); 4141 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr); 4142 4143 #if defined(PETSC_HAVE_MATLAB_ENGINE) 4144 ierr = PetscObjectComposeFunction((PetscObject)B,"PetscMatlabEnginePut_C",MatlabEnginePut_SeqAIJ);CHKERRQ(ierr); 4145 ierr = PetscObjectComposeFunction((PetscObject)B,"PetscMatlabEngineGet_C",MatlabEngineGet_SeqAIJ);CHKERRQ(ierr); 4146 #endif 4147 4148 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqAIJSetColumnIndices_C",MatSeqAIJSetColumnIndices_SeqAIJ);CHKERRQ(ierr); 4149 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqAIJ);CHKERRQ(ierr); 4150 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqAIJ);CHKERRQ(ierr); 4151 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqsbaij_C",MatConvert_SeqAIJ_SeqSBAIJ);CHKERRQ(ierr); 4152 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqbaij_C",MatConvert_SeqAIJ_SeqBAIJ);CHKERRQ(ierr); 4153 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqaijperm_C",MatConvert_SeqAIJ_SeqAIJPERM);CHKERRQ(ierr); 4154 #if defined(PETSC_HAVE_MKL_SPARSE) 4155 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqaijmkl_C",MatConvert_SeqAIJ_SeqAIJMKL);CHKERRQ(ierr); 4156 #endif 4157 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqaijcrl_C",MatConvert_SeqAIJ_SeqAIJCRL);CHKERRQ(ierr); 4158 #if defined(PETSC_HAVE_ELEMENTAL) 4159 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_elemental_C",MatConvert_SeqAIJ_Elemental);CHKERRQ(ierr); 4160 #endif 4161 #if defined(PETSC_HAVE_HYPRE) 4162 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 4163 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaij_seqaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 4164 #endif 4165 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqdense_C",MatConvert_SeqAIJ_SeqDense);CHKERRQ(ierr); 4166 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_seqsell_C",MatConvert_SeqAIJ_SeqSELL);CHKERRQ(ierr); 4167 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqAIJ);CHKERRQ(ierr); 4168 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsHermitianTranspose_C",MatIsTranspose_SeqAIJ);CHKERRQ(ierr); 4169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqAIJSetPreallocation_C",MatSeqAIJSetPreallocation_SeqAIJ);CHKERRQ(ierr); 4170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_SeqAIJ);CHKERRQ(ierr); 4171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqAIJSetPreallocationCSR_C",MatSeqAIJSetPreallocationCSR_SeqAIJ);CHKERRQ(ierr); 4172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatReorderForNonzeroDiagonal_C",MatReorderForNonzeroDiagonal_SeqAIJ);CHKERRQ(ierr); 4173 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaij_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 4174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaij_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 4175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaij_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 4176 ierr = MatCreate_SeqAIJ_Inode(B);CHKERRQ(ierr); 4177 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 4178 ierr = MatSeqAIJSetTypeFromOptions(B);CHKERRQ(ierr); /* this allows changing the matrix subtype to say MATSEQAIJPERM */ 4179 PetscFunctionReturn(0); 4180 } 4181 4182 /* 4183 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 4184 */ 4185 PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 4186 { 4187 Mat_SeqAIJ *c,*a = (Mat_SeqAIJ*)A->data; 4188 PetscErrorCode ierr; 4189 PetscInt i,m = A->rmap->n; 4190 4191 PetscFunctionBegin; 4192 c = (Mat_SeqAIJ*)C->data; 4193 4194 C->factortype = A->factortype; 4195 c->row = 0; 4196 c->col = 0; 4197 c->icol = 0; 4198 c->reallocs = 0; 4199 4200 C->assembled = PETSC_TRUE; 4201 4202 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 4203 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 4204 4205 ierr = PetscMalloc2(m,&c->imax,m,&c->ilen);CHKERRQ(ierr); 4206 ierr = PetscLogObjectMemory((PetscObject)C, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 4207 for (i=0; i<m; i++) { 4208 c->imax[i] = a->imax[i]; 4209 c->ilen[i] = a->ilen[i]; 4210 } 4211 4212 /* allocate the matrix space */ 4213 if (mallocmatspace) { 4214 ierr = PetscMalloc3(a->i[m],&c->a,a->i[m],&c->j,m+1,&c->i);CHKERRQ(ierr); 4215 ierr = PetscLogObjectMemory((PetscObject)C, a->i[m]*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 4216 4217 c->singlemalloc = PETSC_TRUE; 4218 4219 ierr = PetscMemcpy(c->i,a->i,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 4220 if (m > 0) { 4221 ierr = PetscMemcpy(c->j,a->j,(a->i[m])*sizeof(PetscInt));CHKERRQ(ierr); 4222 if (cpvalues == MAT_COPY_VALUES) { 4223 ierr = PetscMemcpy(c->a,a->a,(a->i[m])*sizeof(PetscScalar));CHKERRQ(ierr); 4224 } else { 4225 ierr = PetscMemzero(c->a,(a->i[m])*sizeof(PetscScalar));CHKERRQ(ierr); 4226 } 4227 } 4228 } 4229 4230 c->ignorezeroentries = a->ignorezeroentries; 4231 c->roworiented = a->roworiented; 4232 c->nonew = a->nonew; 4233 if (a->diag) { 4234 ierr = PetscMalloc1(m+1,&c->diag);CHKERRQ(ierr); 4235 ierr = PetscLogObjectMemory((PetscObject)C,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 4236 for (i=0; i<m; i++) { 4237 c->diag[i] = a->diag[i]; 4238 } 4239 } else c->diag = 0; 4240 4241 c->solve_work = 0; 4242 c->saved_values = 0; 4243 c->idiag = 0; 4244 c->ssor_work = 0; 4245 c->keepnonzeropattern = a->keepnonzeropattern; 4246 c->free_a = PETSC_TRUE; 4247 c->free_ij = PETSC_TRUE; 4248 4249 c->rmax = a->rmax; 4250 c->nz = a->nz; 4251 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 4252 C->preallocated = PETSC_TRUE; 4253 4254 c->compressedrow.use = a->compressedrow.use; 4255 c->compressedrow.nrows = a->compressedrow.nrows; 4256 if (a->compressedrow.use) { 4257 i = a->compressedrow.nrows; 4258 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i,&c->compressedrow.rindex);CHKERRQ(ierr); 4259 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 4260 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 4261 } else { 4262 c->compressedrow.use = PETSC_FALSE; 4263 c->compressedrow.i = NULL; 4264 c->compressedrow.rindex = NULL; 4265 } 4266 c->nonzerorowcnt = a->nonzerorowcnt; 4267 C->nonzerostate = A->nonzerostate; 4268 4269 ierr = MatDuplicate_SeqAIJ_Inode(A,cpvalues,&C);CHKERRQ(ierr); 4270 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 4271 PetscFunctionReturn(0); 4272 } 4273 4274 PetscErrorCode MatDuplicate_SeqAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 4275 { 4276 PetscErrorCode ierr; 4277 4278 PetscFunctionBegin; 4279 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 4280 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 4281 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 4282 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 4283 } 4284 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 4285 ierr = MatDuplicateNoCreate_SeqAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 4286 PetscFunctionReturn(0); 4287 } 4288 4289 PetscErrorCode MatLoad_SeqAIJ(Mat newMat, PetscViewer viewer) 4290 { 4291 Mat_SeqAIJ *a; 4292 PetscErrorCode ierr; 4293 PetscInt i,sum,nz,header[4],*rowlengths = 0,M,N,rows,cols; 4294 int fd; 4295 PetscMPIInt size; 4296 MPI_Comm comm; 4297 PetscInt bs = newMat->rmap->bs; 4298 4299 PetscFunctionBegin; 4300 /* force binary viewer to load .info file if it has not yet done so */ 4301 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 4302 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 4303 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4304 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"view must have one processor"); 4305 4306 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 4307 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 4308 ierr = PetscOptionsEnd();CHKERRQ(ierr); 4309 if (bs < 0) bs = 1; 4310 ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr); 4311 4312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 4313 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 4314 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object in file"); 4315 M = header[1]; N = header[2]; nz = header[3]; 4316 4317 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as SeqAIJ"); 4318 4319 /* read in row lengths */ 4320 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 4321 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 4322 4323 /* check if sum of rowlengths is same as nz */ 4324 for (i=0,sum=0; i< M; i++) sum +=rowlengths[i]; 4325 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_READ,"Inconsistant matrix data in file. no-nonzeros = %dD, sum-row-lengths = %D\n",nz,sum); 4326 4327 /* set global size if not set already*/ 4328 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) { 4329 ierr = MatSetSizes(newMat,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 4330 } else { 4331 /* if sizes and type are already set, check if the matrix global sizes are correct */ 4332 ierr = MatGetSize(newMat,&rows,&cols);CHKERRQ(ierr); 4333 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 4334 ierr = MatGetLocalSize(newMat,&rows,&cols);CHKERRQ(ierr); 4335 } 4336 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different length (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 4337 } 4338 ierr = MatSeqAIJSetPreallocation_SeqAIJ(newMat,0,rowlengths);CHKERRQ(ierr); 4339 a = (Mat_SeqAIJ*)newMat->data; 4340 4341 ierr = PetscBinaryRead(fd,a->j,nz,PETSC_INT);CHKERRQ(ierr); 4342 4343 /* read in nonzero values */ 4344 ierr = PetscBinaryRead(fd,a->a,nz,PETSC_SCALAR);CHKERRQ(ierr); 4345 4346 /* set matrix "i" values */ 4347 a->i[0] = 0; 4348 for (i=1; i<= M; i++) { 4349 a->i[i] = a->i[i-1] + rowlengths[i-1]; 4350 a->ilen[i-1] = rowlengths[i-1]; 4351 } 4352 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 4353 4354 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4355 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 PetscErrorCode MatEqual_SeqAIJ(Mat A,Mat B,PetscBool * flg) 4360 { 4361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; 4362 PetscErrorCode ierr; 4363 #if defined(PETSC_USE_COMPLEX) 4364 PetscInt k; 4365 #endif 4366 4367 PetscFunctionBegin; 4368 /* If the matrix dimensions are not equal,or no of nonzeros */ 4369 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz)) { 4370 *flg = PETSC_FALSE; 4371 PetscFunctionReturn(0); 4372 } 4373 4374 /* if the a->i are the same */ 4375 ierr = PetscMemcmp(a->i,b->i,(A->rmap->n+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 4376 if (!*flg) PetscFunctionReturn(0); 4377 4378 /* if a->j are the same */ 4379 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 4380 if (!*flg) PetscFunctionReturn(0); 4381 4382 /* if a->a are the same */ 4383 #if defined(PETSC_USE_COMPLEX) 4384 for (k=0; k<a->nz; k++) { 4385 if (PetscRealPart(a->a[k]) != PetscRealPart(b->a[k]) || PetscImaginaryPart(a->a[k]) != PetscImaginaryPart(b->a[k])) { 4386 *flg = PETSC_FALSE; 4387 PetscFunctionReturn(0); 4388 } 4389 } 4390 #else 4391 ierr = PetscMemcmp(a->a,b->a,(a->nz)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 4392 #endif 4393 PetscFunctionReturn(0); 4394 } 4395 4396 /*@ 4397 MatCreateSeqAIJWithArrays - Creates an sequential AIJ matrix using matrix elements (in CSR format) 4398 provided by the user. 4399 4400 Collective on MPI_Comm 4401 4402 Input Parameters: 4403 + comm - must be an MPI communicator of size 1 4404 . m - number of rows 4405 . n - number of columns 4406 . i - row indices 4407 . j - column indices 4408 - a - matrix values 4409 4410 Output Parameter: 4411 . mat - the matrix 4412 4413 Level: intermediate 4414 4415 Notes: 4416 The i, j, and a arrays are not copied by this routine, the user must free these arrays 4417 once the matrix is destroyed and not before 4418 4419 You cannot set new nonzero locations into this matrix, that will generate an error. 4420 4421 The i and j indices are 0 based 4422 4423 The format which is used for the sparse matrix input, is equivalent to a 4424 row-major ordering.. i.e for the following matrix, the input data expected is 4425 as shown 4426 4427 $ 1 0 0 4428 $ 2 0 3 4429 $ 4 5 6 4430 $ 4431 $ i = {0,1,3,6} [size = nrow+1 = 3+1] 4432 $ j = {0,0,2,0,1,2} [size = 6]; values must be sorted for each row 4433 $ v = {1,2,3,4,5,6} [size = 6] 4434 4435 4436 .seealso: MatCreate(), MatCreateAIJ(), MatCreateSeqAIJ(), MatCreateMPIAIJWithArrays(), MatMPIAIJSetPreallocationCSR() 4437 4438 @*/ 4439 PetscErrorCode MatCreateSeqAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 4440 { 4441 PetscErrorCode ierr; 4442 PetscInt ii; 4443 Mat_SeqAIJ *aij; 4444 #if defined(PETSC_USE_DEBUG) 4445 PetscInt jj; 4446 #endif 4447 4448 PetscFunctionBegin; 4449 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4450 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4451 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 4452 /* ierr = MatSetBlockSizes(*mat,,);CHKERRQ(ierr); */ 4453 ierr = MatSetType(*mat,MATSEQAIJ);CHKERRQ(ierr); 4454 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*mat,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 4455 aij = (Mat_SeqAIJ*)(*mat)->data; 4456 ierr = PetscMalloc2(m,&aij->imax,m,&aij->ilen);CHKERRQ(ierr); 4457 4458 aij->i = i; 4459 aij->j = j; 4460 aij->a = a; 4461 aij->singlemalloc = PETSC_FALSE; 4462 aij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 4463 aij->free_a = PETSC_FALSE; 4464 aij->free_ij = PETSC_FALSE; 4465 4466 for (ii=0; ii<m; ii++) { 4467 aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 4468 #if defined(PETSC_USE_DEBUG) 4469 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %D length = %D",ii,i[ii+1] - i[ii]); 4470 for (jj=i[ii]+1; jj<i[ii+1]; jj++) { 4471 if (j[jj] < j[jj-1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column entry number %D (actual colum %D) in row %D is not sorted",jj-i[ii],j[jj],ii); 4472 if (j[jj] == j[jj]-1) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column entry number %D (actual colum %D) in row %D is identical to previous entry",jj-i[ii],j[jj],ii); 4473 } 4474 #endif 4475 } 4476 #if defined(PETSC_USE_DEBUG) 4477 for (ii=0; ii<aij->i[m]; ii++) { 4478 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %D index = %D",ii,j[ii]); 4479 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %D index = %D",ii,j[ii]); 4480 } 4481 #endif 4482 4483 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4484 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 /*@C 4488 MatCreateSeqAIJFromTriple - Creates an sequential AIJ matrix using matrix elements (in COO format) 4489 provided by the user. 4490 4491 Collective on MPI_Comm 4492 4493 Input Parameters: 4494 + comm - must be an MPI communicator of size 1 4495 . m - number of rows 4496 . n - number of columns 4497 . i - row indices 4498 . j - column indices 4499 . a - matrix values 4500 . nz - number of nonzeros 4501 - idx - 0 or 1 based 4502 4503 Output Parameter: 4504 . mat - the matrix 4505 4506 Level: intermediate 4507 4508 Notes: 4509 The i and j indices are 0 based 4510 4511 The format which is used for the sparse matrix input, is equivalent to a 4512 row-major ordering.. i.e for the following matrix, the input data expected is 4513 as shown: 4514 4515 1 0 0 4516 2 0 3 4517 4 5 6 4518 4519 i = {0,1,1,2,2,2} 4520 j = {0,0,2,0,1,2} 4521 v = {1,2,3,4,5,6} 4522 4523 4524 .seealso: MatCreate(), MatCreateAIJ(), MatCreateSeqAIJ(), MatCreateSeqAIJWithArrays(), MatMPIAIJSetPreallocationCSR() 4525 4526 @*/ 4527 PetscErrorCode MatCreateSeqAIJFromTriple(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat,PetscInt nz,PetscBool idx) 4528 { 4529 PetscErrorCode ierr; 4530 PetscInt ii, *nnz, one = 1,row,col; 4531 4532 4533 PetscFunctionBegin; 4534 ierr = PetscCalloc1(m,&nnz);CHKERRQ(ierr); 4535 for (ii = 0; ii < nz; ii++) { 4536 nnz[i[ii] - !!idx] += 1; 4537 } 4538 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4539 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 4540 ierr = MatSetType(*mat,MATSEQAIJ);CHKERRQ(ierr); 4541 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*mat,0,nnz);CHKERRQ(ierr); 4542 for (ii = 0; ii < nz; ii++) { 4543 if (idx) { 4544 row = i[ii] - 1; 4545 col = j[ii] - 1; 4546 } else { 4547 row = i[ii]; 4548 col = j[ii]; 4549 } 4550 ierr = MatSetValues(*mat,one,&row,one,&col,&a[ii],ADD_VALUES);CHKERRQ(ierr); 4551 } 4552 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4554 ierr = PetscFree(nnz);CHKERRQ(ierr); 4555 PetscFunctionReturn(0); 4556 } 4557 4558 PetscErrorCode MatSeqAIJInvalidateDiagonal(Mat A) 4559 { 4560 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 4561 PetscErrorCode ierr; 4562 4563 PetscFunctionBegin; 4564 a->idiagvalid = PETSC_FALSE; 4565 a->ibdiagvalid = PETSC_FALSE; 4566 4567 ierr = MatSeqAIJInvalidateDiagonal_Inode(A);CHKERRQ(ierr); 4568 PetscFunctionReturn(0); 4569 } 4570 4571 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4572 { 4573 PetscErrorCode ierr; 4574 PetscMPIInt size; 4575 4576 PetscFunctionBegin; 4577 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4578 if (size == 1) { 4579 if (scall == MAT_INITIAL_MATRIX) { 4580 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4581 } else { 4582 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4583 } 4584 } else { 4585 ierr = MatCreateMPIMatConcatenateSeqMat_MPIAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 4586 } 4587 PetscFunctionReturn(0); 4588 } 4589 4590 /* 4591 Permute A into C's *local* index space using rowemb,colemb. 4592 The embedding are supposed to be injections and the above implies that the range of rowemb is a subset 4593 of [0,m), colemb is in [0,n). 4594 If pattern == DIFFERENT_NONZERO_PATTERN, C is preallocated according to A. 4595 */ 4596 PetscErrorCode MatSetSeqMat_SeqAIJ(Mat C,IS rowemb,IS colemb,MatStructure pattern,Mat B) 4597 { 4598 /* If making this function public, change the error returned in this function away from _PLIB. */ 4599 PetscErrorCode ierr; 4600 Mat_SeqAIJ *Baij; 4601 PetscBool seqaij; 4602 PetscInt m,n,*nz,i,j,count; 4603 PetscScalar v; 4604 const PetscInt *rowindices,*colindices; 4605 4606 PetscFunctionBegin; 4607 if (!B) PetscFunctionReturn(0); 4608 /* Check to make sure the target matrix (and embeddings) are compatible with C and each other. */ 4609 ierr = PetscObjectBaseTypeCompare((PetscObject)B,MATSEQAIJ,&seqaij);CHKERRQ(ierr); 4610 if (!seqaij) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Input matrix is of wrong type"); 4611 if (rowemb) { 4612 ierr = ISGetLocalSize(rowemb,&m);CHKERRQ(ierr); 4613 if (m != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Row IS of size %D is incompatible with matrix row size %D",m,B->rmap->n); 4614 } else { 4615 if (C->rmap->n != B->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Input matrix is row-incompatible with the target matrix"); 4616 } 4617 if (colemb) { 4618 ierr = ISGetLocalSize(colemb,&n);CHKERRQ(ierr); 4619 if (n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diag col IS of size %D is incompatible with input matrix col size %D",n,B->cmap->n); 4620 } else { 4621 if (C->cmap->n != B->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Input matrix is col-incompatible with the target matrix"); 4622 } 4623 4624 Baij = (Mat_SeqAIJ*)(B->data); 4625 if (pattern == DIFFERENT_NONZERO_PATTERN) { 4626 ierr = PetscMalloc1(B->rmap->n,&nz);CHKERRQ(ierr); 4627 for (i=0; i<B->rmap->n; i++) { 4628 nz[i] = Baij->i[i+1] - Baij->i[i]; 4629 } 4630 ierr = MatSeqAIJSetPreallocation(C,0,nz);CHKERRQ(ierr); 4631 ierr = PetscFree(nz);CHKERRQ(ierr); 4632 } 4633 if (pattern == SUBSET_NONZERO_PATTERN) { 4634 ierr = MatZeroEntries(C);CHKERRQ(ierr); 4635 } 4636 count = 0; 4637 rowindices = NULL; 4638 colindices = NULL; 4639 if (rowemb) { 4640 ierr = ISGetIndices(rowemb,&rowindices);CHKERRQ(ierr); 4641 } 4642 if (colemb) { 4643 ierr = ISGetIndices(colemb,&colindices);CHKERRQ(ierr); 4644 } 4645 for (i=0; i<B->rmap->n; i++) { 4646 PetscInt row; 4647 row = i; 4648 if (rowindices) row = rowindices[i]; 4649 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 4650 PetscInt col; 4651 col = Baij->j[count]; 4652 if (colindices) col = colindices[col]; 4653 v = Baij->a[count]; 4654 ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr); 4655 ++count; 4656 } 4657 } 4658 /* FIXME: set C's nonzerostate correctly. */ 4659 /* Assembly for C is necessary. */ 4660 C->preallocated = PETSC_TRUE; 4661 C->assembled = PETSC_TRUE; 4662 C->was_assembled = PETSC_FALSE; 4663 PetscFunctionReturn(0); 4664 } 4665 4666 PetscFunctionList MatSeqAIJList = NULL; 4667 4668 /*@C 4669 MatSeqAIJSetType - Converts a MATSEQAIJ matrix to a subtype 4670 4671 Collective on Mat 4672 4673 Input Parameters: 4674 + mat - the matrix object 4675 - matype - matrix type 4676 4677 Options Database Key: 4678 . -mat_seqai_type <method> - for example seqaijcrl 4679 4680 4681 Level: intermediate 4682 4683 .keywords: Mat, MatType, set, method 4684 4685 .seealso: PCSetType(), VecSetType(), MatCreate(), MatType, Mat 4686 @*/ 4687 PetscErrorCode MatSeqAIJSetType(Mat mat, MatType matype) 4688 { 4689 PetscErrorCode ierr,(*r)(Mat,const MatType,MatReuse,Mat*); 4690 PetscBool sametype; 4691 4692 PetscFunctionBegin; 4693 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4694 ierr = PetscObjectTypeCompare((PetscObject)mat,matype,&sametype);CHKERRQ(ierr); 4695 if (sametype) PetscFunctionReturn(0); 4696 4697 ierr = PetscFunctionListFind(MatSeqAIJList,matype,&r);CHKERRQ(ierr); 4698 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown Mat type given: %s",matype); 4699 ierr = (*r)(mat,matype,MAT_INPLACE_MATRIX,&mat);CHKERRQ(ierr); 4700 PetscFunctionReturn(0); 4701 } 4702 4703 4704 /*@C 4705 MatSeqAIJRegister - - Adds a new sub-matrix type for sequential AIJ matrices 4706 4707 Not Collective 4708 4709 Input Parameters: 4710 + name - name of a new user-defined matrix type, for example MATSEQAIJCRL 4711 - function - routine to convert to subtype 4712 4713 Notes: 4714 MatSeqAIJRegister() may be called multiple times to add several user-defined solvers. 4715 4716 4717 Then, your matrix can be chosen with the procedural interface at runtime via the option 4718 $ -mat_seqaij_type my_mat 4719 4720 Level: advanced 4721 4722 .keywords: Mat, register 4723 4724 .seealso: MatSeqAIJRegisterAll() 4725 4726 4727 Level: advanced 4728 @*/ 4729 PetscErrorCode MatSeqAIJRegister(const char sname[],PetscErrorCode (*function)(Mat,MatType,MatReuse,Mat *)) 4730 { 4731 PetscErrorCode ierr; 4732 4733 PetscFunctionBegin; 4734 ierr = PetscFunctionListAdd(&MatSeqAIJList,sname,function);CHKERRQ(ierr); 4735 PetscFunctionReturn(0); 4736 } 4737 4738 PetscBool MatSeqAIJRegisterAllCalled = PETSC_FALSE; 4739 4740 /*@C 4741 MatSeqAIJRegisterAll - Registers all of the matrix subtypes of SeqAIJ 4742 4743 Not Collective 4744 4745 Level: advanced 4746 4747 Developers Note: CUSP and CUSPARSE do not yet support the MatConvert_SeqAIJ..() paradigm and thus cannot be registered here 4748 4749 .keywords: KSP, register, all 4750 4751 .seealso: MatRegisterAll(), MatSeqAIJRegister() 4752 @*/ 4753 PetscErrorCode MatSeqAIJRegisterAll(void) 4754 { 4755 PetscErrorCode ierr; 4756 4757 PetscFunctionBegin; 4758 if (MatSeqAIJRegisterAllCalled) PetscFunctionReturn(0); 4759 MatSeqAIJRegisterAllCalled = PETSC_TRUE; 4760 4761 ierr = MatSeqAIJRegister(MATSEQAIJCRL, MatConvert_SeqAIJ_SeqAIJCRL);CHKERRQ(ierr); 4762 ierr = MatSeqAIJRegister(MATSEQAIJPERM, MatConvert_SeqAIJ_SeqAIJPERM);CHKERRQ(ierr); 4763 #if defined(PETSC_HAVE_MKL_SPARSE) 4764 ierr = MatSeqAIJRegister(MATSEQAIJMKL, MatConvert_SeqAIJ_SeqAIJMKL);CHKERRQ(ierr); 4765 #endif 4766 #if defined(PETSC_HAVE_VIENNACL) && defined(PETSC_HAVE_VIENNACL_NO_CUDA) 4767 ierr = MatSeqAIJRegister(MATMPIAIJVIENNACL, MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 4768 #endif 4769 PetscFunctionReturn(0); 4770 } 4771 4772 /* 4773 Special version for direct calls from Fortran 4774 */ 4775 #include <petsc/private/fortranimpl.h> 4776 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4777 #define matsetvaluesseqaij_ MATSETVALUESSEQAIJ 4778 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4779 #define matsetvaluesseqaij_ matsetvaluesseqaij 4780 #endif 4781 4782 /* Change these macros so can be used in void function */ 4783 #undef CHKERRQ 4784 #define CHKERRQ(ierr) CHKERRABORT(PetscObjectComm((PetscObject)A),ierr) 4785 #undef SETERRQ2 4786 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 4787 #undef SETERRQ3 4788 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 4789 4790 PETSC_EXTERN void PETSC_STDCALL matsetvaluesseqaij_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[],InsertMode *isis, PetscErrorCode *_ierr) 4791 { 4792 Mat A = *AA; 4793 PetscInt m = *mm, n = *nn; 4794 InsertMode is = *isis; 4795 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4796 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N; 4797 PetscInt *imax,*ai,*ailen; 4798 PetscErrorCode ierr; 4799 PetscInt *aj,nonew = a->nonew,lastcol = -1; 4800 MatScalar *ap,value,*aa; 4801 PetscBool ignorezeroentries = a->ignorezeroentries; 4802 PetscBool roworiented = a->roworiented; 4803 4804 PetscFunctionBegin; 4805 MatCheckPreallocated(A,1); 4806 imax = a->imax; 4807 ai = a->i; 4808 ailen = a->ilen; 4809 aj = a->j; 4810 aa = a->a; 4811 4812 for (k=0; k<m; k++) { /* loop over added rows */ 4813 row = im[k]; 4814 if (row < 0) continue; 4815 #if defined(PETSC_USE_DEBUG) 4816 if (row >= A->rmap->n) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 4817 #endif 4818 rp = aj + ai[row]; ap = aa + ai[row]; 4819 rmax = imax[row]; nrow = ailen[row]; 4820 low = 0; 4821 high = nrow; 4822 for (l=0; l<n; l++) { /* loop over added columns */ 4823 if (in[l] < 0) continue; 4824 #if defined(PETSC_USE_DEBUG) 4825 if (in[l] >= A->cmap->n) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 4826 #endif 4827 col = in[l]; 4828 if (roworiented) value = v[l + k*n]; 4829 else value = v[k + l*m]; 4830 4831 if (value == 0.0 && ignorezeroentries && (is == ADD_VALUES)) continue; 4832 4833 if (col <= lastcol) low = 0; 4834 else high = nrow; 4835 lastcol = col; 4836 while (high-low > 5) { 4837 t = (low+high)/2; 4838 if (rp[t] > col) high = t; 4839 else low = t; 4840 } 4841 for (i=low; i<high; i++) { 4842 if (rp[i] > col) break; 4843 if (rp[i] == col) { 4844 if (is == ADD_VALUES) ap[i] += value; 4845 else ap[i] = value; 4846 goto noinsert; 4847 } 4848 } 4849 if (value == 0.0 && ignorezeroentries) goto noinsert; 4850 if (nonew == 1) goto noinsert; 4851 if (nonew == -1) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 4852 MatSeqXAIJReallocateAIJ(A,A->rmap->n,1,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 4853 N = nrow++ - 1; a->nz++; high++; 4854 /* shift up all the later entries in this row */ 4855 for (ii=N; ii>=i; ii--) { 4856 rp[ii+1] = rp[ii]; 4857 ap[ii+1] = ap[ii]; 4858 } 4859 rp[i] = col; 4860 ap[i] = value; 4861 A->nonzerostate++; 4862 noinsert:; 4863 low = i + 1; 4864 } 4865 ailen[row] = nrow; 4866 } 4867 PetscFunctionReturnVoid(); 4868 } 4869 4870