1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 case MAT_USE_SINGLE_PRECISION_SOLVES: 351 if (a->bs==4) { 352 int ierr; 353 a->single_precision_solves = PETSC_TRUE; 354 A->ops->solve = MatSolve_SeqBAIJ_Update; 355 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 356 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 357 } else { 358 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 359 } 360 break; 361 default: 362 SETERRQ(PETSC_ERR_SUP,"unknown option"); 363 } 364 PetscFunctionReturn(0); 365 } 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "MatGetRow_SeqBAIJ" 369 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 372 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 373 MatScalar *aa,*aa_i; 374 PetscScalar *v_i; 375 376 PetscFunctionBegin; 377 bs = a->bs; 378 ai = a->i; 379 aj = a->j; 380 aa = a->a; 381 bs2 = a->bs2; 382 383 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 384 385 bn = row/bs; /* Block number */ 386 bp = row % bs; /* Block Position */ 387 M = ai[bn+1] - ai[bn]; 388 *nz = bs*M; 389 390 if (v) { 391 *v = 0; 392 if (*nz) { 393 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 394 for (i=0; i<M; i++) { /* for each block in the block row */ 395 v_i = *v + i*bs; 396 aa_i = aa + bs2*(ai[bn] + i); 397 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 398 } 399 } 400 } 401 402 if (idx) { 403 *idx = 0; 404 if (*nz) { 405 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 406 for (i=0; i<M; i++) { /* for each block in the block row */ 407 idx_i = *idx + i*bs; 408 itmp = bs*aj[ai[bn] + i]; 409 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 410 } 411 } 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 418 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 419 { 420 int ierr; 421 422 PetscFunctionBegin; 423 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 424 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatTranspose_SeqBAIJ" 430 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 431 { 432 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 433 Mat C; 434 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 435 int *rows,*cols,bs2=a->bs2; 436 PetscScalar *array; 437 438 PetscFunctionBegin; 439 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 440 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 441 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 442 443 #if defined(PETSC_USE_MAT_SINGLE) 444 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 445 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 446 #else 447 array = a->a; 448 #endif 449 450 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 451 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 452 ierr = PetscFree(col);CHKERRQ(ierr); 453 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 454 cols = rows + bs; 455 for (i=0; i<mbs; i++) { 456 cols[0] = i*bs; 457 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 458 len = ai[i+1] - ai[i]; 459 for (j=0; j<len; j++) { 460 rows[0] = (*aj++)*bs; 461 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 462 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 463 array += bs2; 464 } 465 } 466 ierr = PetscFree(rows);CHKERRQ(ierr); 467 #if defined(PETSC_USE_MAT_SINGLE) 468 ierr = PetscFree(array); 469 #endif 470 471 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 473 474 if (B) { 475 *B = C; 476 } else { 477 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 484 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 487 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 488 PetscScalar *aa; 489 FILE *file; 490 491 PetscFunctionBegin; 492 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 493 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 494 col_lens[0] = MAT_FILE_COOKIE; 495 496 col_lens[1] = A->m; 497 col_lens[2] = A->n; 498 col_lens[3] = a->nz*bs2; 499 500 /* store lengths of each row and write (including header) to file */ 501 count = 0; 502 for (i=0; i<a->mbs; i++) { 503 for (j=0; j<bs; j++) { 504 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 505 } 506 } 507 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 508 ierr = PetscFree(col_lens);CHKERRQ(ierr); 509 510 /* store column indices (zero start index) */ 511 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 512 count = 0; 513 for (i=0; i<a->mbs; i++) { 514 for (j=0; j<bs; j++) { 515 for (k=a->i[i]; k<a->i[i+1]; k++) { 516 for (l=0; l<bs; l++) { 517 jj[count++] = bs*a->j[k] + l; 518 } 519 } 520 } 521 } 522 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 523 ierr = PetscFree(jj);CHKERRQ(ierr); 524 525 /* store nonzero values */ 526 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 527 count = 0; 528 for (i=0; i<a->mbs; i++) { 529 for (j=0; j<bs; j++) { 530 for (k=a->i[i]; k<a->i[i+1]; k++) { 531 for (l=0; l<bs; l++) { 532 aa[count++] = a->a[bs2*k + l*bs + j]; 533 } 534 } 535 } 536 } 537 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 538 ierr = PetscFree(aa);CHKERRQ(ierr); 539 540 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 541 if (file) { 542 fprintf(file,"-matload_block_size %d\n",a->bs); 543 } 544 PetscFunctionReturn(0); 545 } 546 547 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 551 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 552 { 553 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 554 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 555 PetscViewerFormat format; 556 557 PetscFunctionBegin; 558 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 559 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 560 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 561 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 562 Mat aij; 563 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 564 ierr = MatView(aij,viewer);CHKERRQ(ierr); 565 ierr = MatDestroy(aij);CHKERRQ(ierr); 566 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 567 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 568 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 569 #endif 570 PetscFunctionReturn(0); 571 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 572 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 573 for (i=0; i<a->mbs; i++) { 574 for (j=0; j<bs; j++) { 575 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 576 for (k=a->i[i]; k<a->i[i+1]; k++) { 577 for (l=0; l<bs; l++) { 578 #if defined(PETSC_USE_COMPLEX) 579 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 580 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 581 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 582 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 583 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 584 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 585 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 586 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 587 } 588 #else 589 if (a->a[bs2*k + l*bs + j] != 0.0) { 590 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 591 } 592 #endif 593 } 594 } 595 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 596 } 597 } 598 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 601 for (i=0; i<a->mbs; i++) { 602 for (j=0; j<bs; j++) { 603 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 604 for (k=a->i[i]; k<a->i[i+1]; k++) { 605 for (l=0; l<bs; l++) { 606 #if defined(PETSC_USE_COMPLEX) 607 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 608 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 609 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 610 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 611 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 612 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 615 } 616 #else 617 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 618 #endif 619 } 620 } 621 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 622 } 623 } 624 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 625 } 626 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 632 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 633 { 634 Mat A = (Mat) Aa; 635 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 636 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 637 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 638 MatScalar *aa; 639 PetscViewer viewer; 640 641 PetscFunctionBegin; 642 643 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 644 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 645 646 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 647 648 /* loop over matrix elements drawing boxes */ 649 color = PETSC_DRAW_BLUE; 650 for (i=0,row=0; i<mbs; i++,row+=bs) { 651 for (j=a->i[i]; j<a->i[i+1]; j++) { 652 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 653 x_l = a->j[j]*bs; x_r = x_l + 1.0; 654 aa = a->a + j*bs2; 655 for (k=0; k<bs; k++) { 656 for (l=0; l<bs; l++) { 657 if (PetscRealPart(*aa++) >= 0.) continue; 658 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 659 } 660 } 661 } 662 } 663 color = PETSC_DRAW_CYAN; 664 for (i=0,row=0; i<mbs; i++,row+=bs) { 665 for (j=a->i[i]; j<a->i[i+1]; j++) { 666 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 667 x_l = a->j[j]*bs; x_r = x_l + 1.0; 668 aa = a->a + j*bs2; 669 for (k=0; k<bs; k++) { 670 for (l=0; l<bs; l++) { 671 if (PetscRealPart(*aa++) != 0.) continue; 672 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 673 } 674 } 675 } 676 } 677 678 color = PETSC_DRAW_RED; 679 for (i=0,row=0; i<mbs; i++,row+=bs) { 680 for (j=a->i[i]; j<a->i[i+1]; j++) { 681 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 682 x_l = a->j[j]*bs; x_r = x_l + 1.0; 683 aa = a->a + j*bs2; 684 for (k=0; k<bs; k++) { 685 for (l=0; l<bs; l++) { 686 if (PetscRealPart(*aa++) <= 0.) continue; 687 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 688 } 689 } 690 } 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 697 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 698 { 699 int ierr; 700 PetscReal xl,yl,xr,yr,w,h; 701 PetscDraw draw; 702 PetscTruth isnull; 703 704 PetscFunctionBegin; 705 706 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 707 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 708 709 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 710 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 711 xr += w; yr += h; xl = -w; yl = -h; 712 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 713 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 714 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_SeqBAIJ" 720 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 721 { 722 int ierr; 723 PetscTruth issocket,isascii,isbinary,isdraw; 724 725 PetscFunctionBegin; 726 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 727 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 730 if (issocket) { 731 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 732 } else if (isascii){ 733 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 734 } else if (isbinary) { 735 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 736 } else if (isdraw) { 737 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 738 } else { 739 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatGetValues_SeqBAIJ" 747 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 748 { 749 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 750 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 751 int *ai = a->i,*ailen = a->ilen; 752 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 753 MatScalar *ap,*aa = a->a,zero = 0.0; 754 755 PetscFunctionBegin; 756 for (k=0; k<m; k++) { /* loop over rows */ 757 row = im[k]; brow = row/bs; 758 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 759 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 760 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 761 nrow = ailen[brow]; 762 for (l=0; l<n; l++) { /* loop over columns */ 763 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 764 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 765 col = in[l] ; 766 bcol = col/bs; 767 cidx = col%bs; 768 ridx = row%bs; 769 high = nrow; 770 low = 0; /* assume unsorted */ 771 while (high-low > 5) { 772 t = (low+high)/2; 773 if (rp[t] > bcol) high = t; 774 else low = t; 775 } 776 for (i=low; i<high; i++) { 777 if (rp[i] > bcol) break; 778 if (rp[i] == bcol) { 779 *v++ = ap[bs2*i+bs*cidx+ridx]; 780 goto finished; 781 } 782 } 783 *v++ = zero; 784 finished:; 785 } 786 } 787 PetscFunctionReturn(0); 788 } 789 790 #if defined(PETSC_USE_MAT_SINGLE) 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 793 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 794 { 795 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 796 int ierr,i,N = m*n*b->bs2; 797 MatScalar *vsingle; 798 799 PetscFunctionBegin; 800 if (N > b->setvalueslen) { 801 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 802 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 803 b->setvalueslen = N; 804 } 805 vsingle = b->setvaluescopy; 806 for (i=0; i<N; i++) { 807 vsingle[i] = v[i]; 808 } 809 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 810 PetscFunctionReturn(0); 811 } 812 #endif 813 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 817 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 820 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 821 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 822 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 823 PetscTruth roworiented=a->roworiented; 824 MatScalar *value = v,*ap,*aa = a->a,*bap; 825 826 PetscFunctionBegin; 827 if (roworiented) { 828 stepval = (n-1)*bs; 829 } else { 830 stepval = (m-1)*bs; 831 } 832 for (k=0; k<m; k++) { /* loop over added rows */ 833 row = im[k]; 834 if (row < 0) continue; 835 #if defined(PETSC_USE_BOPT_g) 836 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 837 #endif 838 rp = aj + ai[row]; 839 ap = aa + bs2*ai[row]; 840 rmax = imax[row]; 841 nrow = ailen[row]; 842 low = 0; 843 for (l=0; l<n; l++) { /* loop over added columns */ 844 if (in[l] < 0) continue; 845 #if defined(PETSC_USE_BOPT_g) 846 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 847 #endif 848 col = in[l]; 849 if (roworiented) { 850 value = v + k*(stepval+bs)*bs + l*bs; 851 } else { 852 value = v + l*(stepval+bs)*bs + k*bs; 853 } 854 if (!sorted) low = 0; high = nrow; 855 while (high-low > 7) { 856 t = (low+high)/2; 857 if (rp[t] > col) high = t; 858 else low = t; 859 } 860 for (i=low; i<high; i++) { 861 if (rp[i] > col) break; 862 if (rp[i] == col) { 863 bap = ap + bs2*i; 864 if (roworiented) { 865 if (is == ADD_VALUES) { 866 for (ii=0; ii<bs; ii++,value+=stepval) { 867 for (jj=ii; jj<bs2; jj+=bs) { 868 bap[jj] += *value++; 869 } 870 } 871 } else { 872 for (ii=0; ii<bs; ii++,value+=stepval) { 873 for (jj=ii; jj<bs2; jj+=bs) { 874 bap[jj] = *value++; 875 } 876 } 877 } 878 } else { 879 if (is == ADD_VALUES) { 880 for (ii=0; ii<bs; ii++,value+=stepval) { 881 for (jj=0; jj<bs; jj++) { 882 *bap++ += *value++; 883 } 884 } 885 } else { 886 for (ii=0; ii<bs; ii++,value+=stepval) { 887 for (jj=0; jj<bs; jj++) { 888 *bap++ = *value++; 889 } 890 } 891 } 892 } 893 goto noinsert2; 894 } 895 } 896 if (nonew == 1) goto noinsert2; 897 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 898 if (nrow >= rmax) { 899 /* there is no extra room in row, therefore enlarge */ 900 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 901 MatScalar *new_a; 902 903 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 904 905 /* malloc new storage space */ 906 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 907 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 908 new_j = (int*)(new_a + bs2*new_nz); 909 new_i = new_j + new_nz; 910 911 /* copy over old data into new slots */ 912 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 913 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 914 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 915 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 916 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 917 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 918 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 920 /* free up old matrix storage */ 921 ierr = PetscFree(a->a);CHKERRQ(ierr); 922 if (!a->singlemalloc) { 923 ierr = PetscFree(a->i);CHKERRQ(ierr); 924 ierr = PetscFree(a->j);CHKERRQ(ierr); 925 } 926 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 927 a->singlemalloc = PETSC_TRUE; 928 929 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 930 rmax = imax[row] = imax[row] + CHUNKSIZE; 931 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 932 a->maxnz += bs2*CHUNKSIZE; 933 a->reallocs++; 934 a->nz++; 935 } 936 N = nrow++ - 1; 937 /* shift up all the later entries in this row */ 938 for (ii=N; ii>=i; ii--) { 939 rp[ii+1] = rp[ii]; 940 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 941 } 942 if (N >= i) { 943 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 944 } 945 rp[i] = col; 946 bap = ap + bs2*i; 947 if (roworiented) { 948 for (ii=0; ii<bs; ii++,value+=stepval) { 949 for (jj=ii; jj<bs2; jj+=bs) { 950 bap[jj] = *value++; 951 } 952 } 953 } else { 954 for (ii=0; ii<bs; ii++,value+=stepval) { 955 for (jj=0; jj<bs; jj++) { 956 *bap++ = *value++; 957 } 958 } 959 } 960 noinsert2:; 961 low = i; 962 } 963 ailen[row] = nrow; 964 } 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 970 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 974 int m = A->m,*ip,N,*ailen = a->ilen; 975 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 976 MatScalar *aa = a->a,*ap; 977 #if defined(PETSC_HAVE_DSCPACK) 978 PetscTruth flag; 979 #endif 980 981 PetscFunctionBegin; 982 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 983 984 if (m) rmax = ailen[0]; 985 for (i=1; i<mbs; i++) { 986 /* move each row back by the amount of empty slots (fshift) before it*/ 987 fshift += imax[i-1] - ailen[i-1]; 988 rmax = PetscMax(rmax,ailen[i]); 989 if (fshift) { 990 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 991 N = ailen[i]; 992 for (j=0; j<N; j++) { 993 ip[j-fshift] = ip[j]; 994 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 995 } 996 } 997 ai[i] = ai[i-1] + ailen[i-1]; 998 } 999 if (mbs) { 1000 fshift += imax[mbs-1] - ailen[mbs-1]; 1001 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1002 } 1003 /* reset ilen and imax for each row */ 1004 for (i=0; i<mbs; i++) { 1005 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1006 } 1007 a->nz = ai[mbs]; 1008 1009 /* diagonals may have moved, so kill the diagonal pointers */ 1010 if (fshift && a->diag) { 1011 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1012 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1013 a->diag = 0; 1014 } 1015 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1016 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1017 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1018 a->reallocs = 0; 1019 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1020 1021 #if defined(PETSC_HAVE_DSCPACK) 1022 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1023 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1024 #endif 1025 1026 PetscFunctionReturn(0); 1027 } 1028 1029 1030 1031 /* 1032 This function returns an array of flags which indicate the locations of contiguous 1033 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1034 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1035 Assume: sizes should be long enough to hold all the values. 1036 */ 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1039 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1040 { 1041 int i,j,k,row; 1042 PetscTruth flg; 1043 1044 PetscFunctionBegin; 1045 for (i=0,j=0; i<n; j++) { 1046 row = idx[i]; 1047 if (row%bs!=0) { /* Not the begining of a block */ 1048 sizes[j] = 1; 1049 i++; 1050 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1051 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1052 i++; 1053 } else { /* Begining of the block, so check if the complete block exists */ 1054 flg = PETSC_TRUE; 1055 for (k=1; k<bs; k++) { 1056 if (row+k != idx[i+k]) { /* break in the block */ 1057 flg = PETSC_FALSE; 1058 break; 1059 } 1060 } 1061 if (flg == PETSC_TRUE) { /* No break in the bs */ 1062 sizes[j] = bs; 1063 i+= bs; 1064 } else { 1065 sizes[j] = 1; 1066 i++; 1067 } 1068 } 1069 } 1070 *bs_max = j; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1076 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1077 { 1078 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1079 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1080 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1081 PetscScalar zero = 0.0; 1082 MatScalar *aa; 1083 1084 PetscFunctionBegin; 1085 /* Make a copy of the IS and sort it */ 1086 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1087 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1088 1089 /* allocate memory for rows,sizes */ 1090 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1091 sizes = rows + is_n; 1092 1093 /* copy IS values to rows, and sort them */ 1094 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1095 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1096 if (baij->keepzeroedrows) { 1097 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1098 bs_max = is_n; 1099 } else { 1100 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1101 } 1102 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1103 1104 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1105 row = rows[j]; 1106 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1107 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1108 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1109 if (sizes[i] == bs && !baij->keepzeroedrows) { 1110 if (diag) { 1111 if (baij->ilen[row/bs] > 0) { 1112 baij->ilen[row/bs] = 1; 1113 baij->j[baij->i[row/bs]] = row/bs; 1114 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1115 } 1116 /* Now insert all the diagonal values for this bs */ 1117 for (k=0; k<bs; k++) { 1118 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1119 } 1120 } else { /* (!diag) */ 1121 baij->ilen[row/bs] = 0; 1122 } /* end (!diag) */ 1123 } else { /* (sizes[i] != bs) */ 1124 #if defined (PETSC_USE_DEBUG) 1125 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1126 #endif 1127 for (k=0; k<count; k++) { 1128 aa[0] = zero; 1129 aa += bs; 1130 } 1131 if (diag) { 1132 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1133 } 1134 } 1135 } 1136 1137 ierr = PetscFree(rows);CHKERRQ(ierr); 1138 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1144 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1145 { 1146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1147 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1148 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1149 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1150 int ridx,cidx,bs2=a->bs2,ierr; 1151 PetscTruth roworiented=a->roworiented; 1152 MatScalar *ap,value,*aa=a->a,*bap; 1153 1154 PetscFunctionBegin; 1155 for (k=0; k<m; k++) { /* loop over added rows */ 1156 row = im[k]; brow = row/bs; 1157 if (row < 0) continue; 1158 #if defined(PETSC_USE_BOPT_g) 1159 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1160 #endif 1161 rp = aj + ai[brow]; 1162 ap = aa + bs2*ai[brow]; 1163 rmax = imax[brow]; 1164 nrow = ailen[brow]; 1165 low = 0; 1166 for (l=0; l<n; l++) { /* loop over added columns */ 1167 if (in[l] < 0) continue; 1168 #if defined(PETSC_USE_BOPT_g) 1169 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1170 #endif 1171 col = in[l]; bcol = col/bs; 1172 ridx = row % bs; cidx = col % bs; 1173 if (roworiented) { 1174 value = v[l + k*n]; 1175 } else { 1176 value = v[k + l*m]; 1177 } 1178 if (!sorted) low = 0; high = nrow; 1179 while (high-low > 7) { 1180 t = (low+high)/2; 1181 if (rp[t] > bcol) high = t; 1182 else low = t; 1183 } 1184 for (i=low; i<high; i++) { 1185 if (rp[i] > bcol) break; 1186 if (rp[i] == bcol) { 1187 bap = ap + bs2*i + bs*cidx + ridx; 1188 if (is == ADD_VALUES) *bap += value; 1189 else *bap = value; 1190 goto noinsert1; 1191 } 1192 } 1193 if (nonew == 1) goto noinsert1; 1194 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1195 if (nrow >= rmax) { 1196 /* there is no extra room in row, therefore enlarge */ 1197 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1198 MatScalar *new_a; 1199 1200 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1201 1202 /* Malloc new storage space */ 1203 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1204 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1205 new_j = (int*)(new_a + bs2*new_nz); 1206 new_i = new_j + new_nz; 1207 1208 /* copy over old data into new slots */ 1209 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1210 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1211 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1212 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1213 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1214 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1215 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1216 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1217 /* free up old matrix storage */ 1218 ierr = PetscFree(a->a);CHKERRQ(ierr); 1219 if (!a->singlemalloc) { 1220 ierr = PetscFree(a->i);CHKERRQ(ierr); 1221 ierr = PetscFree(a->j);CHKERRQ(ierr); 1222 } 1223 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1224 a->singlemalloc = PETSC_TRUE; 1225 1226 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1227 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1228 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1229 a->maxnz += bs2*CHUNKSIZE; 1230 a->reallocs++; 1231 a->nz++; 1232 } 1233 N = nrow++ - 1; 1234 /* shift up all the later entries in this row */ 1235 for (ii=N; ii>=i; ii--) { 1236 rp[ii+1] = rp[ii]; 1237 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1238 } 1239 if (N>=i) { 1240 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1241 } 1242 rp[i] = bcol; 1243 ap[bs2*i + bs*cidx + ridx] = value; 1244 noinsert1:; 1245 low = i; 1246 } 1247 ailen[brow] = nrow; 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1255 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1256 { 1257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1258 Mat outA; 1259 int ierr; 1260 PetscTruth row_identity,col_identity; 1261 1262 PetscFunctionBegin; 1263 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1264 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1265 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1266 if (!row_identity || !col_identity) { 1267 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1268 } 1269 1270 outA = inA; 1271 inA->factor = FACTOR_LU; 1272 1273 if (!a->diag) { 1274 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1275 } 1276 1277 a->row = row; 1278 a->col = col; 1279 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1280 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1281 1282 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1283 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1284 PetscLogObjectParent(inA,a->icol); 1285 1286 /* 1287 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1288 for ILU(0) factorization with natural ordering 1289 */ 1290 if (a->bs < 8) { 1291 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1292 } else { 1293 if (!a->solve_work) { 1294 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1295 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1296 } 1297 } 1298 1299 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1300 1301 PetscFunctionReturn(0); 1302 } 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1305 int MatPrintHelp_SeqBAIJ(Mat A) 1306 { 1307 static PetscTruth called = PETSC_FALSE; 1308 MPI_Comm comm = A->comm; 1309 int ierr; 1310 1311 PetscFunctionBegin; 1312 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1313 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1314 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 EXTERN_C_BEGIN 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1321 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1322 { 1323 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1324 int i,nz,nbs; 1325 1326 PetscFunctionBegin; 1327 nz = baij->maxnz/baij->bs2; 1328 nbs = baij->nbs; 1329 for (i=0; i<nz; i++) { 1330 baij->j[i] = indices[i]; 1331 } 1332 baij->nz = nz; 1333 for (i=0; i<nbs; i++) { 1334 baij->ilen[i] = baij->imax[i]; 1335 } 1336 1337 PetscFunctionReturn(0); 1338 } 1339 EXTERN_C_END 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1343 /*@ 1344 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1345 in the matrix. 1346 1347 Input Parameters: 1348 + mat - the SeqBAIJ matrix 1349 - indices - the column indices 1350 1351 Level: advanced 1352 1353 Notes: 1354 This can be called if you have precomputed the nonzero structure of the 1355 matrix and want to provide it to the matrix object to improve the performance 1356 of the MatSetValues() operation. 1357 1358 You MUST have set the correct numbers of nonzeros per row in the call to 1359 MatCreateSeqBAIJ(). 1360 1361 MUST be called before any calls to MatSetValues(); 1362 1363 @*/ 1364 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1365 { 1366 int ierr,(*f)(Mat,int *); 1367 1368 PetscFunctionBegin; 1369 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1370 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1371 if (f) { 1372 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1373 } else { 1374 SETERRQ(1,"Wrong type of matrix to set column indices"); 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1381 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1382 { 1383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1384 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1385 PetscReal atmp; 1386 PetscScalar *x,zero = 0.0; 1387 MatScalar *aa; 1388 int ncols,brow,krow,kcol; 1389 1390 PetscFunctionBegin; 1391 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1392 bs = a->bs; 1393 aa = a->a; 1394 ai = a->i; 1395 aj = a->j; 1396 mbs = a->mbs; 1397 1398 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1399 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1400 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1401 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1402 for (i=0; i<mbs; i++) { 1403 ncols = ai[1] - ai[0]; ai++; 1404 brow = bs*i; 1405 for (j=0; j<ncols; j++){ 1406 /* bcol = bs*(*aj); */ 1407 for (kcol=0; kcol<bs; kcol++){ 1408 for (krow=0; krow<bs; krow++){ 1409 atmp = PetscAbsScalar(*aa); aa++; 1410 row = brow + krow; /* row index */ 1411 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1412 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1413 } 1414 } 1415 aj++; 1416 } 1417 } 1418 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1424 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1425 { 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1435 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1436 { 1437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1438 PetscFunctionBegin; 1439 *array = a->a; 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1445 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1446 { 1447 PetscFunctionBegin; 1448 PetscFunctionReturn(0); 1449 } 1450 1451 /* -------------------------------------------------------------------*/ 1452 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1453 MatGetRow_SeqBAIJ, 1454 MatRestoreRow_SeqBAIJ, 1455 MatMult_SeqBAIJ_N, 1456 MatMultAdd_SeqBAIJ_N, 1457 MatMultTranspose_SeqBAIJ, 1458 MatMultTransposeAdd_SeqBAIJ, 1459 MatSolve_SeqBAIJ_N, 1460 0, 1461 0, 1462 0, 1463 MatLUFactor_SeqBAIJ, 1464 0, 1465 0, 1466 MatTranspose_SeqBAIJ, 1467 MatGetInfo_SeqBAIJ, 1468 MatEqual_SeqBAIJ, 1469 MatGetDiagonal_SeqBAIJ, 1470 MatDiagonalScale_SeqBAIJ, 1471 MatNorm_SeqBAIJ, 1472 0, 1473 MatAssemblyEnd_SeqBAIJ, 1474 0, 1475 MatSetOption_SeqBAIJ, 1476 MatZeroEntries_SeqBAIJ, 1477 MatZeroRows_SeqBAIJ, 1478 MatLUFactorSymbolic_SeqBAIJ, 1479 MatLUFactorNumeric_SeqBAIJ_N, 1480 0, 1481 0, 1482 MatSetUpPreallocation_SeqBAIJ, 1483 MatILUFactorSymbolic_SeqBAIJ, 1484 0, 1485 MatGetArray_SeqBAIJ, 1486 MatRestoreArray_SeqBAIJ, 1487 MatDuplicate_SeqBAIJ, 1488 0, 1489 0, 1490 MatILUFactor_SeqBAIJ, 1491 0, 1492 0, 1493 MatGetSubMatrices_SeqBAIJ, 1494 MatIncreaseOverlap_SeqBAIJ, 1495 MatGetValues_SeqBAIJ, 1496 0, 1497 MatPrintHelp_SeqBAIJ, 1498 MatScale_SeqBAIJ, 1499 0, 1500 0, 1501 0, 1502 MatGetBlockSize_SeqBAIJ, 1503 MatGetRowIJ_SeqBAIJ, 1504 MatRestoreRowIJ_SeqBAIJ, 1505 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 MatSetValuesBlocked_SeqBAIJ, 1512 MatGetSubMatrix_SeqBAIJ, 1513 MatDestroy_SeqBAIJ, 1514 MatView_SeqBAIJ, 1515 MatGetPetscMaps_Petsc, 1516 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 MatGetRowMax_SeqBAIJ, 1523 MatConvert_Basic}; 1524 1525 EXTERN_C_BEGIN 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1528 int MatStoreValues_SeqBAIJ(Mat mat) 1529 { 1530 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1531 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1532 int ierr; 1533 1534 PetscFunctionBegin; 1535 if (aij->nonew != 1) { 1536 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1537 } 1538 1539 /* allocate space for values if not already there */ 1540 if (!aij->saved_values) { 1541 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1542 } 1543 1544 /* copy values over */ 1545 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 EXTERN_C_END 1549 1550 EXTERN_C_BEGIN 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1553 int MatRetrieveValues_SeqBAIJ(Mat mat) 1554 { 1555 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1556 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1557 1558 PetscFunctionBegin; 1559 if (aij->nonew != 1) { 1560 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1561 } 1562 if (!aij->saved_values) { 1563 SETERRQ(1,"Must call MatStoreValues(A);first"); 1564 } 1565 1566 /* copy values over */ 1567 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 EXTERN_C_END 1571 1572 EXTERN_C_BEGIN 1573 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1574 EXTERN_C_END 1575 1576 EXTERN_C_BEGIN 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatCreate_SeqBAIJ" 1579 int MatCreate_SeqBAIJ(Mat B) 1580 { 1581 int ierr,size; 1582 Mat_SeqBAIJ *b; 1583 1584 PetscFunctionBegin; 1585 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1586 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1587 1588 B->m = B->M = PetscMax(B->m,B->M); 1589 B->n = B->N = PetscMax(B->n,B->N); 1590 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1591 B->data = (void*)b; 1592 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1593 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1594 B->factor = 0; 1595 B->lupivotthreshold = 1.0; 1596 B->mapping = 0; 1597 b->row = 0; 1598 b->col = 0; 1599 b->icol = 0; 1600 b->reallocs = 0; 1601 b->saved_values = 0; 1602 #if defined(PETSC_USE_MAT_SINGLE) 1603 b->setvalueslen = 0; 1604 b->setvaluescopy = PETSC_NULL; 1605 #endif 1606 b->single_precision_solves = PETSC_FALSE; 1607 1608 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1609 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1610 1611 b->sorted = PETSC_FALSE; 1612 b->roworiented = PETSC_TRUE; 1613 b->nonew = 0; 1614 b->diag = 0; 1615 b->solve_work = 0; 1616 b->mult_work = 0; 1617 B->spptr = 0; 1618 B->info.nz_unneeded = (PetscReal)b->maxnz; 1619 b->keepzeroedrows = PETSC_FALSE; 1620 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1622 "MatStoreValues_SeqBAIJ", 1623 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1625 "MatRetrieveValues_SeqBAIJ", 1626 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1628 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1629 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1631 "MatConvert_SeqBAIJ_SeqAIJ", 1632 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 EXTERN_C_END 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1639 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1640 { 1641 Mat C; 1642 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1643 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1644 1645 PetscFunctionBegin; 1646 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1647 1648 *B = 0; 1649 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1650 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1651 c = (Mat_SeqBAIJ*)C->data; 1652 1653 c->bs = a->bs; 1654 c->bs2 = a->bs2; 1655 c->mbs = a->mbs; 1656 c->nbs = a->nbs; 1657 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1658 1659 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1660 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1661 for (i=0; i<mbs; i++) { 1662 c->imax[i] = a->imax[i]; 1663 c->ilen[i] = a->ilen[i]; 1664 } 1665 1666 /* allocate the matrix space */ 1667 c->singlemalloc = PETSC_TRUE; 1668 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1669 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1670 c->j = (int*)(c->a + nz*bs2); 1671 c->i = c->j + nz; 1672 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1673 if (mbs > 0) { 1674 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1675 if (cpvalues == MAT_COPY_VALUES) { 1676 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1677 } else { 1678 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1679 } 1680 } 1681 1682 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1683 c->sorted = a->sorted; 1684 c->roworiented = a->roworiented; 1685 c->nonew = a->nonew; 1686 1687 if (a->diag) { 1688 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1689 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1690 for (i=0; i<mbs; i++) { 1691 c->diag[i] = a->diag[i]; 1692 } 1693 } else c->diag = 0; 1694 c->nz = a->nz; 1695 c->maxnz = a->maxnz; 1696 c->solve_work = 0; 1697 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1698 c->mult_work = 0; 1699 C->preallocated = PETSC_TRUE; 1700 C->assembled = PETSC_TRUE; 1701 *B = C; 1702 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 EXTERN_C_BEGIN 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatLoad_SeqBAIJ" 1709 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1710 { 1711 Mat_SeqBAIJ *a; 1712 Mat B; 1713 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1714 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1715 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1716 int *masked,nmask,tmp,bs2,ishift; 1717 PetscScalar *aa; 1718 MPI_Comm comm = ((PetscObject)viewer)->comm; 1719 1720 PetscFunctionBegin; 1721 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1722 bs2 = bs*bs; 1723 1724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1725 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1726 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1727 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1728 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1729 M = header[1]; N = header[2]; nz = header[3]; 1730 1731 if (header[3] < 0) { 1732 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1733 } 1734 1735 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1736 1737 /* 1738 This code adds extra rows to make sure the number of rows is 1739 divisible by the blocksize 1740 */ 1741 mbs = M/bs; 1742 extra_rows = bs - M + bs*(mbs); 1743 if (extra_rows == bs) extra_rows = 0; 1744 else mbs++; 1745 if (extra_rows) { 1746 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1747 } 1748 1749 /* read in row lengths */ 1750 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1751 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1752 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1753 1754 /* read in column indices */ 1755 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1756 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1757 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1758 1759 /* loop over row lengths determining block row lengths */ 1760 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1761 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1762 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1763 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1764 masked = mask + mbs; 1765 rowcount = 0; nzcount = 0; 1766 for (i=0; i<mbs; i++) { 1767 nmask = 0; 1768 for (j=0; j<bs; j++) { 1769 kmax = rowlengths[rowcount]; 1770 for (k=0; k<kmax; k++) { 1771 tmp = jj[nzcount++]/bs; 1772 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1773 } 1774 rowcount++; 1775 } 1776 browlengths[i] += nmask; 1777 /* zero out the mask elements we set */ 1778 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1779 } 1780 1781 /* create our matrix */ 1782 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1783 B = *A; 1784 a = (Mat_SeqBAIJ*)B->data; 1785 1786 /* set matrix "i" values */ 1787 a->i[0] = 0; 1788 for (i=1; i<= mbs; i++) { 1789 a->i[i] = a->i[i-1] + browlengths[i-1]; 1790 a->ilen[i-1] = browlengths[i-1]; 1791 } 1792 a->nz = 0; 1793 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1794 1795 /* read in nonzero values */ 1796 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1797 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1798 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1799 1800 /* set "a" and "j" values into matrix */ 1801 nzcount = 0; jcount = 0; 1802 for (i=0; i<mbs; i++) { 1803 nzcountb = nzcount; 1804 nmask = 0; 1805 for (j=0; j<bs; j++) { 1806 kmax = rowlengths[i*bs+j]; 1807 for (k=0; k<kmax; k++) { 1808 tmp = jj[nzcount++]/bs; 1809 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1810 } 1811 } 1812 /* sort the masked values */ 1813 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1814 1815 /* set "j" values into matrix */ 1816 maskcount = 1; 1817 for (j=0; j<nmask; j++) { 1818 a->j[jcount++] = masked[j]; 1819 mask[masked[j]] = maskcount++; 1820 } 1821 /* set "a" values into matrix */ 1822 ishift = bs2*a->i[i]; 1823 for (j=0; j<bs; j++) { 1824 kmax = rowlengths[i*bs+j]; 1825 for (k=0; k<kmax; k++) { 1826 tmp = jj[nzcountb]/bs ; 1827 block = mask[tmp] - 1; 1828 point = jj[nzcountb] - bs*tmp; 1829 idx = ishift + bs2*block + j + bs*point; 1830 a->a[idx] = (MatScalar)aa[nzcountb++]; 1831 } 1832 } 1833 /* zero out the mask elements we set */ 1834 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1835 } 1836 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1837 1838 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1839 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1840 ierr = PetscFree(aa);CHKERRQ(ierr); 1841 ierr = PetscFree(jj);CHKERRQ(ierr); 1842 ierr = PetscFree(mask);CHKERRQ(ierr); 1843 1844 B->assembled = PETSC_TRUE; 1845 1846 ierr = MatView_Private(B);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 EXTERN_C_END 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatCreateSeqBAIJ" 1853 /*@C 1854 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1855 compressed row) format. For good matrix assembly performance the 1856 user should preallocate the matrix storage by setting the parameter nz 1857 (or the array nnz). By setting these parameters accurately, performance 1858 during matrix assembly can be increased by more than a factor of 50. 1859 1860 Collective on MPI_Comm 1861 1862 Input Parameters: 1863 + comm - MPI communicator, set to PETSC_COMM_SELF 1864 . bs - size of block 1865 . m - number of rows 1866 . n - number of columns 1867 . nz - number of nonzero blocks per block row (same for all rows) 1868 - nnz - array containing the number of nonzero blocks in the various block rows 1869 (possibly different for each block row) or PETSC_NULL 1870 1871 Output Parameter: 1872 . A - the matrix 1873 1874 Options Database Keys: 1875 . -mat_no_unroll - uses code that does not unroll the loops in the 1876 block calculations (much slower) 1877 . -mat_block_size - size of the blocks to use 1878 1879 Level: intermediate 1880 1881 Notes: 1882 A nonzero block is any block that as 1 or more nonzeros in it 1883 1884 The block AIJ format is fully compatible with standard Fortran 77 1885 storage. That is, the stored row and column indices can begin at 1886 either one (as in Fortran) or zero. See the users' manual for details. 1887 1888 Specify the preallocated storage with either nz or nnz (not both). 1889 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1890 allocation. For additional details, see the users manual chapter on 1891 matrices. 1892 1893 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1894 @*/ 1895 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1896 { 1897 int ierr; 1898 1899 PetscFunctionBegin; 1900 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1901 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1902 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1908 /*@C 1909 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1910 per row in the matrix. For good matrix assembly performance the 1911 user should preallocate the matrix storage by setting the parameter nz 1912 (or the array nnz). By setting these parameters accurately, performance 1913 during matrix assembly can be increased by more than a factor of 50. 1914 1915 Collective on MPI_Comm 1916 1917 Input Parameters: 1918 + A - the matrix 1919 . bs - size of block 1920 . nz - number of block nonzeros per block row (same for all rows) 1921 - nnz - array containing the number of block nonzeros in the various block rows 1922 (possibly different for each block row) or PETSC_NULL 1923 1924 Options Database Keys: 1925 . -mat_no_unroll - uses code that does not unroll the loops in the 1926 block calculations (much slower) 1927 . -mat_block_size - size of the blocks to use 1928 1929 Level: intermediate 1930 1931 Notes: 1932 The block AIJ format is fully compatible with standard Fortran 77 1933 storage. That is, the stored row and column indices can begin at 1934 either one (as in Fortran) or zero. See the users' manual for details. 1935 1936 Specify the preallocated storage with either nz or nnz (not both). 1937 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1938 allocation. For additional details, see the users manual chapter on 1939 matrices. 1940 1941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1942 @*/ 1943 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1944 { 1945 Mat_SeqBAIJ *b; 1946 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1947 PetscTruth flg; 1948 1949 PetscFunctionBegin; 1950 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1951 if (!flg) PetscFunctionReturn(0); 1952 1953 B->preallocated = PETSC_TRUE; 1954 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1955 if (nnz && newbs != bs) { 1956 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1957 } 1958 bs = newbs; 1959 1960 mbs = B->m/bs; 1961 nbs = B->n/bs; 1962 bs2 = bs*bs; 1963 1964 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1965 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1966 } 1967 1968 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1969 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1970 if (nnz) { 1971 for (i=0; i<mbs; i++) { 1972 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1973 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1974 } 1975 } 1976 1977 b = (Mat_SeqBAIJ*)B->data; 1978 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1979 B->ops->solve = MatSolve_SeqBAIJ_Update; 1980 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1981 if (!flg) { 1982 switch (bs) { 1983 case 1: 1984 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1985 B->ops->mult = MatMult_SeqBAIJ_1; 1986 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1987 break; 1988 case 2: 1989 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1990 B->ops->mult = MatMult_SeqBAIJ_2; 1991 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1992 break; 1993 case 3: 1994 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1995 B->ops->mult = MatMult_SeqBAIJ_3; 1996 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1997 break; 1998 case 4: 1999 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2000 B->ops->mult = MatMult_SeqBAIJ_4; 2001 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2002 break; 2003 case 5: 2004 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2005 B->ops->mult = MatMult_SeqBAIJ_5; 2006 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2007 break; 2008 case 6: 2009 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2010 B->ops->mult = MatMult_SeqBAIJ_6; 2011 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2012 break; 2013 case 7: 2014 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2015 B->ops->mult = MatMult_SeqBAIJ_7; 2016 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2017 break; 2018 default: 2019 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2020 B->ops->mult = MatMult_SeqBAIJ_N; 2021 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2022 break; 2023 } 2024 } 2025 b->bs = bs; 2026 b->mbs = mbs; 2027 b->nbs = nbs; 2028 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2029 if (!nnz) { 2030 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2031 else if (nz <= 0) nz = 1; 2032 for (i=0; i<mbs; i++) b->imax[i] = nz; 2033 nz = nz*mbs; 2034 } else { 2035 nz = 0; 2036 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2037 } 2038 2039 /* allocate the matrix space */ 2040 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2041 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2042 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2043 b->j = (int*)(b->a + nz*bs2); 2044 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2045 b->i = b->j + nz; 2046 b->singlemalloc = PETSC_TRUE; 2047 2048 b->i[0] = 0; 2049 for (i=1; i<mbs+1; i++) { 2050 b->i[i] = b->i[i-1] + b->imax[i-1]; 2051 } 2052 2053 /* b->ilen will count nonzeros in each block row so far. */ 2054 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2055 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2056 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2057 2058 b->bs = bs; 2059 b->bs2 = bs2; 2060 b->mbs = mbs; 2061 b->nz = 0; 2062 b->maxnz = nz*bs2; 2063 B->info.nz_unneeded = (PetscReal)b->maxnz; 2064 PetscFunctionReturn(0); 2065 } 2066 2067