1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 case MAT_USE_SINGLE_PRECISION_SOLVES: 351 if (a->bs==4) { 352 int ierr; 353 a->single_precision_solves = PETSC_TRUE; 354 A->ops->solve = MatSolve_SeqBAIJ_Update; 355 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 356 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 357 } else { 358 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 359 } 360 break; 361 default: 362 SETERRQ(PETSC_ERR_SUP,"unknown option"); 363 } 364 PetscFunctionReturn(0); 365 } 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "MatGetRow_SeqBAIJ" 369 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 372 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 373 MatScalar *aa,*aa_i; 374 PetscScalar *v_i; 375 376 PetscFunctionBegin; 377 bs = a->bs; 378 ai = a->i; 379 aj = a->j; 380 aa = a->a; 381 bs2 = a->bs2; 382 383 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 384 385 bn = row/bs; /* Block number */ 386 bp = row % bs; /* Block Position */ 387 M = ai[bn+1] - ai[bn]; 388 *nz = bs*M; 389 390 if (v) { 391 *v = 0; 392 if (*nz) { 393 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 394 for (i=0; i<M; i++) { /* for each block in the block row */ 395 v_i = *v + i*bs; 396 aa_i = aa + bs2*(ai[bn] + i); 397 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 398 } 399 } 400 } 401 402 if (idx) { 403 *idx = 0; 404 if (*nz) { 405 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 406 for (i=0; i<M; i++) { /* for each block in the block row */ 407 idx_i = *idx + i*bs; 408 itmp = bs*aj[ai[bn] + i]; 409 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 410 } 411 } 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 418 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 419 { 420 int ierr; 421 422 PetscFunctionBegin; 423 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 424 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatTranspose_SeqBAIJ" 430 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 431 { 432 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 433 Mat C; 434 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 435 int *rows,*cols,bs2=a->bs2; 436 PetscScalar *array; 437 438 PetscFunctionBegin; 439 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 440 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 441 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 442 443 #if defined(PETSC_USE_MAT_SINGLE) 444 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 445 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 446 #else 447 array = a->a; 448 #endif 449 450 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 451 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 452 ierr = PetscFree(col);CHKERRQ(ierr); 453 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 454 cols = rows + bs; 455 for (i=0; i<mbs; i++) { 456 cols[0] = i*bs; 457 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 458 len = ai[i+1] - ai[i]; 459 for (j=0; j<len; j++) { 460 rows[0] = (*aj++)*bs; 461 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 462 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 463 array += bs2; 464 } 465 } 466 ierr = PetscFree(rows);CHKERRQ(ierr); 467 #if defined(PETSC_USE_MAT_SINGLE) 468 ierr = PetscFree(array); 469 #endif 470 471 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 473 474 if (B) { 475 *B = C; 476 } else { 477 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 484 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 487 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 488 PetscScalar *aa; 489 FILE *file; 490 491 PetscFunctionBegin; 492 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 493 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 494 col_lens[0] = MAT_FILE_COOKIE; 495 496 col_lens[1] = A->m; 497 col_lens[2] = A->n; 498 col_lens[3] = a->nz*bs2; 499 500 /* store lengths of each row and write (including header) to file */ 501 count = 0; 502 for (i=0; i<a->mbs; i++) { 503 for (j=0; j<bs; j++) { 504 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 505 } 506 } 507 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 508 ierr = PetscFree(col_lens);CHKERRQ(ierr); 509 510 /* store column indices (zero start index) */ 511 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 512 count = 0; 513 for (i=0; i<a->mbs; i++) { 514 for (j=0; j<bs; j++) { 515 for (k=a->i[i]; k<a->i[i+1]; k++) { 516 for (l=0; l<bs; l++) { 517 jj[count++] = bs*a->j[k] + l; 518 } 519 } 520 } 521 } 522 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 523 ierr = PetscFree(jj);CHKERRQ(ierr); 524 525 /* store nonzero values */ 526 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 527 count = 0; 528 for (i=0; i<a->mbs; i++) { 529 for (j=0; j<bs; j++) { 530 for (k=a->i[i]; k<a->i[i+1]; k++) { 531 for (l=0; l<bs; l++) { 532 aa[count++] = a->a[bs2*k + l*bs + j]; 533 } 534 } 535 } 536 } 537 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 538 ierr = PetscFree(aa);CHKERRQ(ierr); 539 540 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 541 if (file) { 542 fprintf(file,"-matload_block_size %d\n",a->bs); 543 } 544 PetscFunctionReturn(0); 545 } 546 547 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 551 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 552 { 553 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 554 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 555 PetscViewerFormat format; 556 557 PetscFunctionBegin; 558 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 559 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 560 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 561 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 562 Mat aij; 563 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 564 ierr = MatView(aij,viewer);CHKERRQ(ierr); 565 ierr = MatDestroy(aij);CHKERRQ(ierr); 566 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 567 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 568 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 569 #endif 570 PetscFunctionReturn(0); 571 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 572 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 573 for (i=0; i<a->mbs; i++) { 574 for (j=0; j<bs; j++) { 575 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 576 for (k=a->i[i]; k<a->i[i+1]; k++) { 577 for (l=0; l<bs; l++) { 578 #if defined(PETSC_USE_COMPLEX) 579 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 580 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 581 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 582 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 583 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 584 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 585 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 586 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 587 } 588 #else 589 if (a->a[bs2*k + l*bs + j] != 0.0) { 590 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 591 } 592 #endif 593 } 594 } 595 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 596 } 597 } 598 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 601 for (i=0; i<a->mbs; i++) { 602 for (j=0; j<bs; j++) { 603 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 604 for (k=a->i[i]; k<a->i[i+1]; k++) { 605 for (l=0; l<bs; l++) { 606 #if defined(PETSC_USE_COMPLEX) 607 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 608 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 609 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 610 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 611 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 612 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 615 } 616 #else 617 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 618 #endif 619 } 620 } 621 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 622 } 623 } 624 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 625 } 626 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 632 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 633 { 634 Mat A = (Mat) Aa; 635 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 636 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 637 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 638 MatScalar *aa; 639 PetscViewer viewer; 640 641 PetscFunctionBegin; 642 643 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 644 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 645 646 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 647 648 /* loop over matrix elements drawing boxes */ 649 color = PETSC_DRAW_BLUE; 650 for (i=0,row=0; i<mbs; i++,row+=bs) { 651 for (j=a->i[i]; j<a->i[i+1]; j++) { 652 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 653 x_l = a->j[j]*bs; x_r = x_l + 1.0; 654 aa = a->a + j*bs2; 655 for (k=0; k<bs; k++) { 656 for (l=0; l<bs; l++) { 657 if (PetscRealPart(*aa++) >= 0.) continue; 658 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 659 } 660 } 661 } 662 } 663 color = PETSC_DRAW_CYAN; 664 for (i=0,row=0; i<mbs; i++,row+=bs) { 665 for (j=a->i[i]; j<a->i[i+1]; j++) { 666 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 667 x_l = a->j[j]*bs; x_r = x_l + 1.0; 668 aa = a->a + j*bs2; 669 for (k=0; k<bs; k++) { 670 for (l=0; l<bs; l++) { 671 if (PetscRealPart(*aa++) != 0.) continue; 672 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 673 } 674 } 675 } 676 } 677 678 color = PETSC_DRAW_RED; 679 for (i=0,row=0; i<mbs; i++,row+=bs) { 680 for (j=a->i[i]; j<a->i[i+1]; j++) { 681 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 682 x_l = a->j[j]*bs; x_r = x_l + 1.0; 683 aa = a->a + j*bs2; 684 for (k=0; k<bs; k++) { 685 for (l=0; l<bs; l++) { 686 if (PetscRealPart(*aa++) <= 0.) continue; 687 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 688 } 689 } 690 } 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 697 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 698 { 699 int ierr; 700 PetscReal xl,yl,xr,yr,w,h; 701 PetscDraw draw; 702 PetscTruth isnull; 703 704 PetscFunctionBegin; 705 706 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 707 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 708 709 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 710 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 711 xr += w; yr += h; xl = -w; yl = -h; 712 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 713 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 714 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_SeqBAIJ" 720 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 721 { 722 int ierr; 723 PetscTruth issocket,isascii,isbinary,isdraw; 724 725 PetscFunctionBegin; 726 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 727 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 730 if (issocket) { 731 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 732 } else if (isascii){ 733 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 734 } else if (isbinary) { 735 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 736 } else if (isdraw) { 737 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 738 } else { 739 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatGetValues_SeqBAIJ" 747 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 748 { 749 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 750 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 751 int *ai = a->i,*ailen = a->ilen; 752 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 753 MatScalar *ap,*aa = a->a,zero = 0.0; 754 755 PetscFunctionBegin; 756 for (k=0; k<m; k++) { /* loop over rows */ 757 row = im[k]; brow = row/bs; 758 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 759 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 760 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 761 nrow = ailen[brow]; 762 for (l=0; l<n; l++) { /* loop over columns */ 763 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 764 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 765 col = in[l] ; 766 bcol = col/bs; 767 cidx = col%bs; 768 ridx = row%bs; 769 high = nrow; 770 low = 0; /* assume unsorted */ 771 while (high-low > 5) { 772 t = (low+high)/2; 773 if (rp[t] > bcol) high = t; 774 else low = t; 775 } 776 for (i=low; i<high; i++) { 777 if (rp[i] > bcol) break; 778 if (rp[i] == bcol) { 779 *v++ = ap[bs2*i+bs*cidx+ridx]; 780 goto finished; 781 } 782 } 783 *v++ = zero; 784 finished:; 785 } 786 } 787 PetscFunctionReturn(0); 788 } 789 790 #if defined(PETSC_USE_MAT_SINGLE) 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 793 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 794 { 795 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 796 int ierr,i,N = m*n*b->bs2; 797 MatScalar *vsingle; 798 799 PetscFunctionBegin; 800 if (N > b->setvalueslen) { 801 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 802 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 803 b->setvalueslen = N; 804 } 805 vsingle = b->setvaluescopy; 806 for (i=0; i<N; i++) { 807 vsingle[i] = v[i]; 808 } 809 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 810 PetscFunctionReturn(0); 811 } 812 #endif 813 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 817 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 820 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 821 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 822 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 823 PetscTruth roworiented=a->roworiented; 824 MatScalar *value = v,*ap,*aa = a->a,*bap; 825 826 PetscFunctionBegin; 827 if (roworiented) { 828 stepval = (n-1)*bs; 829 } else { 830 stepval = (m-1)*bs; 831 } 832 for (k=0; k<m; k++) { /* loop over added rows */ 833 row = im[k]; 834 if (row < 0) continue; 835 #if defined(PETSC_USE_BOPT_g) 836 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 837 #endif 838 rp = aj + ai[row]; 839 ap = aa + bs2*ai[row]; 840 rmax = imax[row]; 841 nrow = ailen[row]; 842 low = 0; 843 for (l=0; l<n; l++) { /* loop over added columns */ 844 if (in[l] < 0) continue; 845 #if defined(PETSC_USE_BOPT_g) 846 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 847 #endif 848 col = in[l]; 849 if (roworiented) { 850 value = v + k*(stepval+bs)*bs + l*bs; 851 } else { 852 value = v + l*(stepval+bs)*bs + k*bs; 853 } 854 if (!sorted) low = 0; high = nrow; 855 while (high-low > 7) { 856 t = (low+high)/2; 857 if (rp[t] > col) high = t; 858 else low = t; 859 } 860 for (i=low; i<high; i++) { 861 if (rp[i] > col) break; 862 if (rp[i] == col) { 863 bap = ap + bs2*i; 864 if (roworiented) { 865 if (is == ADD_VALUES) { 866 for (ii=0; ii<bs; ii++,value+=stepval) { 867 for (jj=ii; jj<bs2; jj+=bs) { 868 bap[jj] += *value++; 869 } 870 } 871 } else { 872 for (ii=0; ii<bs; ii++,value+=stepval) { 873 for (jj=ii; jj<bs2; jj+=bs) { 874 bap[jj] = *value++; 875 } 876 } 877 } 878 } else { 879 if (is == ADD_VALUES) { 880 for (ii=0; ii<bs; ii++,value+=stepval) { 881 for (jj=0; jj<bs; jj++) { 882 *bap++ += *value++; 883 } 884 } 885 } else { 886 for (ii=0; ii<bs; ii++,value+=stepval) { 887 for (jj=0; jj<bs; jj++) { 888 *bap++ = *value++; 889 } 890 } 891 } 892 } 893 goto noinsert2; 894 } 895 } 896 if (nonew == 1) goto noinsert2; 897 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 898 if (nrow >= rmax) { 899 /* there is no extra room in row, therefore enlarge */ 900 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 901 MatScalar *new_a; 902 903 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 904 905 /* malloc new storage space */ 906 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 907 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 908 new_j = (int*)(new_a + bs2*new_nz); 909 new_i = new_j + new_nz; 910 911 /* copy over old data into new slots */ 912 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 913 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 914 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 915 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 916 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 917 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 918 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 920 /* free up old matrix storage */ 921 ierr = PetscFree(a->a);CHKERRQ(ierr); 922 if (!a->singlemalloc) { 923 ierr = PetscFree(a->i);CHKERRQ(ierr); 924 ierr = PetscFree(a->j);CHKERRQ(ierr); 925 } 926 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 927 a->singlemalloc = PETSC_TRUE; 928 929 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 930 rmax = imax[row] = imax[row] + CHUNKSIZE; 931 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 932 a->maxnz += bs2*CHUNKSIZE; 933 a->reallocs++; 934 a->nz++; 935 } 936 N = nrow++ - 1; 937 /* shift up all the later entries in this row */ 938 for (ii=N; ii>=i; ii--) { 939 rp[ii+1] = rp[ii]; 940 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 941 } 942 if (N >= i) { 943 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 944 } 945 rp[i] = col; 946 bap = ap + bs2*i; 947 if (roworiented) { 948 for (ii=0; ii<bs; ii++,value+=stepval) { 949 for (jj=ii; jj<bs2; jj+=bs) { 950 bap[jj] = *value++; 951 } 952 } 953 } else { 954 for (ii=0; ii<bs; ii++,value+=stepval) { 955 for (jj=0; jj<bs; jj++) { 956 *bap++ = *value++; 957 } 958 } 959 } 960 noinsert2:; 961 low = i; 962 } 963 ailen[row] = nrow; 964 } 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 970 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 974 int m = A->m,*ip,N,*ailen = a->ilen; 975 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 976 MatScalar *aa = a->a,*ap; 977 #if defined(PETSC_HAVE_DSCPACK) 978 PetscTruth flag; 979 #endif 980 981 PetscFunctionBegin; 982 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 983 984 if (m) rmax = ailen[0]; 985 for (i=1; i<mbs; i++) { 986 /* move each row back by the amount of empty slots (fshift) before it*/ 987 fshift += imax[i-1] - ailen[i-1]; 988 rmax = PetscMax(rmax,ailen[i]); 989 if (fshift) { 990 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 991 N = ailen[i]; 992 for (j=0; j<N; j++) { 993 ip[j-fshift] = ip[j]; 994 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 995 } 996 } 997 ai[i] = ai[i-1] + ailen[i-1]; 998 } 999 if (mbs) { 1000 fshift += imax[mbs-1] - ailen[mbs-1]; 1001 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1002 } 1003 /* reset ilen and imax for each row */ 1004 for (i=0; i<mbs; i++) { 1005 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1006 } 1007 a->nz = ai[mbs]; 1008 1009 /* diagonals may have moved, so kill the diagonal pointers */ 1010 if (fshift && a->diag) { 1011 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1012 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1013 a->diag = 0; 1014 } 1015 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1016 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1017 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1018 a->reallocs = 0; 1019 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1020 1021 #if defined(PETSC_HAVE_DSCPACK) 1022 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1023 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1024 #endif 1025 1026 PetscFunctionReturn(0); 1027 } 1028 1029 1030 1031 /* 1032 This function returns an array of flags which indicate the locations of contiguous 1033 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1034 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1035 Assume: sizes should be long enough to hold all the values. 1036 */ 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1039 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1040 { 1041 int i,j,k,row; 1042 PetscTruth flg; 1043 1044 PetscFunctionBegin; 1045 for (i=0,j=0; i<n; j++) { 1046 row = idx[i]; 1047 if (row%bs!=0) { /* Not the begining of a block */ 1048 sizes[j] = 1; 1049 i++; 1050 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1051 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1052 i++; 1053 } else { /* Begining of the block, so check if the complete block exists */ 1054 flg = PETSC_TRUE; 1055 for (k=1; k<bs; k++) { 1056 if (row+k != idx[i+k]) { /* break in the block */ 1057 flg = PETSC_FALSE; 1058 break; 1059 } 1060 } 1061 if (flg == PETSC_TRUE) { /* No break in the bs */ 1062 sizes[j] = bs; 1063 i+= bs; 1064 } else { 1065 sizes[j] = 1; 1066 i++; 1067 } 1068 } 1069 } 1070 *bs_max = j; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1076 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1077 { 1078 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1079 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1080 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1081 PetscScalar zero = 0.0; 1082 MatScalar *aa; 1083 1084 PetscFunctionBegin; 1085 /* Make a copy of the IS and sort it */ 1086 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1087 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1088 1089 /* allocate memory for rows,sizes */ 1090 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1091 sizes = rows + is_n; 1092 1093 /* copy IS values to rows, and sort them */ 1094 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1095 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1096 if (baij->keepzeroedrows) { 1097 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1098 bs_max = is_n; 1099 } else { 1100 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1101 } 1102 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1103 1104 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1105 row = rows[j]; 1106 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1107 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1108 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1109 if (sizes[i] == bs && !baij->keepzeroedrows) { 1110 if (diag) { 1111 if (baij->ilen[row/bs] > 0) { 1112 baij->ilen[row/bs] = 1; 1113 baij->j[baij->i[row/bs]] = row/bs; 1114 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1115 } 1116 /* Now insert all the diagonal values for this bs */ 1117 for (k=0; k<bs; k++) { 1118 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1119 } 1120 } else { /* (!diag) */ 1121 baij->ilen[row/bs] = 0; 1122 } /* end (!diag) */ 1123 } else { /* (sizes[i] != bs) */ 1124 #if defined (PETSC_USE_DEBUG) 1125 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1126 #endif 1127 for (k=0; k<count; k++) { 1128 aa[0] = zero; 1129 aa += bs; 1130 } 1131 if (diag) { 1132 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1133 } 1134 } 1135 } 1136 1137 ierr = PetscFree(rows);CHKERRQ(ierr); 1138 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1144 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1145 { 1146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1147 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1148 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1149 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1150 int ridx,cidx,bs2=a->bs2,ierr; 1151 PetscTruth roworiented=a->roworiented; 1152 MatScalar *ap,value,*aa=a->a,*bap; 1153 1154 PetscFunctionBegin; 1155 for (k=0; k<m; k++) { /* loop over added rows */ 1156 row = im[k]; brow = row/bs; 1157 if (row < 0) continue; 1158 #if defined(PETSC_USE_BOPT_g) 1159 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1160 #endif 1161 rp = aj + ai[brow]; 1162 ap = aa + bs2*ai[brow]; 1163 rmax = imax[brow]; 1164 nrow = ailen[brow]; 1165 low = 0; 1166 for (l=0; l<n; l++) { /* loop over added columns */ 1167 if (in[l] < 0) continue; 1168 #if defined(PETSC_USE_BOPT_g) 1169 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1170 #endif 1171 col = in[l]; bcol = col/bs; 1172 ridx = row % bs; cidx = col % bs; 1173 if (roworiented) { 1174 value = v[l + k*n]; 1175 } else { 1176 value = v[k + l*m]; 1177 } 1178 if (!sorted) low = 0; high = nrow; 1179 while (high-low > 7) { 1180 t = (low+high)/2; 1181 if (rp[t] > bcol) high = t; 1182 else low = t; 1183 } 1184 for (i=low; i<high; i++) { 1185 if (rp[i] > bcol) break; 1186 if (rp[i] == bcol) { 1187 bap = ap + bs2*i + bs*cidx + ridx; 1188 if (is == ADD_VALUES) *bap += value; 1189 else *bap = value; 1190 goto noinsert1; 1191 } 1192 } 1193 if (nonew == 1) goto noinsert1; 1194 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1195 if (nrow >= rmax) { 1196 /* there is no extra room in row, therefore enlarge */ 1197 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1198 MatScalar *new_a; 1199 1200 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1201 1202 /* Malloc new storage space */ 1203 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1204 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1205 new_j = (int*)(new_a + bs2*new_nz); 1206 new_i = new_j + new_nz; 1207 1208 /* copy over old data into new slots */ 1209 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1210 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1211 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1212 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1213 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1214 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1215 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1216 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1217 /* free up old matrix storage */ 1218 ierr = PetscFree(a->a);CHKERRQ(ierr); 1219 if (!a->singlemalloc) { 1220 ierr = PetscFree(a->i);CHKERRQ(ierr); 1221 ierr = PetscFree(a->j);CHKERRQ(ierr); 1222 } 1223 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1224 a->singlemalloc = PETSC_TRUE; 1225 1226 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1227 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1228 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1229 a->maxnz += bs2*CHUNKSIZE; 1230 a->reallocs++; 1231 a->nz++; 1232 } 1233 N = nrow++ - 1; 1234 /* shift up all the later entries in this row */ 1235 for (ii=N; ii>=i; ii--) { 1236 rp[ii+1] = rp[ii]; 1237 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1238 } 1239 if (N>=i) { 1240 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1241 } 1242 rp[i] = bcol; 1243 ap[bs2*i + bs*cidx + ridx] = value; 1244 noinsert1:; 1245 low = i; 1246 } 1247 ailen[brow] = nrow; 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1255 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1256 { 1257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1258 Mat outA; 1259 int ierr; 1260 PetscTruth row_identity,col_identity; 1261 1262 PetscFunctionBegin; 1263 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1264 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1265 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1266 if (!row_identity || !col_identity) { 1267 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1268 } 1269 1270 outA = inA; 1271 inA->factor = FACTOR_LU; 1272 1273 if (!a->diag) { 1274 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1275 } 1276 1277 a->row = row; 1278 a->col = col; 1279 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1280 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1281 1282 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1283 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1284 PetscLogObjectParent(inA,a->icol); 1285 1286 /* 1287 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1288 for ILU(0) factorization with natural ordering 1289 */ 1290 if (a->bs < 8) { 1291 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1292 } else { 1293 if (!a->solve_work) { 1294 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1295 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1296 } 1297 } 1298 1299 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1300 1301 PetscFunctionReturn(0); 1302 } 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1305 int MatPrintHelp_SeqBAIJ(Mat A) 1306 { 1307 static PetscTruth called = PETSC_FALSE; 1308 MPI_Comm comm = A->comm; 1309 int ierr; 1310 1311 PetscFunctionBegin; 1312 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1313 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1314 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 EXTERN_C_BEGIN 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1321 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1322 { 1323 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1324 int i,nz,nbs; 1325 1326 PetscFunctionBegin; 1327 nz = baij->maxnz/baij->bs2; 1328 nbs = baij->nbs; 1329 for (i=0; i<nz; i++) { 1330 baij->j[i] = indices[i]; 1331 } 1332 baij->nz = nz; 1333 for (i=0; i<nbs; i++) { 1334 baij->ilen[i] = baij->imax[i]; 1335 } 1336 1337 PetscFunctionReturn(0); 1338 } 1339 EXTERN_C_END 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1343 /*@ 1344 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1345 in the matrix. 1346 1347 Input Parameters: 1348 + mat - the SeqBAIJ matrix 1349 - indices - the column indices 1350 1351 Level: advanced 1352 1353 Notes: 1354 This can be called if you have precomputed the nonzero structure of the 1355 matrix and want to provide it to the matrix object to improve the performance 1356 of the MatSetValues() operation. 1357 1358 You MUST have set the correct numbers of nonzeros per row in the call to 1359 MatCreateSeqBAIJ(). 1360 1361 MUST be called before any calls to MatSetValues(); 1362 1363 @*/ 1364 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1365 { 1366 int ierr,(*f)(Mat,int *); 1367 1368 PetscFunctionBegin; 1369 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1370 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1371 if (f) { 1372 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1373 } else { 1374 SETERRQ(1,"Wrong type of matrix to set column indices"); 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1381 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1382 { 1383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1384 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1385 PetscReal atmp; 1386 PetscScalar *x,zero = 0.0; 1387 MatScalar *aa; 1388 int ncols,brow,krow,kcol; 1389 1390 PetscFunctionBegin; 1391 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1392 bs = a->bs; 1393 aa = a->a; 1394 ai = a->i; 1395 aj = a->j; 1396 mbs = a->mbs; 1397 1398 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1399 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1400 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1401 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1402 for (i=0; i<mbs; i++) { 1403 ncols = ai[1] - ai[0]; ai++; 1404 brow = bs*i; 1405 for (j=0; j<ncols; j++){ 1406 /* bcol = bs*(*aj); */ 1407 for (kcol=0; kcol<bs; kcol++){ 1408 for (krow=0; krow<bs; krow++){ 1409 atmp = PetscAbsScalar(*aa); aa++; 1410 row = brow + krow; /* row index */ 1411 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1412 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1413 } 1414 } 1415 aj++; 1416 } 1417 } 1418 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1424 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1425 { 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1435 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1436 { 1437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1438 PetscFunctionBegin; 1439 *array = a->a; 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1445 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1446 { 1447 PetscFunctionBegin; 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #include "petscblaslapack.h" 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1454 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1455 { 1456 int ierr,one=1; 1457 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1458 1459 PetscFunctionBegin; 1460 if (str == SAME_NONZERO_PATTERN) { 1461 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1462 } else { 1463 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 /* -------------------------------------------------------------------*/ 1469 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1470 MatGetRow_SeqBAIJ, 1471 MatRestoreRow_SeqBAIJ, 1472 MatMult_SeqBAIJ_N, 1473 MatMultAdd_SeqBAIJ_N, 1474 MatMultTranspose_SeqBAIJ, 1475 MatMultTransposeAdd_SeqBAIJ, 1476 MatSolve_SeqBAIJ_N, 1477 0, 1478 0, 1479 0, 1480 MatLUFactor_SeqBAIJ, 1481 0, 1482 0, 1483 MatTranspose_SeqBAIJ, 1484 MatGetInfo_SeqBAIJ, 1485 MatEqual_SeqBAIJ, 1486 MatGetDiagonal_SeqBAIJ, 1487 MatDiagonalScale_SeqBAIJ, 1488 MatNorm_SeqBAIJ, 1489 0, 1490 MatAssemblyEnd_SeqBAIJ, 1491 0, 1492 MatSetOption_SeqBAIJ, 1493 MatZeroEntries_SeqBAIJ, 1494 MatZeroRows_SeqBAIJ, 1495 MatLUFactorSymbolic_SeqBAIJ, 1496 MatLUFactorNumeric_SeqBAIJ_N, 1497 0, 1498 0, 1499 MatSetUpPreallocation_SeqBAIJ, 1500 MatILUFactorSymbolic_SeqBAIJ, 1501 0, 1502 MatGetArray_SeqBAIJ, 1503 MatRestoreArray_SeqBAIJ, 1504 MatDuplicate_SeqBAIJ, 1505 0, 1506 0, 1507 MatILUFactor_SeqBAIJ, 1508 0, 1509 MatAXPY_SeqBAIJ, 1510 MatGetSubMatrices_SeqBAIJ, 1511 MatIncreaseOverlap_SeqBAIJ, 1512 MatGetValues_SeqBAIJ, 1513 0, 1514 MatPrintHelp_SeqBAIJ, 1515 MatScale_SeqBAIJ, 1516 0, 1517 0, 1518 0, 1519 MatGetBlockSize_SeqBAIJ, 1520 MatGetRowIJ_SeqBAIJ, 1521 MatRestoreRowIJ_SeqBAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 MatSetValuesBlocked_SeqBAIJ, 1529 MatGetSubMatrix_SeqBAIJ, 1530 MatDestroy_SeqBAIJ, 1531 MatView_SeqBAIJ, 1532 MatGetPetscMaps_Petsc, 1533 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 0, 1539 MatGetRowMax_SeqBAIJ, 1540 MatConvert_Basic}; 1541 1542 EXTERN_C_BEGIN 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1545 int MatStoreValues_SeqBAIJ(Mat mat) 1546 { 1547 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1548 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1549 int ierr; 1550 1551 PetscFunctionBegin; 1552 if (aij->nonew != 1) { 1553 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1554 } 1555 1556 /* allocate space for values if not already there */ 1557 if (!aij->saved_values) { 1558 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1559 } 1560 1561 /* copy values over */ 1562 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 EXTERN_C_END 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1570 int MatRetrieveValues_SeqBAIJ(Mat mat) 1571 { 1572 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1573 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1574 1575 PetscFunctionBegin; 1576 if (aij->nonew != 1) { 1577 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1578 } 1579 if (!aij->saved_values) { 1580 SETERRQ(1,"Must call MatStoreValues(A);first"); 1581 } 1582 1583 /* copy values over */ 1584 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 EXTERN_C_END 1588 1589 EXTERN_C_BEGIN 1590 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1591 EXTERN_C_END 1592 1593 EXTERN_C_BEGIN 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatCreate_SeqBAIJ" 1596 int MatCreate_SeqBAIJ(Mat B) 1597 { 1598 int ierr,size; 1599 Mat_SeqBAIJ *b; 1600 1601 PetscFunctionBegin; 1602 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1603 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1604 1605 B->m = B->M = PetscMax(B->m,B->M); 1606 B->n = B->N = PetscMax(B->n,B->N); 1607 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1608 B->data = (void*)b; 1609 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1610 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1611 B->factor = 0; 1612 B->lupivotthreshold = 1.0; 1613 B->mapping = 0; 1614 b->row = 0; 1615 b->col = 0; 1616 b->icol = 0; 1617 b->reallocs = 0; 1618 b->saved_values = 0; 1619 #if defined(PETSC_USE_MAT_SINGLE) 1620 b->setvalueslen = 0; 1621 b->setvaluescopy = PETSC_NULL; 1622 #endif 1623 b->single_precision_solves = PETSC_FALSE; 1624 1625 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1626 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1627 1628 b->sorted = PETSC_FALSE; 1629 b->roworiented = PETSC_TRUE; 1630 b->nonew = 0; 1631 b->diag = 0; 1632 b->solve_work = 0; 1633 b->mult_work = 0; 1634 B->spptr = 0; 1635 B->info.nz_unneeded = (PetscReal)b->maxnz; 1636 b->keepzeroedrows = PETSC_FALSE; 1637 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1639 "MatStoreValues_SeqBAIJ", 1640 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1642 "MatRetrieveValues_SeqBAIJ", 1643 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1645 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1646 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1647 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1648 "MatConvert_SeqBAIJ_SeqAIJ", 1649 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1650 PetscFunctionReturn(0); 1651 } 1652 EXTERN_C_END 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1656 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1657 { 1658 Mat C; 1659 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1660 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1661 1662 PetscFunctionBegin; 1663 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1664 1665 *B = 0; 1666 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1667 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1668 c = (Mat_SeqBAIJ*)C->data; 1669 1670 c->bs = a->bs; 1671 c->bs2 = a->bs2; 1672 c->mbs = a->mbs; 1673 c->nbs = a->nbs; 1674 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1675 1676 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1677 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1678 for (i=0; i<mbs; i++) { 1679 c->imax[i] = a->imax[i]; 1680 c->ilen[i] = a->ilen[i]; 1681 } 1682 1683 /* allocate the matrix space */ 1684 c->singlemalloc = PETSC_TRUE; 1685 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1686 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1687 c->j = (int*)(c->a + nz*bs2); 1688 c->i = c->j + nz; 1689 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1690 if (mbs > 0) { 1691 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1692 if (cpvalues == MAT_COPY_VALUES) { 1693 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1694 } else { 1695 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1696 } 1697 } 1698 1699 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1700 c->sorted = a->sorted; 1701 c->roworiented = a->roworiented; 1702 c->nonew = a->nonew; 1703 1704 if (a->diag) { 1705 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1706 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1707 for (i=0; i<mbs; i++) { 1708 c->diag[i] = a->diag[i]; 1709 } 1710 } else c->diag = 0; 1711 c->nz = a->nz; 1712 c->maxnz = a->maxnz; 1713 c->solve_work = 0; 1714 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1715 c->mult_work = 0; 1716 C->preallocated = PETSC_TRUE; 1717 C->assembled = PETSC_TRUE; 1718 *B = C; 1719 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 EXTERN_C_BEGIN 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatLoad_SeqBAIJ" 1726 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1727 { 1728 Mat_SeqBAIJ *a; 1729 Mat B; 1730 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1731 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1732 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1733 int *masked,nmask,tmp,bs2,ishift; 1734 PetscScalar *aa; 1735 MPI_Comm comm = ((PetscObject)viewer)->comm; 1736 1737 PetscFunctionBegin; 1738 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1739 bs2 = bs*bs; 1740 1741 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1742 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1743 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1744 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1745 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1746 M = header[1]; N = header[2]; nz = header[3]; 1747 1748 if (header[3] < 0) { 1749 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1750 } 1751 1752 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1753 1754 /* 1755 This code adds extra rows to make sure the number of rows is 1756 divisible by the blocksize 1757 */ 1758 mbs = M/bs; 1759 extra_rows = bs - M + bs*(mbs); 1760 if (extra_rows == bs) extra_rows = 0; 1761 else mbs++; 1762 if (extra_rows) { 1763 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1764 } 1765 1766 /* read in row lengths */ 1767 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1768 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1769 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1770 1771 /* read in column indices */ 1772 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1773 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1774 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1775 1776 /* loop over row lengths determining block row lengths */ 1777 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1778 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1779 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1780 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1781 masked = mask + mbs; 1782 rowcount = 0; nzcount = 0; 1783 for (i=0; i<mbs; i++) { 1784 nmask = 0; 1785 for (j=0; j<bs; j++) { 1786 kmax = rowlengths[rowcount]; 1787 for (k=0; k<kmax; k++) { 1788 tmp = jj[nzcount++]/bs; 1789 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1790 } 1791 rowcount++; 1792 } 1793 browlengths[i] += nmask; 1794 /* zero out the mask elements we set */ 1795 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1796 } 1797 1798 /* create our matrix */ 1799 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1800 B = *A; 1801 a = (Mat_SeqBAIJ*)B->data; 1802 1803 /* set matrix "i" values */ 1804 a->i[0] = 0; 1805 for (i=1; i<= mbs; i++) { 1806 a->i[i] = a->i[i-1] + browlengths[i-1]; 1807 a->ilen[i-1] = browlengths[i-1]; 1808 } 1809 a->nz = 0; 1810 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1811 1812 /* read in nonzero values */ 1813 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1814 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1815 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1816 1817 /* set "a" and "j" values into matrix */ 1818 nzcount = 0; jcount = 0; 1819 for (i=0; i<mbs; i++) { 1820 nzcountb = nzcount; 1821 nmask = 0; 1822 for (j=0; j<bs; j++) { 1823 kmax = rowlengths[i*bs+j]; 1824 for (k=0; k<kmax; k++) { 1825 tmp = jj[nzcount++]/bs; 1826 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1827 } 1828 } 1829 /* sort the masked values */ 1830 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1831 1832 /* set "j" values into matrix */ 1833 maskcount = 1; 1834 for (j=0; j<nmask; j++) { 1835 a->j[jcount++] = masked[j]; 1836 mask[masked[j]] = maskcount++; 1837 } 1838 /* set "a" values into matrix */ 1839 ishift = bs2*a->i[i]; 1840 for (j=0; j<bs; j++) { 1841 kmax = rowlengths[i*bs+j]; 1842 for (k=0; k<kmax; k++) { 1843 tmp = jj[nzcountb]/bs ; 1844 block = mask[tmp] - 1; 1845 point = jj[nzcountb] - bs*tmp; 1846 idx = ishift + bs2*block + j + bs*point; 1847 a->a[idx] = (MatScalar)aa[nzcountb++]; 1848 } 1849 } 1850 /* zero out the mask elements we set */ 1851 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1852 } 1853 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1854 1855 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1856 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1857 ierr = PetscFree(aa);CHKERRQ(ierr); 1858 ierr = PetscFree(jj);CHKERRQ(ierr); 1859 ierr = PetscFree(mask);CHKERRQ(ierr); 1860 1861 B->assembled = PETSC_TRUE; 1862 1863 ierr = MatView_Private(B);CHKERRQ(ierr); 1864 PetscFunctionReturn(0); 1865 } 1866 EXTERN_C_END 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatCreateSeqBAIJ" 1870 /*@C 1871 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1872 compressed row) format. For good matrix assembly performance the 1873 user should preallocate the matrix storage by setting the parameter nz 1874 (or the array nnz). By setting these parameters accurately, performance 1875 during matrix assembly can be increased by more than a factor of 50. 1876 1877 Collective on MPI_Comm 1878 1879 Input Parameters: 1880 + comm - MPI communicator, set to PETSC_COMM_SELF 1881 . bs - size of block 1882 . m - number of rows 1883 . n - number of columns 1884 . nz - number of nonzero blocks per block row (same for all rows) 1885 - nnz - array containing the number of nonzero blocks in the various block rows 1886 (possibly different for each block row) or PETSC_NULL 1887 1888 Output Parameter: 1889 . A - the matrix 1890 1891 Options Database Keys: 1892 . -mat_no_unroll - uses code that does not unroll the loops in the 1893 block calculations (much slower) 1894 . -mat_block_size - size of the blocks to use 1895 1896 Level: intermediate 1897 1898 Notes: 1899 A nonzero block is any block that as 1 or more nonzeros in it 1900 1901 The block AIJ format is fully compatible with standard Fortran 77 1902 storage. That is, the stored row and column indices can begin at 1903 either one (as in Fortran) or zero. See the users' manual for details. 1904 1905 Specify the preallocated storage with either nz or nnz (not both). 1906 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1907 allocation. For additional details, see the users manual chapter on 1908 matrices. 1909 1910 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1911 @*/ 1912 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1913 { 1914 int ierr; 1915 1916 PetscFunctionBegin; 1917 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1918 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1919 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1925 /*@C 1926 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1927 per row in the matrix. For good matrix assembly performance the 1928 user should preallocate the matrix storage by setting the parameter nz 1929 (or the array nnz). By setting these parameters accurately, performance 1930 during matrix assembly can be increased by more than a factor of 50. 1931 1932 Collective on MPI_Comm 1933 1934 Input Parameters: 1935 + A - the matrix 1936 . bs - size of block 1937 . nz - number of block nonzeros per block row (same for all rows) 1938 - nnz - array containing the number of block nonzeros in the various block rows 1939 (possibly different for each block row) or PETSC_NULL 1940 1941 Options Database Keys: 1942 . -mat_no_unroll - uses code that does not unroll the loops in the 1943 block calculations (much slower) 1944 . -mat_block_size - size of the blocks to use 1945 1946 Level: intermediate 1947 1948 Notes: 1949 The block AIJ format is fully compatible with standard Fortran 77 1950 storage. That is, the stored row and column indices can begin at 1951 either one (as in Fortran) or zero. See the users' manual for details. 1952 1953 Specify the preallocated storage with either nz or nnz (not both). 1954 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1955 allocation. For additional details, see the users manual chapter on 1956 matrices. 1957 1958 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1959 @*/ 1960 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1961 { 1962 Mat_SeqBAIJ *b; 1963 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1964 PetscTruth flg; 1965 1966 PetscFunctionBegin; 1967 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1968 if (!flg) PetscFunctionReturn(0); 1969 1970 B->preallocated = PETSC_TRUE; 1971 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1972 if (nnz && newbs != bs) { 1973 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1974 } 1975 bs = newbs; 1976 1977 mbs = B->m/bs; 1978 nbs = B->n/bs; 1979 bs2 = bs*bs; 1980 1981 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1982 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1983 } 1984 1985 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1986 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1987 if (nnz) { 1988 for (i=0; i<mbs; i++) { 1989 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1990 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1991 } 1992 } 1993 1994 b = (Mat_SeqBAIJ*)B->data; 1995 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1996 B->ops->solve = MatSolve_SeqBAIJ_Update; 1997 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1998 if (!flg) { 1999 switch (bs) { 2000 case 1: 2001 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2002 B->ops->mult = MatMult_SeqBAIJ_1; 2003 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2004 break; 2005 case 2: 2006 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2007 B->ops->mult = MatMult_SeqBAIJ_2; 2008 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2009 break; 2010 case 3: 2011 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2012 B->ops->mult = MatMult_SeqBAIJ_3; 2013 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2014 break; 2015 case 4: 2016 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2017 B->ops->mult = MatMult_SeqBAIJ_4; 2018 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2019 break; 2020 case 5: 2021 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2022 B->ops->mult = MatMult_SeqBAIJ_5; 2023 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2024 break; 2025 case 6: 2026 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2027 B->ops->mult = MatMult_SeqBAIJ_6; 2028 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2029 break; 2030 case 7: 2031 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2032 B->ops->mult = MatMult_SeqBAIJ_7; 2033 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2034 break; 2035 default: 2036 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2037 B->ops->mult = MatMult_SeqBAIJ_N; 2038 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2039 break; 2040 } 2041 } 2042 b->bs = bs; 2043 b->mbs = mbs; 2044 b->nbs = nbs; 2045 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2046 if (!nnz) { 2047 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2048 else if (nz <= 0) nz = 1; 2049 for (i=0; i<mbs; i++) b->imax[i] = nz; 2050 nz = nz*mbs; 2051 } else { 2052 nz = 0; 2053 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2054 } 2055 2056 /* allocate the matrix space */ 2057 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2058 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2059 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2060 b->j = (int*)(b->a + nz*bs2); 2061 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2062 b->i = b->j + nz; 2063 b->singlemalloc = PETSC_TRUE; 2064 2065 b->i[0] = 0; 2066 for (i=1; i<mbs+1; i++) { 2067 b->i[i] = b->i[i-1] + b->imax[i-1]; 2068 } 2069 2070 /* b->ilen will count nonzeros in each block row so far. */ 2071 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2072 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2073 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2074 2075 b->bs = bs; 2076 b->bs2 = bs2; 2077 b->mbs = mbs; 2078 b->nz = 0; 2079 b->maxnz = nz*bs2; 2080 B->info.nz_unneeded = (PetscReal)b->maxnz; 2081 PetscFunctionReturn(0); 2082 } 2083 2084