1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,MatScalar *v,InsertMode *is,int *err) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 MatScalar *value = v,*ap,*aa = a->a,*bap; 31 32 PetscFunctionBegin; 33 stepval = (n-1)*4; 34 for (k=0; k<m; k++) { /* loop over added rows */ 35 row = im[k]; 36 rp = aj + ai[row]; 37 ap = aa + 16*ai[row]; 38 nrow = ailen[row]; 39 low = 0; 40 for (l=0; l<n; l++) { /* loop over added columns */ 41 col = in[l]; 42 value = v + k*(stepval+4)*4 + l*4; 43 low = 0; high = nrow; 44 while (high-low > 7) { 45 t = (low+high)/2; 46 if (rp[t] > col) high = t; 47 else low = t; 48 } 49 for (i=low; i<high; i++) { 50 if (rp[i] > col) break; 51 if (rp[i] == col) { 52 bap = ap + 16*i; 53 for (ii=0; ii<4; ii++,value+=stepval) { 54 for (jj=ii; jj<16; jj+=4) { 55 bap[jj] += *value++; 56 } 57 } 58 goto noinsert2; 59 } 60 } 61 N = nrow++ - 1; 62 /* shift up all the later entries in this row */ 63 for (ii=N; ii>=i; ii--) { 64 rp[ii+1] = rp[ii]; 65 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 66 } 67 if (N >= i) { 68 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 69 } 70 rp[i] = col; 71 bap = ap + 16*i; 72 for (ii=0; ii<4; ii++,value+=stepval) { 73 for (jj=ii; jj<16; jj+=4) { 74 bap[jj] = *value++; 75 } 76 } 77 noinsert2:; 78 low = i; 79 } 80 ailen[row] = nrow; 81 } 82 } 83 84 #if defined(PETSC_HAVE_FORTRAN_CAPS) 85 #define matsetvalues4_ MATSETVALUES4 86 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 87 #define matsetvalues4_ matsetvalues4 88 #endif 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatSetValues4_" 92 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v,InsertMode *is,int *err) 93 { 94 Mat A = *AA; 95 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 96 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 97 int *ai=a->i,*ailen=a->ilen; 98 int *aj=a->j,brow,bcol; 99 int ridx,cidx; 100 MatScalar *ap,value,*aa=a->a,*bap; 101 102 PetscFunctionBegin; 103 for (k=0; k<m; k++) { /* loop over added rows */ 104 row = im[k]; brow = row/4; 105 rp = aj + ai[brow]; 106 ap = aa + 16*ai[brow]; 107 nrow = ailen[brow]; 108 low = 0; 109 for (l=0; l<n; l++) { /* loop over added columns */ 110 col = in[l]; bcol = col/4; 111 ridx = row % 4; cidx = col % 4; 112 value = v[l + k*n]; 113 low = 0; high = nrow; 114 while (high-low > 7) { 115 t = (low+high)/2; 116 if (rp[t] > bcol) high = t; 117 else low = t; 118 } 119 for (i=low; i<high; i++) { 120 if (rp[i] > bcol) break; 121 if (rp[i] == bcol) { 122 bap = ap + 16*i + 4*cidx + ridx; 123 *bap += value; 124 goto noinsert1; 125 } 126 } 127 N = nrow++ - 1; 128 /* shift up all the later entries in this row */ 129 for (ii=N; ii>=i; ii--) { 130 rp[ii+1] = rp[ii]; 131 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 132 } 133 if (N>=i) { 134 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 135 } 136 rp[i] = bcol; 137 ap[16*i + 4*cidx + ridx] = value; 138 noinsert1:; 139 low = i; 140 } 141 ailen[brow] = nrow; 142 } 143 } 144 145 /* UGLY, ugly, ugly 146 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 147 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 148 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 149 converts the entries into single precision and then calls ..._MatScalar() to put them 150 into the single precision data structures. 151 */ 152 #if defined(PETSC_USE_MAT_SINGLE) 153 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 154 #else 155 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 156 #endif 157 #if defined(PETSC_HAVE_DSCPACK) 158 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 159 #endif 160 161 #define CHUNKSIZE 10 162 163 /* 164 Checks for missing diagonals 165 */ 166 #undef __FUNCT__ 167 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 168 int MatMissingDiagonal_SeqBAIJ(Mat A) 169 { 170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 171 int *diag,*jj = a->j,i,ierr; 172 173 PetscFunctionBegin; 174 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 175 diag = a->diag; 176 for (i=0; i<a->mbs; i++) { 177 if (jj[diag[i]] != i) { 178 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 179 } 180 } 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 186 int MatMarkDiagonal_SeqBAIJ(Mat A) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 int i,j,*diag,m = a->mbs,ierr; 190 191 PetscFunctionBegin; 192 if (a->diag) PetscFunctionReturn(0); 193 194 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 195 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 196 for (i=0; i<m; i++) { 197 diag[i] = a->i[i+1]; 198 for (j=a->i[i]; j<a->i[i+1]; j++) { 199 if (a->j[j] == i) { 200 diag[i] = j; 201 break; 202 } 203 } 204 } 205 a->diag = diag; 206 PetscFunctionReturn(0); 207 } 208 209 210 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 214 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 int ierr,n = a->mbs,i; 218 219 PetscFunctionBegin; 220 *nn = n; 221 if (!ia) PetscFunctionReturn(0); 222 if (symmetric) { 223 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 224 } else if (oshift == 1) { 225 /* temporarily add 1 to i and j indices */ 226 int nz = a->i[n]; 227 for (i=0; i<nz; i++) a->j[i]++; 228 for (i=0; i<n+1; i++) a->i[i]++; 229 *ia = a->i; *ja = a->j; 230 } else { 231 *ia = a->i; *ja = a->j; 232 } 233 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 239 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 240 { 241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 242 int i,n = a->mbs,ierr; 243 244 PetscFunctionBegin; 245 if (!ia) PetscFunctionReturn(0); 246 if (symmetric) { 247 ierr = PetscFree(*ia);CHKERRQ(ierr); 248 ierr = PetscFree(*ja);CHKERRQ(ierr); 249 } else if (oshift == 1) { 250 int nz = a->i[n]-1; 251 for (i=0; i<nz; i++) a->j[i]--; 252 for (i=0; i<n+1; i++) a->i[i]--; 253 } 254 PetscFunctionReturn(0); 255 } 256 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 259 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 260 { 261 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 262 263 PetscFunctionBegin; 264 *bs = baij->bs; 265 PetscFunctionReturn(0); 266 } 267 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatDestroy_SeqBAIJ" 271 int MatDestroy_SeqBAIJ(Mat A) 272 { 273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 274 int ierr; 275 276 PetscFunctionBegin; 277 #if defined(PETSC_USE_LOG) 278 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 279 #endif 280 ierr = PetscFree(a->a);CHKERRQ(ierr); 281 if (!a->singlemalloc) { 282 ierr = PetscFree(a->i);CHKERRQ(ierr); 283 ierr = PetscFree(a->j);CHKERRQ(ierr); 284 } 285 if (a->row) { 286 ierr = ISDestroy(a->row);CHKERRQ(ierr); 287 } 288 if (a->col) { 289 ierr = ISDestroy(a->col);CHKERRQ(ierr); 290 } 291 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 292 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 293 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 294 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 295 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 296 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 297 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 298 #if defined(PETSC_USE_MAT_SINGLE) 299 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 300 #endif 301 ierr = PetscFree(a);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetOption_SeqBAIJ" 307 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 308 { 309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 310 311 PetscFunctionBegin; 312 switch (op) { 313 case MAT_ROW_ORIENTED: 314 a->roworiented = PETSC_TRUE; 315 break; 316 case MAT_COLUMN_ORIENTED: 317 a->roworiented = PETSC_FALSE; 318 break; 319 case MAT_COLUMNS_SORTED: 320 a->sorted = PETSC_TRUE; 321 break; 322 case MAT_COLUMNS_UNSORTED: 323 a->sorted = PETSC_FALSE; 324 break; 325 case MAT_KEEP_ZEROED_ROWS: 326 a->keepzeroedrows = PETSC_TRUE; 327 break; 328 case MAT_NO_NEW_NONZERO_LOCATIONS: 329 a->nonew = 1; 330 break; 331 case MAT_NEW_NONZERO_LOCATION_ERR: 332 a->nonew = -1; 333 break; 334 case MAT_NEW_NONZERO_ALLOCATION_ERR: 335 a->nonew = -2; 336 break; 337 case MAT_YES_NEW_NONZERO_LOCATIONS: 338 a->nonew = 0; 339 break; 340 case MAT_ROWS_SORTED: 341 case MAT_ROWS_UNSORTED: 342 case MAT_YES_NEW_DIAGONALS: 343 case MAT_IGNORE_OFF_PROC_ENTRIES: 344 case MAT_USE_HASH_TABLE: 345 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 346 break; 347 case MAT_NO_NEW_DIAGONALS: 348 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 349 case MAT_USE_SINGLE_PRECISION_SOLVES: 350 if (a->bs==4) { 351 PetscTruth sse_enabled_local,sse_enabled_global; 352 int ierr; 353 354 sse_enabled_local = PETSC_FALSE; 355 sse_enabled_global = PETSC_FALSE; 356 357 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 358 #if defined(PETSC_HAVE_SSE) 359 if (sse_enabled_local) { 360 a->single_precision_solves = PETSC_TRUE; 361 A->ops->solve = MatSolve_SeqBAIJ_Update; 362 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 363 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 364 break; 365 } else { 366 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 367 } 368 #else 369 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 370 #endif 371 } 372 break; 373 default: 374 SETERRQ(PETSC_ERR_SUP,"unknown option"); 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatGetRow_SeqBAIJ" 381 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 382 { 383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 384 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 385 MatScalar *aa,*aa_i; 386 PetscScalar *v_i; 387 388 PetscFunctionBegin; 389 bs = a->bs; 390 ai = a->i; 391 aj = a->j; 392 aa = a->a; 393 bs2 = a->bs2; 394 395 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 396 397 bn = row/bs; /* Block number */ 398 bp = row % bs; /* Block Position */ 399 M = ai[bn+1] - ai[bn]; 400 *nz = bs*M; 401 402 if (v) { 403 *v = 0; 404 if (*nz) { 405 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 406 for (i=0; i<M; i++) { /* for each block in the block row */ 407 v_i = *v + i*bs; 408 aa_i = aa + bs2*(ai[bn] + i); 409 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 410 } 411 } 412 } 413 414 if (idx) { 415 *idx = 0; 416 if (*nz) { 417 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 418 for (i=0; i<M; i++) { /* for each block in the block row */ 419 idx_i = *idx + i*bs; 420 itmp = bs*aj[ai[bn] + i]; 421 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 422 } 423 } 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 430 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 431 { 432 int ierr; 433 434 PetscFunctionBegin; 435 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 436 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatTranspose_SeqBAIJ" 442 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 443 { 444 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 445 Mat C; 446 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 447 int *rows,*cols,bs2=a->bs2; 448 PetscScalar *array; 449 450 PetscFunctionBegin; 451 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 452 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 453 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 454 455 #if defined(PETSC_USE_MAT_SINGLE) 456 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 457 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 458 #else 459 array = a->a; 460 #endif 461 462 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 463 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 464 ierr = PetscFree(col);CHKERRQ(ierr); 465 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 466 cols = rows + bs; 467 for (i=0; i<mbs; i++) { 468 cols[0] = i*bs; 469 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 470 len = ai[i+1] - ai[i]; 471 for (j=0; j<len; j++) { 472 rows[0] = (*aj++)*bs; 473 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 474 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 475 array += bs2; 476 } 477 } 478 ierr = PetscFree(rows);CHKERRQ(ierr); 479 #if defined(PETSC_USE_MAT_SINGLE) 480 ierr = PetscFree(array); 481 #endif 482 483 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 484 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 485 486 if (B) { 487 *B = C; 488 } else { 489 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 496 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 497 { 498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 499 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 500 PetscScalar *aa; 501 FILE *file; 502 503 PetscFunctionBegin; 504 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 505 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 506 col_lens[0] = MAT_FILE_COOKIE; 507 508 col_lens[1] = A->m; 509 col_lens[2] = A->n; 510 col_lens[3] = a->nz*bs2; 511 512 /* store lengths of each row and write (including header) to file */ 513 count = 0; 514 for (i=0; i<a->mbs; i++) { 515 for (j=0; j<bs; j++) { 516 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 517 } 518 } 519 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 520 ierr = PetscFree(col_lens);CHKERRQ(ierr); 521 522 /* store column indices (zero start index) */ 523 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 524 count = 0; 525 for (i=0; i<a->mbs; i++) { 526 for (j=0; j<bs; j++) { 527 for (k=a->i[i]; k<a->i[i+1]; k++) { 528 for (l=0; l<bs; l++) { 529 jj[count++] = bs*a->j[k] + l; 530 } 531 } 532 } 533 } 534 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 535 ierr = PetscFree(jj);CHKERRQ(ierr); 536 537 /* store nonzero values */ 538 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 539 count = 0; 540 for (i=0; i<a->mbs; i++) { 541 for (j=0; j<bs; j++) { 542 for (k=a->i[i]; k<a->i[i+1]; k++) { 543 for (l=0; l<bs; l++) { 544 aa[count++] = a->a[bs2*k + l*bs + j]; 545 } 546 } 547 } 548 } 549 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 550 ierr = PetscFree(aa);CHKERRQ(ierr); 551 552 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 553 if (file) { 554 fprintf(file,"-matload_block_size %d\n",a->bs); 555 } 556 PetscFunctionReturn(0); 557 } 558 559 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 563 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 564 { 565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 567 PetscViewerFormat format; 568 569 PetscFunctionBegin; 570 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 571 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 572 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 573 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 574 Mat aij; 575 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 576 ierr = MatView(aij,viewer);CHKERRQ(ierr); 577 ierr = MatDestroy(aij);CHKERRQ(ierr); 578 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 579 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 580 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 581 #endif 582 PetscFunctionReturn(0); 583 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 584 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 585 for (i=0; i<a->mbs; i++) { 586 for (j=0; j<bs; j++) { 587 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 588 for (k=a->i[i]; k<a->i[i+1]; k++) { 589 for (l=0; l<bs; l++) { 590 #if defined(PETSC_USE_COMPLEX) 591 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 592 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 593 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 594 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 595 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 596 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 597 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 599 } 600 #else 601 if (a->a[bs2*k + l*bs + j] != 0.0) { 602 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 603 } 604 #endif 605 } 606 } 607 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 608 } 609 } 610 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 611 } else { 612 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 613 for (i=0; i<a->mbs; i++) { 614 for (j=0; j<bs; j++) { 615 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 616 for (k=a->i[i]; k<a->i[i+1]; k++) { 617 for (l=0; l<bs; l++) { 618 #if defined(PETSC_USE_COMPLEX) 619 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 620 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 621 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 622 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 623 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 624 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 625 } else { 626 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 627 } 628 #else 629 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 630 #endif 631 } 632 } 633 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 637 } 638 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 639 PetscFunctionReturn(0); 640 } 641 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 644 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 645 { 646 Mat A = (Mat) Aa; 647 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 648 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 649 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 650 MatScalar *aa; 651 PetscViewer viewer; 652 653 PetscFunctionBegin; 654 655 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 656 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 657 658 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 659 660 /* loop over matrix elements drawing boxes */ 661 color = PETSC_DRAW_BLUE; 662 for (i=0,row=0; i<mbs; i++,row+=bs) { 663 for (j=a->i[i]; j<a->i[i+1]; j++) { 664 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 665 x_l = a->j[j]*bs; x_r = x_l + 1.0; 666 aa = a->a + j*bs2; 667 for (k=0; k<bs; k++) { 668 for (l=0; l<bs; l++) { 669 if (PetscRealPart(*aa++) >= 0.) continue; 670 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 671 } 672 } 673 } 674 } 675 color = PETSC_DRAW_CYAN; 676 for (i=0,row=0; i<mbs; i++,row+=bs) { 677 for (j=a->i[i]; j<a->i[i+1]; j++) { 678 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 679 x_l = a->j[j]*bs; x_r = x_l + 1.0; 680 aa = a->a + j*bs2; 681 for (k=0; k<bs; k++) { 682 for (l=0; l<bs; l++) { 683 if (PetscRealPart(*aa++) != 0.) continue; 684 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 685 } 686 } 687 } 688 } 689 690 color = PETSC_DRAW_RED; 691 for (i=0,row=0; i<mbs; i++,row+=bs) { 692 for (j=a->i[i]; j<a->i[i+1]; j++) { 693 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 694 x_l = a->j[j]*bs; x_r = x_l + 1.0; 695 aa = a->a + j*bs2; 696 for (k=0; k<bs; k++) { 697 for (l=0; l<bs; l++) { 698 if (PetscRealPart(*aa++) <= 0.) continue; 699 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 700 } 701 } 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 709 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 710 { 711 int ierr; 712 PetscReal xl,yl,xr,yr,w,h; 713 PetscDraw draw; 714 PetscTruth isnull; 715 716 PetscFunctionBegin; 717 718 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 719 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 720 721 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 722 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 723 xr += w; yr += h; xl = -w; yl = -h; 724 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 725 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 726 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_SeqBAIJ" 732 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 733 { 734 int ierr; 735 PetscTruth issocket,isascii,isbinary,isdraw; 736 737 PetscFunctionBegin; 738 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 739 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 740 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 742 if (issocket) { 743 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 744 } else if (isascii){ 745 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 746 } else if (isbinary) { 747 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 748 } else if (isdraw) { 749 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 750 } else { 751 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatGetValues_SeqBAIJ" 759 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 760 { 761 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 762 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 763 int *ai = a->i,*ailen = a->ilen; 764 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 765 MatScalar *ap,*aa = a->a,zero = 0.0; 766 767 PetscFunctionBegin; 768 for (k=0; k<m; k++) { /* loop over rows */ 769 row = im[k]; brow = row/bs; 770 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 771 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 772 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 773 nrow = ailen[brow]; 774 for (l=0; l<n; l++) { /* loop over columns */ 775 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 776 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 777 col = in[l] ; 778 bcol = col/bs; 779 cidx = col%bs; 780 ridx = row%bs; 781 high = nrow; 782 low = 0; /* assume unsorted */ 783 while (high-low > 5) { 784 t = (low+high)/2; 785 if (rp[t] > bcol) high = t; 786 else low = t; 787 } 788 for (i=low; i<high; i++) { 789 if (rp[i] > bcol) break; 790 if (rp[i] == bcol) { 791 *v++ = ap[bs2*i+bs*cidx+ridx]; 792 goto finished; 793 } 794 } 795 *v++ = zero; 796 finished:; 797 } 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #if defined(PETSC_USE_MAT_SINGLE) 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 805 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 806 { 807 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 808 int ierr,i,N = m*n*b->bs2; 809 MatScalar *vsingle; 810 811 PetscFunctionBegin; 812 if (N > b->setvalueslen) { 813 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 814 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 815 b->setvalueslen = N; 816 } 817 vsingle = b->setvaluescopy; 818 for (i=0; i<N; i++) { 819 vsingle[i] = v[i]; 820 } 821 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 #endif 825 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 829 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 832 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 833 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 834 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 835 PetscTruth roworiented=a->roworiented; 836 MatScalar *value = v,*ap,*aa = a->a,*bap; 837 838 PetscFunctionBegin; 839 if (roworiented) { 840 stepval = (n-1)*bs; 841 } else { 842 stepval = (m-1)*bs; 843 } 844 for (k=0; k<m; k++) { /* loop over added rows */ 845 row = im[k]; 846 if (row < 0) continue; 847 #if defined(PETSC_USE_BOPT_g) 848 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 849 #endif 850 rp = aj + ai[row]; 851 ap = aa + bs2*ai[row]; 852 rmax = imax[row]; 853 nrow = ailen[row]; 854 low = 0; 855 for (l=0; l<n; l++) { /* loop over added columns */ 856 if (in[l] < 0) continue; 857 #if defined(PETSC_USE_BOPT_g) 858 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 859 #endif 860 col = in[l]; 861 if (roworiented) { 862 value = v + k*(stepval+bs)*bs + l*bs; 863 } else { 864 value = v + l*(stepval+bs)*bs + k*bs; 865 } 866 if (!sorted) low = 0; high = nrow; 867 while (high-low > 7) { 868 t = (low+high)/2; 869 if (rp[t] > col) high = t; 870 else low = t; 871 } 872 for (i=low; i<high; i++) { 873 if (rp[i] > col) break; 874 if (rp[i] == col) { 875 bap = ap + bs2*i; 876 if (roworiented) { 877 if (is == ADD_VALUES) { 878 for (ii=0; ii<bs; ii++,value+=stepval) { 879 for (jj=ii; jj<bs2; jj+=bs) { 880 bap[jj] += *value++; 881 } 882 } 883 } else { 884 for (ii=0; ii<bs; ii++,value+=stepval) { 885 for (jj=ii; jj<bs2; jj+=bs) { 886 bap[jj] = *value++; 887 } 888 } 889 } 890 } else { 891 if (is == ADD_VALUES) { 892 for (ii=0; ii<bs; ii++,value+=stepval) { 893 for (jj=0; jj<bs; jj++) { 894 *bap++ += *value++; 895 } 896 } 897 } else { 898 for (ii=0; ii<bs; ii++,value+=stepval) { 899 for (jj=0; jj<bs; jj++) { 900 *bap++ = *value++; 901 } 902 } 903 } 904 } 905 goto noinsert2; 906 } 907 } 908 if (nonew == 1) goto noinsert2; 909 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 910 if (nrow >= rmax) { 911 /* there is no extra room in row, therefore enlarge */ 912 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 913 MatScalar *new_a; 914 915 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 916 917 /* malloc new storage space */ 918 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 919 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 920 new_j = (int*)(new_a + bs2*new_nz); 921 new_i = new_j + new_nz; 922 923 /* copy over old data into new slots */ 924 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 925 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 926 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 927 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 928 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 929 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 930 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 931 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 932 /* free up old matrix storage */ 933 ierr = PetscFree(a->a);CHKERRQ(ierr); 934 if (!a->singlemalloc) { 935 ierr = PetscFree(a->i);CHKERRQ(ierr); 936 ierr = PetscFree(a->j);CHKERRQ(ierr); 937 } 938 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 939 a->singlemalloc = PETSC_TRUE; 940 941 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 942 rmax = imax[row] = imax[row] + CHUNKSIZE; 943 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 944 a->maxnz += bs2*CHUNKSIZE; 945 a->reallocs++; 946 a->nz++; 947 } 948 N = nrow++ - 1; 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 956 } 957 rp[i] = col; 958 bap = ap + bs2*i; 959 if (roworiented) { 960 for (ii=0; ii<bs; ii++,value+=stepval) { 961 for (jj=ii; jj<bs2; jj+=bs) { 962 bap[jj] = *value++; 963 } 964 } 965 } else { 966 for (ii=0; ii<bs; ii++,value+=stepval) { 967 for (jj=0; jj<bs; jj++) { 968 *bap++ = *value++; 969 } 970 } 971 } 972 noinsert2:; 973 low = i; 974 } 975 ailen[row] = nrow; 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 982 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 983 { 984 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 985 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 986 int m = A->m,*ip,N,*ailen = a->ilen; 987 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 988 MatScalar *aa = a->a,*ap; 989 #if defined(PETSC_HAVE_DSCPACK) 990 PetscTruth flag; 991 #endif 992 993 PetscFunctionBegin; 994 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 995 996 if (m) rmax = ailen[0]; 997 for (i=1; i<mbs; i++) { 998 /* move each row back by the amount of empty slots (fshift) before it*/ 999 fshift += imax[i-1] - ailen[i-1]; 1000 rmax = PetscMax(rmax,ailen[i]); 1001 if (fshift) { 1002 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1003 N = ailen[i]; 1004 for (j=0; j<N; j++) { 1005 ip[j-fshift] = ip[j]; 1006 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1007 } 1008 } 1009 ai[i] = ai[i-1] + ailen[i-1]; 1010 } 1011 if (mbs) { 1012 fshift += imax[mbs-1] - ailen[mbs-1]; 1013 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1014 } 1015 /* reset ilen and imax for each row */ 1016 for (i=0; i<mbs; i++) { 1017 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1018 } 1019 a->nz = ai[mbs]; 1020 1021 /* diagonals may have moved, so kill the diagonal pointers */ 1022 if (fshift && a->diag) { 1023 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1024 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1025 a->diag = 0; 1026 } 1027 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1028 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1029 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1030 a->reallocs = 0; 1031 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1032 1033 #if defined(PETSC_HAVE_DSCPACK) 1034 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1035 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1036 #endif 1037 1038 PetscFunctionReturn(0); 1039 } 1040 1041 1042 1043 /* 1044 This function returns an array of flags which indicate the locations of contiguous 1045 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1046 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1047 Assume: sizes should be long enough to hold all the values. 1048 */ 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1051 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1052 { 1053 int i,j,k,row; 1054 PetscTruth flg; 1055 1056 PetscFunctionBegin; 1057 for (i=0,j=0; i<n; j++) { 1058 row = idx[i]; 1059 if (row%bs!=0) { /* Not the begining of a block */ 1060 sizes[j] = 1; 1061 i++; 1062 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1063 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1064 i++; 1065 } else { /* Begining of the block, so check if the complete block exists */ 1066 flg = PETSC_TRUE; 1067 for (k=1; k<bs; k++) { 1068 if (row+k != idx[i+k]) { /* break in the block */ 1069 flg = PETSC_FALSE; 1070 break; 1071 } 1072 } 1073 if (flg == PETSC_TRUE) { /* No break in the bs */ 1074 sizes[j] = bs; 1075 i+= bs; 1076 } else { 1077 sizes[j] = 1; 1078 i++; 1079 } 1080 } 1081 } 1082 *bs_max = j; 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1088 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1089 { 1090 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1091 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1092 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1093 PetscScalar zero = 0.0; 1094 MatScalar *aa; 1095 1096 PetscFunctionBegin; 1097 /* Make a copy of the IS and sort it */ 1098 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1099 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1100 1101 /* allocate memory for rows,sizes */ 1102 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1103 sizes = rows + is_n; 1104 1105 /* copy IS values to rows, and sort them */ 1106 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1107 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1108 if (baij->keepzeroedrows) { 1109 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1110 bs_max = is_n; 1111 } else { 1112 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1113 } 1114 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1115 1116 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1117 row = rows[j]; 1118 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1119 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1120 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1121 if (sizes[i] == bs && !baij->keepzeroedrows) { 1122 if (diag) { 1123 if (baij->ilen[row/bs] > 0) { 1124 baij->ilen[row/bs] = 1; 1125 baij->j[baij->i[row/bs]] = row/bs; 1126 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1127 } 1128 /* Now insert all the diagonal values for this bs */ 1129 for (k=0; k<bs; k++) { 1130 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1131 } 1132 } else { /* (!diag) */ 1133 baij->ilen[row/bs] = 0; 1134 } /* end (!diag) */ 1135 } else { /* (sizes[i] != bs) */ 1136 #if defined (PETSC_USE_DEBUG) 1137 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1138 #endif 1139 for (k=0; k<count; k++) { 1140 aa[0] = zero; 1141 aa += bs; 1142 } 1143 if (diag) { 1144 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1145 } 1146 } 1147 } 1148 1149 ierr = PetscFree(rows);CHKERRQ(ierr); 1150 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1156 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1157 { 1158 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1159 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1160 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1161 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1162 int ridx,cidx,bs2=a->bs2,ierr; 1163 PetscTruth roworiented=a->roworiented; 1164 MatScalar *ap,value,*aa=a->a,*bap; 1165 1166 PetscFunctionBegin; 1167 for (k=0; k<m; k++) { /* loop over added rows */ 1168 row = im[k]; brow = row/bs; 1169 if (row < 0) continue; 1170 #if defined(PETSC_USE_BOPT_g) 1171 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1172 #endif 1173 rp = aj + ai[brow]; 1174 ap = aa + bs2*ai[brow]; 1175 rmax = imax[brow]; 1176 nrow = ailen[brow]; 1177 low = 0; 1178 for (l=0; l<n; l++) { /* loop over added columns */ 1179 if (in[l] < 0) continue; 1180 #if defined(PETSC_USE_BOPT_g) 1181 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1182 #endif 1183 col = in[l]; bcol = col/bs; 1184 ridx = row % bs; cidx = col % bs; 1185 if (roworiented) { 1186 value = v[l + k*n]; 1187 } else { 1188 value = v[k + l*m]; 1189 } 1190 if (!sorted) low = 0; high = nrow; 1191 while (high-low > 7) { 1192 t = (low+high)/2; 1193 if (rp[t] > bcol) high = t; 1194 else low = t; 1195 } 1196 for (i=low; i<high; i++) { 1197 if (rp[i] > bcol) break; 1198 if (rp[i] == bcol) { 1199 bap = ap + bs2*i + bs*cidx + ridx; 1200 if (is == ADD_VALUES) *bap += value; 1201 else *bap = value; 1202 goto noinsert1; 1203 } 1204 } 1205 if (nonew == 1) goto noinsert1; 1206 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1207 if (nrow >= rmax) { 1208 /* there is no extra room in row, therefore enlarge */ 1209 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1210 MatScalar *new_a; 1211 1212 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1213 1214 /* Malloc new storage space */ 1215 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1216 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1217 new_j = (int*)(new_a + bs2*new_nz); 1218 new_i = new_j + new_nz; 1219 1220 /* copy over old data into new slots */ 1221 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1222 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1223 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1224 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1225 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1226 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1227 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1228 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1229 /* free up old matrix storage */ 1230 ierr = PetscFree(a->a);CHKERRQ(ierr); 1231 if (!a->singlemalloc) { 1232 ierr = PetscFree(a->i);CHKERRQ(ierr); 1233 ierr = PetscFree(a->j);CHKERRQ(ierr); 1234 } 1235 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1236 a->singlemalloc = PETSC_TRUE; 1237 1238 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1239 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1240 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1241 a->maxnz += bs2*CHUNKSIZE; 1242 a->reallocs++; 1243 a->nz++; 1244 } 1245 N = nrow++ - 1; 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1250 } 1251 if (N>=i) { 1252 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1253 } 1254 rp[i] = bcol; 1255 ap[bs2*i + bs*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1267 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1268 { 1269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1270 Mat outA; 1271 int ierr; 1272 PetscTruth row_identity,col_identity; 1273 1274 PetscFunctionBegin; 1275 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1276 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1277 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1278 if (!row_identity || !col_identity) { 1279 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1280 } 1281 1282 outA = inA; 1283 inA->factor = FACTOR_LU; 1284 1285 if (!a->diag) { 1286 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1287 } 1288 1289 a->row = row; 1290 a->col = col; 1291 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1292 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1293 1294 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1295 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1296 PetscLogObjectParent(inA,a->icol); 1297 1298 /* 1299 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1300 for ILU(0) factorization with natural ordering 1301 */ 1302 if (a->bs < 8) { 1303 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1304 } else { 1305 if (!a->solve_work) { 1306 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1307 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1308 } 1309 } 1310 1311 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1312 1313 PetscFunctionReturn(0); 1314 } 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1317 int MatPrintHelp_SeqBAIJ(Mat A) 1318 { 1319 static PetscTruth called = PETSC_FALSE; 1320 MPI_Comm comm = A->comm; 1321 int ierr; 1322 1323 PetscFunctionBegin; 1324 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1325 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1326 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 EXTERN_C_BEGIN 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1333 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1334 { 1335 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1336 int i,nz,nbs; 1337 1338 PetscFunctionBegin; 1339 nz = baij->maxnz/baij->bs2; 1340 nbs = baij->nbs; 1341 for (i=0; i<nz; i++) { 1342 baij->j[i] = indices[i]; 1343 } 1344 baij->nz = nz; 1345 for (i=0; i<nbs; i++) { 1346 baij->ilen[i] = baij->imax[i]; 1347 } 1348 1349 PetscFunctionReturn(0); 1350 } 1351 EXTERN_C_END 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1355 /*@ 1356 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1357 in the matrix. 1358 1359 Input Parameters: 1360 + mat - the SeqBAIJ matrix 1361 - indices - the column indices 1362 1363 Level: advanced 1364 1365 Notes: 1366 This can be called if you have precomputed the nonzero structure of the 1367 matrix and want to provide it to the matrix object to improve the performance 1368 of the MatSetValues() operation. 1369 1370 You MUST have set the correct numbers of nonzeros per row in the call to 1371 MatCreateSeqBAIJ(). 1372 1373 MUST be called before any calls to MatSetValues(); 1374 1375 @*/ 1376 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1377 { 1378 int ierr,(*f)(Mat,int *); 1379 1380 PetscFunctionBegin; 1381 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1382 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1383 if (f) { 1384 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1385 } else { 1386 SETERRQ(1,"Wrong type of matrix to set column indices"); 1387 } 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1393 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1394 { 1395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1396 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1397 PetscReal atmp; 1398 PetscScalar *x,zero = 0.0; 1399 MatScalar *aa; 1400 int ncols,brow,krow,kcol; 1401 1402 PetscFunctionBegin; 1403 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1404 bs = a->bs; 1405 aa = a->a; 1406 ai = a->i; 1407 aj = a->j; 1408 mbs = a->mbs; 1409 1410 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1411 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1412 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1413 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1414 for (i=0; i<mbs; i++) { 1415 ncols = ai[1] - ai[0]; ai++; 1416 brow = bs*i; 1417 for (j=0; j<ncols; j++){ 1418 /* bcol = bs*(*aj); */ 1419 for (kcol=0; kcol<bs; kcol++){ 1420 for (krow=0; krow<bs; krow++){ 1421 atmp = PetscAbsScalar(*aa); aa++; 1422 row = brow + krow; /* row index */ 1423 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1424 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1425 } 1426 } 1427 aj++; 1428 } 1429 } 1430 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1436 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1437 { 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1447 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1448 { 1449 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1450 PetscFunctionBegin; 1451 *array = a->a; 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1457 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1458 { 1459 PetscFunctionBegin; 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* -------------------------------------------------------------------*/ 1464 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1465 MatGetRow_SeqBAIJ, 1466 MatRestoreRow_SeqBAIJ, 1467 MatMult_SeqBAIJ_N, 1468 MatMultAdd_SeqBAIJ_N, 1469 MatMultTranspose_SeqBAIJ, 1470 MatMultTransposeAdd_SeqBAIJ, 1471 MatSolve_SeqBAIJ_N, 1472 0, 1473 0, 1474 0, 1475 MatLUFactor_SeqBAIJ, 1476 0, 1477 0, 1478 MatTranspose_SeqBAIJ, 1479 MatGetInfo_SeqBAIJ, 1480 MatEqual_SeqBAIJ, 1481 MatGetDiagonal_SeqBAIJ, 1482 MatDiagonalScale_SeqBAIJ, 1483 MatNorm_SeqBAIJ, 1484 0, 1485 MatAssemblyEnd_SeqBAIJ, 1486 0, 1487 MatSetOption_SeqBAIJ, 1488 MatZeroEntries_SeqBAIJ, 1489 MatZeroRows_SeqBAIJ, 1490 MatLUFactorSymbolic_SeqBAIJ, 1491 MatLUFactorNumeric_SeqBAIJ_N, 1492 0, 1493 0, 1494 MatSetUpPreallocation_SeqBAIJ, 1495 MatILUFactorSymbolic_SeqBAIJ, 1496 0, 1497 MatGetArray_SeqBAIJ, 1498 MatRestoreArray_SeqBAIJ, 1499 MatDuplicate_SeqBAIJ, 1500 0, 1501 0, 1502 MatILUFactor_SeqBAIJ, 1503 0, 1504 0, 1505 MatGetSubMatrices_SeqBAIJ, 1506 MatIncreaseOverlap_SeqBAIJ, 1507 MatGetValues_SeqBAIJ, 1508 0, 1509 MatPrintHelp_SeqBAIJ, 1510 MatScale_SeqBAIJ, 1511 0, 1512 0, 1513 0, 1514 MatGetBlockSize_SeqBAIJ, 1515 MatGetRowIJ_SeqBAIJ, 1516 MatRestoreRowIJ_SeqBAIJ, 1517 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 MatSetValuesBlocked_SeqBAIJ, 1524 MatGetSubMatrix_SeqBAIJ, 1525 MatDestroy_SeqBAIJ, 1526 MatView_SeqBAIJ, 1527 MatGetPetscMaps_Petsc, 1528 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 0, 1534 MatGetRowMax_SeqBAIJ, 1535 MatConvert_Basic}; 1536 1537 EXTERN_C_BEGIN 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1540 int MatStoreValues_SeqBAIJ(Mat mat) 1541 { 1542 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1543 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1544 int ierr; 1545 1546 PetscFunctionBegin; 1547 if (aij->nonew != 1) { 1548 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1549 } 1550 1551 /* allocate space for values if not already there */ 1552 if (!aij->saved_values) { 1553 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1554 } 1555 1556 /* copy values over */ 1557 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 EXTERN_C_END 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1565 int MatRetrieveValues_SeqBAIJ(Mat mat) 1566 { 1567 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1568 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1569 1570 PetscFunctionBegin; 1571 if (aij->nonew != 1) { 1572 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1573 } 1574 if (!aij->saved_values) { 1575 SETERRQ(1,"Must call MatStoreValues(A);first"); 1576 } 1577 1578 /* copy values over */ 1579 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1580 PetscFunctionReturn(0); 1581 } 1582 EXTERN_C_END 1583 1584 EXTERN_C_BEGIN 1585 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1586 EXTERN_C_END 1587 1588 EXTERN_C_BEGIN 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatCreate_SeqBAIJ" 1591 int MatCreate_SeqBAIJ(Mat B) 1592 { 1593 int ierr,size; 1594 Mat_SeqBAIJ *b; 1595 1596 PetscFunctionBegin; 1597 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1598 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1599 1600 B->m = B->M = PetscMax(B->m,B->M); 1601 B->n = B->N = PetscMax(B->n,B->N); 1602 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1603 B->data = (void*)b; 1604 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1605 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1606 B->factor = 0; 1607 B->lupivotthreshold = 1.0; 1608 B->mapping = 0; 1609 b->row = 0; 1610 b->col = 0; 1611 b->icol = 0; 1612 b->reallocs = 0; 1613 b->saved_values = 0; 1614 #if defined(PETSC_USE_MAT_SINGLE) 1615 b->setvalueslen = 0; 1616 b->setvaluescopy = PETSC_NULL; 1617 #endif 1618 b->single_precision_solves = PETSC_FALSE; 1619 1620 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1621 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1622 1623 b->sorted = PETSC_FALSE; 1624 b->roworiented = PETSC_TRUE; 1625 b->nonew = 0; 1626 b->diag = 0; 1627 b->solve_work = 0; 1628 b->mult_work = 0; 1629 B->spptr = 0; 1630 B->info.nz_unneeded = (PetscReal)b->maxnz; 1631 b->keepzeroedrows = PETSC_FALSE; 1632 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1634 "MatStoreValues_SeqBAIJ", 1635 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1637 "MatRetrieveValues_SeqBAIJ", 1638 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1640 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1641 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1643 "MatConvert_SeqBAIJ_SeqAIJ", 1644 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 EXTERN_C_END 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1651 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1652 { 1653 Mat C; 1654 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1655 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1656 1657 PetscFunctionBegin; 1658 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1659 1660 *B = 0; 1661 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1662 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1663 c = (Mat_SeqBAIJ*)C->data; 1664 1665 c->bs = a->bs; 1666 c->bs2 = a->bs2; 1667 c->mbs = a->mbs; 1668 c->nbs = a->nbs; 1669 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1670 1671 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1672 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1673 for (i=0; i<mbs; i++) { 1674 c->imax[i] = a->imax[i]; 1675 c->ilen[i] = a->ilen[i]; 1676 } 1677 1678 /* allocate the matrix space */ 1679 c->singlemalloc = PETSC_TRUE; 1680 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1681 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1682 c->j = (int*)(c->a + nz*bs2); 1683 c->i = c->j + nz; 1684 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1685 if (mbs > 0) { 1686 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1687 if (cpvalues == MAT_COPY_VALUES) { 1688 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1689 } else { 1690 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1691 } 1692 } 1693 1694 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1695 c->sorted = a->sorted; 1696 c->roworiented = a->roworiented; 1697 c->nonew = a->nonew; 1698 1699 if (a->diag) { 1700 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1701 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1702 for (i=0; i<mbs; i++) { 1703 c->diag[i] = a->diag[i]; 1704 } 1705 } else c->diag = 0; 1706 c->nz = a->nz; 1707 c->maxnz = a->maxnz; 1708 c->solve_work = 0; 1709 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1710 c->mult_work = 0; 1711 C->preallocated = PETSC_TRUE; 1712 C->assembled = PETSC_TRUE; 1713 *B = C; 1714 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 EXTERN_C_BEGIN 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatLoad_SeqBAIJ" 1721 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1722 { 1723 Mat_SeqBAIJ *a; 1724 Mat B; 1725 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1726 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1727 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1728 int *masked,nmask,tmp,bs2,ishift; 1729 PetscScalar *aa; 1730 MPI_Comm comm = ((PetscObject)viewer)->comm; 1731 1732 PetscFunctionBegin; 1733 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1734 bs2 = bs*bs; 1735 1736 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1737 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1738 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1740 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1741 M = header[1]; N = header[2]; nz = header[3]; 1742 1743 if (header[3] < 0) { 1744 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1745 } 1746 1747 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1748 1749 /* 1750 This code adds extra rows to make sure the number of rows is 1751 divisible by the blocksize 1752 */ 1753 mbs = M/bs; 1754 extra_rows = bs - M + bs*(mbs); 1755 if (extra_rows == bs) extra_rows = 0; 1756 else mbs++; 1757 if (extra_rows) { 1758 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1759 } 1760 1761 /* read in row lengths */ 1762 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1763 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1764 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1765 1766 /* read in column indices */ 1767 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1768 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1769 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1770 1771 /* loop over row lengths determining block row lengths */ 1772 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1773 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1774 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1775 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1776 masked = mask + mbs; 1777 rowcount = 0; nzcount = 0; 1778 for (i=0; i<mbs; i++) { 1779 nmask = 0; 1780 for (j=0; j<bs; j++) { 1781 kmax = rowlengths[rowcount]; 1782 for (k=0; k<kmax; k++) { 1783 tmp = jj[nzcount++]/bs; 1784 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1785 } 1786 rowcount++; 1787 } 1788 browlengths[i] += nmask; 1789 /* zero out the mask elements we set */ 1790 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1791 } 1792 1793 /* create our matrix */ 1794 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1795 B = *A; 1796 a = (Mat_SeqBAIJ*)B->data; 1797 1798 /* set matrix "i" values */ 1799 a->i[0] = 0; 1800 for (i=1; i<= mbs; i++) { 1801 a->i[i] = a->i[i-1] + browlengths[i-1]; 1802 a->ilen[i-1] = browlengths[i-1]; 1803 } 1804 a->nz = 0; 1805 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1806 1807 /* read in nonzero values */ 1808 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1809 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1810 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1811 1812 /* set "a" and "j" values into matrix */ 1813 nzcount = 0; jcount = 0; 1814 for (i=0; i<mbs; i++) { 1815 nzcountb = nzcount; 1816 nmask = 0; 1817 for (j=0; j<bs; j++) { 1818 kmax = rowlengths[i*bs+j]; 1819 for (k=0; k<kmax; k++) { 1820 tmp = jj[nzcount++]/bs; 1821 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1822 } 1823 } 1824 /* sort the masked values */ 1825 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1826 1827 /* set "j" values into matrix */ 1828 maskcount = 1; 1829 for (j=0; j<nmask; j++) { 1830 a->j[jcount++] = masked[j]; 1831 mask[masked[j]] = maskcount++; 1832 } 1833 /* set "a" values into matrix */ 1834 ishift = bs2*a->i[i]; 1835 for (j=0; j<bs; j++) { 1836 kmax = rowlengths[i*bs+j]; 1837 for (k=0; k<kmax; k++) { 1838 tmp = jj[nzcountb]/bs ; 1839 block = mask[tmp] - 1; 1840 point = jj[nzcountb] - bs*tmp; 1841 idx = ishift + bs2*block + j + bs*point; 1842 a->a[idx] = (MatScalar)aa[nzcountb++]; 1843 } 1844 } 1845 /* zero out the mask elements we set */ 1846 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1847 } 1848 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1849 1850 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1851 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1852 ierr = PetscFree(aa);CHKERRQ(ierr); 1853 ierr = PetscFree(jj);CHKERRQ(ierr); 1854 ierr = PetscFree(mask);CHKERRQ(ierr); 1855 1856 B->assembled = PETSC_TRUE; 1857 1858 ierr = MatView_Private(B);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 EXTERN_C_END 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatCreateSeqBAIJ" 1865 /*@C 1866 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1867 compressed row) format. For good matrix assembly performance the 1868 user should preallocate the matrix storage by setting the parameter nz 1869 (or the array nnz). By setting these parameters accurately, performance 1870 during matrix assembly can be increased by more than a factor of 50. 1871 1872 Collective on MPI_Comm 1873 1874 Input Parameters: 1875 + comm - MPI communicator, set to PETSC_COMM_SELF 1876 . bs - size of block 1877 . m - number of rows 1878 . n - number of columns 1879 . nz - number of nonzero blocks per block row (same for all rows) 1880 - nnz - array containing the number of nonzero blocks in the various block rows 1881 (possibly different for each block row) or PETSC_NULL 1882 1883 Output Parameter: 1884 . A - the matrix 1885 1886 Options Database Keys: 1887 . -mat_no_unroll - uses code that does not unroll the loops in the 1888 block calculations (much slower) 1889 . -mat_block_size - size of the blocks to use 1890 1891 Level: intermediate 1892 1893 Notes: 1894 A nonzero block is any block that as 1 or more nonzeros in it 1895 1896 The block AIJ format is fully compatible with standard Fortran 77 1897 storage. That is, the stored row and column indices can begin at 1898 either one (as in Fortran) or zero. See the users' manual for details. 1899 1900 Specify the preallocated storage with either nz or nnz (not both). 1901 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1902 allocation. For additional details, see the users manual chapter on 1903 matrices. 1904 1905 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1906 @*/ 1907 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1908 { 1909 int ierr; 1910 1911 PetscFunctionBegin; 1912 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1913 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1914 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1920 /*@C 1921 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1922 per row in the matrix. For good matrix assembly performance the 1923 user should preallocate the matrix storage by setting the parameter nz 1924 (or the array nnz). By setting these parameters accurately, performance 1925 during matrix assembly can be increased by more than a factor of 50. 1926 1927 Collective on MPI_Comm 1928 1929 Input Parameters: 1930 + A - the matrix 1931 . bs - size of block 1932 . nz - number of block nonzeros per block row (same for all rows) 1933 - nnz - array containing the number of block nonzeros in the various block rows 1934 (possibly different for each block row) or PETSC_NULL 1935 1936 Options Database Keys: 1937 . -mat_no_unroll - uses code that does not unroll the loops in the 1938 block calculations (much slower) 1939 . -mat_block_size - size of the blocks to use 1940 1941 Level: intermediate 1942 1943 Notes: 1944 The block AIJ format is fully compatible with standard Fortran 77 1945 storage. That is, the stored row and column indices can begin at 1946 either one (as in Fortran) or zero. See the users' manual for details. 1947 1948 Specify the preallocated storage with either nz or nnz (not both). 1949 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1950 allocation. For additional details, see the users manual chapter on 1951 matrices. 1952 1953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1954 @*/ 1955 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1956 { 1957 Mat_SeqBAIJ *b; 1958 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1959 PetscTruth flg; 1960 1961 PetscFunctionBegin; 1962 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1963 if (!flg) PetscFunctionReturn(0); 1964 1965 B->preallocated = PETSC_TRUE; 1966 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1967 if (nnz && newbs != bs) { 1968 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1969 } 1970 bs = newbs; 1971 1972 mbs = B->m/bs; 1973 nbs = B->n/bs; 1974 bs2 = bs*bs; 1975 1976 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1977 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1978 } 1979 1980 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1981 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1982 if (nnz) { 1983 for (i=0; i<mbs; i++) { 1984 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1985 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1986 } 1987 } 1988 1989 b = (Mat_SeqBAIJ*)B->data; 1990 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1991 B->ops->solve = MatSolve_SeqBAIJ_Update; 1992 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1993 if (!flg) { 1994 switch (bs) { 1995 case 1: 1996 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1997 B->ops->mult = MatMult_SeqBAIJ_1; 1998 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1999 break; 2000 case 2: 2001 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2002 B->ops->mult = MatMult_SeqBAIJ_2; 2003 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2004 break; 2005 case 3: 2006 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2007 B->ops->mult = MatMult_SeqBAIJ_3; 2008 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2009 break; 2010 case 4: 2011 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2012 B->ops->mult = MatMult_SeqBAIJ_4; 2013 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2014 break; 2015 case 5: 2016 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2017 B->ops->mult = MatMult_SeqBAIJ_5; 2018 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2019 break; 2020 case 6: 2021 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2022 B->ops->mult = MatMult_SeqBAIJ_6; 2023 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2024 break; 2025 case 7: 2026 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2027 B->ops->mult = MatMult_SeqBAIJ_7; 2028 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2029 break; 2030 default: 2031 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2032 B->ops->mult = MatMult_SeqBAIJ_N; 2033 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2034 break; 2035 } 2036 } 2037 b->bs = bs; 2038 b->mbs = mbs; 2039 b->nbs = nbs; 2040 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2041 if (!nnz) { 2042 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2043 else if (nz <= 0) nz = 1; 2044 for (i=0; i<mbs; i++) b->imax[i] = nz; 2045 nz = nz*mbs; 2046 } else { 2047 nz = 0; 2048 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2049 } 2050 2051 /* allocate the matrix space */ 2052 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2053 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2054 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2055 b->j = (int*)(b->a + nz*bs2); 2056 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2057 b->i = b->j + nz; 2058 b->singlemalloc = PETSC_TRUE; 2059 2060 b->i[0] = 0; 2061 for (i=1; i<mbs+1; i++) { 2062 b->i[i] = b->i[i-1] + b->imax[i-1]; 2063 } 2064 2065 /* b->ilen will count nonzeros in each block row so far. */ 2066 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2067 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2068 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2069 2070 b->bs = bs; 2071 b->bs2 = bs2; 2072 b->mbs = mbs; 2073 b->nz = 0; 2074 b->maxnz = nz*bs2; 2075 B->info.nz_unneeded = (PetscReal)b->maxnz; 2076 PetscFunctionReturn(0); 2077 } 2078 2079