1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 break; 351 default: 352 SETERRQ(PETSC_ERR_SUP,"unknown option"); 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatGetRow_SeqBAIJ" 359 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 360 { 361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 362 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 363 MatScalar *aa,*aa_i; 364 PetscScalar *v_i; 365 366 PetscFunctionBegin; 367 bs = a->bs; 368 ai = a->i; 369 aj = a->j; 370 aa = a->a; 371 bs2 = a->bs2; 372 373 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 374 375 bn = row/bs; /* Block number */ 376 bp = row % bs; /* Block Position */ 377 M = ai[bn+1] - ai[bn]; 378 *nz = bs*M; 379 380 if (v) { 381 *v = 0; 382 if (*nz) { 383 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 384 for (i=0; i<M; i++) { /* for each block in the block row */ 385 v_i = *v + i*bs; 386 aa_i = aa + bs2*(ai[bn] + i); 387 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 388 } 389 } 390 } 391 392 if (idx) { 393 *idx = 0; 394 if (*nz) { 395 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 396 for (i=0; i<M; i++) { /* for each block in the block row */ 397 idx_i = *idx + i*bs; 398 itmp = bs*aj[ai[bn] + i]; 399 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 408 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 409 { 410 int ierr; 411 412 PetscFunctionBegin; 413 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 414 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatTranspose_SeqBAIJ" 420 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 421 { 422 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 423 Mat C; 424 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 425 int *rows,*cols,bs2=a->bs2; 426 PetscScalar *array; 427 428 PetscFunctionBegin; 429 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 430 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 431 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 432 433 #if defined(PETSC_USE_MAT_SINGLE) 434 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 435 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 436 #else 437 array = a->a; 438 #endif 439 440 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 441 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 442 ierr = PetscFree(col);CHKERRQ(ierr); 443 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 444 cols = rows + bs; 445 for (i=0; i<mbs; i++) { 446 cols[0] = i*bs; 447 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 448 len = ai[i+1] - ai[i]; 449 for (j=0; j<len; j++) { 450 rows[0] = (*aj++)*bs; 451 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 452 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 453 array += bs2; 454 } 455 } 456 ierr = PetscFree(rows);CHKERRQ(ierr); 457 #if defined(PETSC_USE_MAT_SINGLE) 458 ierr = PetscFree(array); 459 #endif 460 461 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 462 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 463 464 if (B) { 465 *B = C; 466 } else { 467 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 474 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 475 { 476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 477 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 478 PetscScalar *aa; 479 FILE *file; 480 481 PetscFunctionBegin; 482 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 483 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 484 col_lens[0] = MAT_FILE_COOKIE; 485 486 col_lens[1] = A->m; 487 col_lens[2] = A->n; 488 col_lens[3] = a->nz*bs2; 489 490 /* store lengths of each row and write (including header) to file */ 491 count = 0; 492 for (i=0; i<a->mbs; i++) { 493 for (j=0; j<bs; j++) { 494 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 495 } 496 } 497 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 498 ierr = PetscFree(col_lens);CHKERRQ(ierr); 499 500 /* store column indices (zero start index) */ 501 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 502 count = 0; 503 for (i=0; i<a->mbs; i++) { 504 for (j=0; j<bs; j++) { 505 for (k=a->i[i]; k<a->i[i+1]; k++) { 506 for (l=0; l<bs; l++) { 507 jj[count++] = bs*a->j[k] + l; 508 } 509 } 510 } 511 } 512 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 513 ierr = PetscFree(jj);CHKERRQ(ierr); 514 515 /* store nonzero values */ 516 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 517 count = 0; 518 for (i=0; i<a->mbs; i++) { 519 for (j=0; j<bs; j++) { 520 for (k=a->i[i]; k<a->i[i+1]; k++) { 521 for (l=0; l<bs; l++) { 522 aa[count++] = a->a[bs2*k + l*bs + j]; 523 } 524 } 525 } 526 } 527 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 528 ierr = PetscFree(aa);CHKERRQ(ierr); 529 530 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 531 if (file) { 532 fprintf(file,"-matload_block_size %d\n",a->bs); 533 } 534 PetscFunctionReturn(0); 535 } 536 537 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 545 PetscViewerFormat format; 546 547 PetscFunctionBegin; 548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 550 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 552 Mat aij; 553 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 555 ierr = MatDestroy(aij);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 557 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 558 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 559 #endif 560 PetscFunctionReturn(0); 561 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 562 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 563 for (i=0; i<a->mbs; i++) { 564 for (j=0; j<bs; j++) { 565 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 566 for (k=a->i[i]; k<a->i[i+1]; k++) { 567 for (l=0; l<bs; l++) { 568 #if defined(PETSC_USE_COMPLEX) 569 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 570 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 571 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 572 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 574 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 575 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 576 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 577 } 578 #else 579 if (a->a[bs2*k + l*bs + j] != 0.0) { 580 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 581 } 582 #endif 583 } 584 } 585 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 586 } 587 } 588 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 589 } else { 590 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 591 for (i=0; i<a->mbs; i++) { 592 for (j=0; j<bs; j++) { 593 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 594 for (k=a->i[i]; k<a->i[i+1]; k++) { 595 for (l=0; l<bs; l++) { 596 #if defined(PETSC_USE_COMPLEX) 597 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 599 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 602 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 603 } else { 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 605 } 606 #else 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 608 #endif 609 } 610 } 611 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 612 } 613 } 614 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 622 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 623 { 624 Mat A = (Mat) Aa; 625 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 626 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 627 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 628 MatScalar *aa; 629 PetscViewer viewer; 630 631 PetscFunctionBegin; 632 633 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 634 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 635 636 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 637 638 /* loop over matrix elements drawing boxes */ 639 color = PETSC_DRAW_BLUE; 640 for (i=0,row=0; i<mbs; i++,row+=bs) { 641 for (j=a->i[i]; j<a->i[i+1]; j++) { 642 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 644 aa = a->a + j*bs2; 645 for (k=0; k<bs; k++) { 646 for (l=0; l<bs; l++) { 647 if (PetscRealPart(*aa++) >= 0.) continue; 648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 color = PETSC_DRAW_CYAN; 654 for (i=0,row=0; i<mbs; i++,row+=bs) { 655 for (j=a->i[i]; j<a->i[i+1]; j++) { 656 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 657 x_l = a->j[j]*bs; x_r = x_l + 1.0; 658 aa = a->a + j*bs2; 659 for (k=0; k<bs; k++) { 660 for (l=0; l<bs; l++) { 661 if (PetscRealPart(*aa++) != 0.) continue; 662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 663 } 664 } 665 } 666 } 667 668 color = PETSC_DRAW_RED; 669 for (i=0,row=0; i<mbs; i++,row+=bs) { 670 for (j=a->i[i]; j<a->i[i+1]; j++) { 671 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 673 aa = a->a + j*bs2; 674 for (k=0; k<bs; k++) { 675 for (l=0; l<bs; l++) { 676 if (PetscRealPart(*aa++) <= 0.) continue; 677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 678 } 679 } 680 } 681 } 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 687 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 688 { 689 int ierr; 690 PetscReal xl,yl,xr,yr,w,h; 691 PetscDraw draw; 692 PetscTruth isnull; 693 694 PetscFunctionBegin; 695 696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 698 699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 700 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 701 xr += w; yr += h; xl = -w; yl = -h; 702 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_SeqBAIJ" 710 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 711 { 712 int ierr; 713 PetscTruth issocket,isascii,isbinary,isdraw; 714 715 PetscFunctionBegin; 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 719 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 720 if (issocket) { 721 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 722 } else if (isascii){ 723 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 724 } else if (isbinary) { 725 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 726 } else if (isdraw) { 727 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 728 } else { 729 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 730 } 731 PetscFunctionReturn(0); 732 } 733 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatGetValues_SeqBAIJ" 737 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 738 { 739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 740 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 741 int *ai = a->i,*ailen = a->ilen; 742 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 743 MatScalar *ap,*aa = a->a,zero = 0.0; 744 745 PetscFunctionBegin; 746 for (k=0; k<m; k++) { /* loop over rows */ 747 row = im[k]; brow = row/bs; 748 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 749 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 750 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 751 nrow = ailen[brow]; 752 for (l=0; l<n; l++) { /* loop over columns */ 753 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 754 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 755 col = in[l] ; 756 bcol = col/bs; 757 cidx = col%bs; 758 ridx = row%bs; 759 high = nrow; 760 low = 0; /* assume unsorted */ 761 while (high-low > 5) { 762 t = (low+high)/2; 763 if (rp[t] > bcol) high = t; 764 else low = t; 765 } 766 for (i=low; i<high; i++) { 767 if (rp[i] > bcol) break; 768 if (rp[i] == bcol) { 769 *v++ = ap[bs2*i+bs*cidx+ridx]; 770 goto finished; 771 } 772 } 773 *v++ = zero; 774 finished:; 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 #if defined(PETSC_USE_MAT_SINGLE) 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 783 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 784 { 785 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 786 int ierr,i,N = m*n*b->bs2; 787 MatScalar *vsingle; 788 789 PetscFunctionBegin; 790 if (N > b->setvalueslen) { 791 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 792 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 793 b->setvalueslen = N; 794 } 795 vsingle = b->setvaluescopy; 796 for (i=0; i<N; i++) { 797 vsingle[i] = v[i]; 798 } 799 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 #endif 803 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 807 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 811 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 812 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 813 PetscTruth roworiented=a->roworiented; 814 MatScalar *value = v,*ap,*aa = a->a,*bap; 815 816 PetscFunctionBegin; 817 if (roworiented) { 818 stepval = (n-1)*bs; 819 } else { 820 stepval = (m-1)*bs; 821 } 822 for (k=0; k<m; k++) { /* loop over added rows */ 823 row = im[k]; 824 if (row < 0) continue; 825 #if defined(PETSC_USE_BOPT_g) 826 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 827 #endif 828 rp = aj + ai[row]; 829 ap = aa + bs2*ai[row]; 830 rmax = imax[row]; 831 nrow = ailen[row]; 832 low = 0; 833 for (l=0; l<n; l++) { /* loop over added columns */ 834 if (in[l] < 0) continue; 835 #if defined(PETSC_USE_BOPT_g) 836 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 837 #endif 838 col = in[l]; 839 if (roworiented) { 840 value = v + k*(stepval+bs)*bs + l*bs; 841 } else { 842 value = v + l*(stepval+bs)*bs + k*bs; 843 } 844 if (!sorted) low = 0; high = nrow; 845 while (high-low > 7) { 846 t = (low+high)/2; 847 if (rp[t] > col) high = t; 848 else low = t; 849 } 850 for (i=low; i<high; i++) { 851 if (rp[i] > col) break; 852 if (rp[i] == col) { 853 bap = ap + bs2*i; 854 if (roworiented) { 855 if (is == ADD_VALUES) { 856 for (ii=0; ii<bs; ii++,value+=stepval) { 857 for (jj=ii; jj<bs2; jj+=bs) { 858 bap[jj] += *value++; 859 } 860 } 861 } else { 862 for (ii=0; ii<bs; ii++,value+=stepval) { 863 for (jj=ii; jj<bs2; jj+=bs) { 864 bap[jj] = *value++; 865 } 866 } 867 } 868 } else { 869 if (is == ADD_VALUES) { 870 for (ii=0; ii<bs; ii++,value+=stepval) { 871 for (jj=0; jj<bs; jj++) { 872 *bap++ += *value++; 873 } 874 } 875 } else { 876 for (ii=0; ii<bs; ii++,value+=stepval) { 877 for (jj=0; jj<bs; jj++) { 878 *bap++ = *value++; 879 } 880 } 881 } 882 } 883 goto noinsert2; 884 } 885 } 886 if (nonew == 1) goto noinsert2; 887 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 888 if (nrow >= rmax) { 889 /* there is no extra room in row, therefore enlarge */ 890 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 891 MatScalar *new_a; 892 893 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 894 895 /* malloc new storage space */ 896 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 897 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 898 new_j = (int*)(new_a + bs2*new_nz); 899 new_i = new_j + new_nz; 900 901 /* copy over old data into new slots */ 902 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 903 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 904 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 905 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 906 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 907 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 908 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 909 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 910 /* free up old matrix storage */ 911 ierr = PetscFree(a->a);CHKERRQ(ierr); 912 if (!a->singlemalloc) { 913 ierr = PetscFree(a->i);CHKERRQ(ierr); 914 ierr = PetscFree(a->j);CHKERRQ(ierr); 915 } 916 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 917 a->singlemalloc = PETSC_TRUE; 918 919 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 920 rmax = imax[row] = imax[row] + CHUNKSIZE; 921 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 922 a->maxnz += bs2*CHUNKSIZE; 923 a->reallocs++; 924 a->nz++; 925 } 926 N = nrow++ - 1; 927 /* shift up all the later entries in this row */ 928 for (ii=N; ii>=i; ii--) { 929 rp[ii+1] = rp[ii]; 930 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 931 } 932 if (N >= i) { 933 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 934 } 935 rp[i] = col; 936 bap = ap + bs2*i; 937 if (roworiented) { 938 for (ii=0; ii<bs; ii++,value+=stepval) { 939 for (jj=ii; jj<bs2; jj+=bs) { 940 bap[jj] = *value++; 941 } 942 } 943 } else { 944 for (ii=0; ii<bs; ii++,value+=stepval) { 945 for (jj=0; jj<bs; jj++) { 946 *bap++ = *value++; 947 } 948 } 949 } 950 noinsert2:; 951 low = i; 952 } 953 ailen[row] = nrow; 954 } 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 960 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 961 { 962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 963 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 964 int m = A->m,*ip,N,*ailen = a->ilen; 965 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 966 MatScalar *aa = a->a,*ap; 967 #if defined(PETSC_HAVE_DSCPACK) 968 PetscTruth flag; 969 #endif 970 971 PetscFunctionBegin; 972 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 973 974 if (m) rmax = ailen[0]; 975 for (i=1; i<mbs; i++) { 976 /* move each row back by the amount of empty slots (fshift) before it*/ 977 fshift += imax[i-1] - ailen[i-1]; 978 rmax = PetscMax(rmax,ailen[i]); 979 if (fshift) { 980 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 981 N = ailen[i]; 982 for (j=0; j<N; j++) { 983 ip[j-fshift] = ip[j]; 984 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 985 } 986 } 987 ai[i] = ai[i-1] + ailen[i-1]; 988 } 989 if (mbs) { 990 fshift += imax[mbs-1] - ailen[mbs-1]; 991 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 992 } 993 /* reset ilen and imax for each row */ 994 for (i=0; i<mbs; i++) { 995 ailen[i] = imax[i] = ai[i+1] - ai[i]; 996 } 997 a->nz = ai[mbs]; 998 999 /* diagonals may have moved, so kill the diagonal pointers */ 1000 if (fshift && a->diag) { 1001 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1002 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1003 a->diag = 0; 1004 } 1005 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1006 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1007 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1008 a->reallocs = 0; 1009 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1010 1011 #if defined(PETSC_HAVE_DSCPACK) 1012 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1013 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1014 #endif 1015 1016 PetscFunctionReturn(0); 1017 } 1018 1019 1020 1021 /* 1022 This function returns an array of flags which indicate the locations of contiguous 1023 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1024 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1025 Assume: sizes should be long enough to hold all the values. 1026 */ 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1029 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1030 { 1031 int i,j,k,row; 1032 PetscTruth flg; 1033 1034 PetscFunctionBegin; 1035 for (i=0,j=0; i<n; j++) { 1036 row = idx[i]; 1037 if (row%bs!=0) { /* Not the begining of a block */ 1038 sizes[j] = 1; 1039 i++; 1040 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1041 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1042 i++; 1043 } else { /* Begining of the block, so check if the complete block exists */ 1044 flg = PETSC_TRUE; 1045 for (k=1; k<bs; k++) { 1046 if (row+k != idx[i+k]) { /* break in the block */ 1047 flg = PETSC_FALSE; 1048 break; 1049 } 1050 } 1051 if (flg == PETSC_TRUE) { /* No break in the bs */ 1052 sizes[j] = bs; 1053 i+= bs; 1054 } else { 1055 sizes[j] = 1; 1056 i++; 1057 } 1058 } 1059 } 1060 *bs_max = j; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1066 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1067 { 1068 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1069 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1070 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1071 PetscScalar zero = 0.0; 1072 MatScalar *aa; 1073 1074 PetscFunctionBegin; 1075 /* Make a copy of the IS and sort it */ 1076 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1077 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1078 1079 /* allocate memory for rows,sizes */ 1080 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1081 sizes = rows + is_n; 1082 1083 /* copy IS values to rows, and sort them */ 1084 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1085 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1086 if (baij->keepzeroedrows) { 1087 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1088 bs_max = is_n; 1089 } else { 1090 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1091 } 1092 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1093 1094 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1095 row = rows[j]; 1096 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1097 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1098 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1099 if (sizes[i] == bs && !baij->keepzeroedrows) { 1100 if (diag) { 1101 if (baij->ilen[row/bs] > 0) { 1102 baij->ilen[row/bs] = 1; 1103 baij->j[baij->i[row/bs]] = row/bs; 1104 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1105 } 1106 /* Now insert all the diagonal values for this bs */ 1107 for (k=0; k<bs; k++) { 1108 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1109 } 1110 } else { /* (!diag) */ 1111 baij->ilen[row/bs] = 0; 1112 } /* end (!diag) */ 1113 } else { /* (sizes[i] != bs) */ 1114 #if defined (PETSC_USE_DEBUG) 1115 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1116 #endif 1117 for (k=0; k<count; k++) { 1118 aa[0] = zero; 1119 aa += bs; 1120 } 1121 if (diag) { 1122 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1123 } 1124 } 1125 } 1126 1127 ierr = PetscFree(rows);CHKERRQ(ierr); 1128 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1134 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1135 { 1136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1137 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1138 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1139 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1140 int ridx,cidx,bs2=a->bs2,ierr; 1141 PetscTruth roworiented=a->roworiented; 1142 MatScalar *ap,value,*aa=a->a,*bap; 1143 1144 PetscFunctionBegin; 1145 for (k=0; k<m; k++) { /* loop over added rows */ 1146 row = im[k]; brow = row/bs; 1147 if (row < 0) continue; 1148 #if defined(PETSC_USE_BOPT_g) 1149 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1150 #endif 1151 rp = aj + ai[brow]; 1152 ap = aa + bs2*ai[brow]; 1153 rmax = imax[brow]; 1154 nrow = ailen[brow]; 1155 low = 0; 1156 for (l=0; l<n; l++) { /* loop over added columns */ 1157 if (in[l] < 0) continue; 1158 #if defined(PETSC_USE_BOPT_g) 1159 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1160 #endif 1161 col = in[l]; bcol = col/bs; 1162 ridx = row % bs; cidx = col % bs; 1163 if (roworiented) { 1164 value = v[l + k*n]; 1165 } else { 1166 value = v[k + l*m]; 1167 } 1168 if (!sorted) low = 0; high = nrow; 1169 while (high-low > 7) { 1170 t = (low+high)/2; 1171 if (rp[t] > bcol) high = t; 1172 else low = t; 1173 } 1174 for (i=low; i<high; i++) { 1175 if (rp[i] > bcol) break; 1176 if (rp[i] == bcol) { 1177 bap = ap + bs2*i + bs*cidx + ridx; 1178 if (is == ADD_VALUES) *bap += value; 1179 else *bap = value; 1180 goto noinsert1; 1181 } 1182 } 1183 if (nonew == 1) goto noinsert1; 1184 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1185 if (nrow >= rmax) { 1186 /* there is no extra room in row, therefore enlarge */ 1187 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1188 MatScalar *new_a; 1189 1190 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1191 1192 /* Malloc new storage space */ 1193 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1194 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1195 new_j = (int*)(new_a + bs2*new_nz); 1196 new_i = new_j + new_nz; 1197 1198 /* copy over old data into new slots */ 1199 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1200 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1201 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1202 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1203 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1204 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1205 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1206 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1207 /* free up old matrix storage */ 1208 ierr = PetscFree(a->a);CHKERRQ(ierr); 1209 if (!a->singlemalloc) { 1210 ierr = PetscFree(a->i);CHKERRQ(ierr); 1211 ierr = PetscFree(a->j);CHKERRQ(ierr); 1212 } 1213 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1214 a->singlemalloc = PETSC_TRUE; 1215 1216 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1217 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1218 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1219 a->maxnz += bs2*CHUNKSIZE; 1220 a->reallocs++; 1221 a->nz++; 1222 } 1223 N = nrow++ - 1; 1224 /* shift up all the later entries in this row */ 1225 for (ii=N; ii>=i; ii--) { 1226 rp[ii+1] = rp[ii]; 1227 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1228 } 1229 if (N>=i) { 1230 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1231 } 1232 rp[i] = bcol; 1233 ap[bs2*i + bs*cidx + ridx] = value; 1234 noinsert1:; 1235 low = i; 1236 } 1237 ailen[brow] = nrow; 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1245 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1246 { 1247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1248 Mat outA; 1249 int ierr; 1250 PetscTruth row_identity,col_identity; 1251 1252 PetscFunctionBegin; 1253 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1254 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1255 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1256 if (!row_identity || !col_identity) { 1257 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1258 } 1259 1260 outA = inA; 1261 inA->factor = FACTOR_LU; 1262 1263 if (!a->diag) { 1264 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1265 } 1266 1267 a->row = row; 1268 a->col = col; 1269 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1270 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1271 1272 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1273 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1274 PetscLogObjectParent(inA,a->icol); 1275 1276 /* 1277 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1278 for ILU(0) factorization with natural ordering 1279 */ 1280 if (a->bs < 8) { 1281 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1282 } else { 1283 if (!a->solve_work) { 1284 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1285 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1286 } 1287 } 1288 1289 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1290 1291 PetscFunctionReturn(0); 1292 } 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1295 int MatPrintHelp_SeqBAIJ(Mat A) 1296 { 1297 static PetscTruth called = PETSC_FALSE; 1298 MPI_Comm comm = A->comm; 1299 int ierr; 1300 1301 PetscFunctionBegin; 1302 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1303 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1304 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 EXTERN_C_BEGIN 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1311 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1312 { 1313 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1314 int i,nz,nbs; 1315 1316 PetscFunctionBegin; 1317 nz = baij->maxnz/baij->bs2; 1318 nbs = baij->nbs; 1319 for (i=0; i<nz; i++) { 1320 baij->j[i] = indices[i]; 1321 } 1322 baij->nz = nz; 1323 for (i=0; i<nbs; i++) { 1324 baij->ilen[i] = baij->imax[i]; 1325 } 1326 1327 PetscFunctionReturn(0); 1328 } 1329 EXTERN_C_END 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1333 /*@ 1334 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1335 in the matrix. 1336 1337 Input Parameters: 1338 + mat - the SeqBAIJ matrix 1339 - indices - the column indices 1340 1341 Level: advanced 1342 1343 Notes: 1344 This can be called if you have precomputed the nonzero structure of the 1345 matrix and want to provide it to the matrix object to improve the performance 1346 of the MatSetValues() operation. 1347 1348 You MUST have set the correct numbers of nonzeros per row in the call to 1349 MatCreateSeqBAIJ(). 1350 1351 MUST be called before any calls to MatSetValues(); 1352 1353 @*/ 1354 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1355 { 1356 int ierr,(*f)(Mat,int *); 1357 1358 PetscFunctionBegin; 1359 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1360 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1361 if (f) { 1362 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1363 } else { 1364 SETERRQ(1,"Wrong type of matrix to set column indices"); 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1371 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1372 { 1373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1374 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1375 PetscReal atmp; 1376 PetscScalar *x,zero = 0.0; 1377 MatScalar *aa; 1378 int ncols,brow,krow,kcol; 1379 1380 PetscFunctionBegin; 1381 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1382 bs = a->bs; 1383 aa = a->a; 1384 ai = a->i; 1385 aj = a->j; 1386 mbs = a->mbs; 1387 1388 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1389 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1390 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1391 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1392 for (i=0; i<mbs; i++) { 1393 ncols = ai[1] - ai[0]; ai++; 1394 brow = bs*i; 1395 for (j=0; j<ncols; j++){ 1396 /* bcol = bs*(*aj); */ 1397 for (kcol=0; kcol<bs; kcol++){ 1398 for (krow=0; krow<bs; krow++){ 1399 atmp = PetscAbsScalar(*aa); aa++; 1400 row = brow + krow; /* row index */ 1401 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1402 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1403 } 1404 } 1405 aj++; 1406 } 1407 } 1408 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1414 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1415 { 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1425 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1426 { 1427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1428 PetscFunctionBegin; 1429 *array = a->a; 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1435 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1436 { 1437 PetscFunctionBegin; 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include "petscblaslapack.h" 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1444 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1445 { 1446 int ierr,one=1; 1447 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1448 1449 PetscFunctionBegin; 1450 if (str == SAME_NONZERO_PATTERN) { 1451 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1452 } else { 1453 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 /* -------------------------------------------------------------------*/ 1459 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1460 MatGetRow_SeqBAIJ, 1461 MatRestoreRow_SeqBAIJ, 1462 MatMult_SeqBAIJ_N, 1463 MatMultAdd_SeqBAIJ_N, 1464 MatMultTranspose_SeqBAIJ, 1465 MatMultTransposeAdd_SeqBAIJ, 1466 MatSolve_SeqBAIJ_N, 1467 0, 1468 0, 1469 0, 1470 MatLUFactor_SeqBAIJ, 1471 0, 1472 0, 1473 MatTranspose_SeqBAIJ, 1474 MatGetInfo_SeqBAIJ, 1475 MatEqual_SeqBAIJ, 1476 MatGetDiagonal_SeqBAIJ, 1477 MatDiagonalScale_SeqBAIJ, 1478 MatNorm_SeqBAIJ, 1479 0, 1480 MatAssemblyEnd_SeqBAIJ, 1481 0, 1482 MatSetOption_SeqBAIJ, 1483 MatZeroEntries_SeqBAIJ, 1484 MatZeroRows_SeqBAIJ, 1485 MatLUFactorSymbolic_SeqBAIJ, 1486 MatLUFactorNumeric_SeqBAIJ_N, 1487 0, 1488 0, 1489 MatSetUpPreallocation_SeqBAIJ, 1490 MatILUFactorSymbolic_SeqBAIJ, 1491 0, 1492 MatGetArray_SeqBAIJ, 1493 MatRestoreArray_SeqBAIJ, 1494 MatDuplicate_SeqBAIJ, 1495 0, 1496 0, 1497 MatILUFactor_SeqBAIJ, 1498 0, 1499 MatAXPY_SeqBAIJ, 1500 MatGetSubMatrices_SeqBAIJ, 1501 MatIncreaseOverlap_SeqBAIJ, 1502 MatGetValues_SeqBAIJ, 1503 0, 1504 MatPrintHelp_SeqBAIJ, 1505 MatScale_SeqBAIJ, 1506 0, 1507 0, 1508 0, 1509 MatGetBlockSize_SeqBAIJ, 1510 MatGetRowIJ_SeqBAIJ, 1511 MatRestoreRowIJ_SeqBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 0, 1518 MatSetValuesBlocked_SeqBAIJ, 1519 MatGetSubMatrix_SeqBAIJ, 1520 MatDestroy_SeqBAIJ, 1521 MatView_SeqBAIJ, 1522 MatGetPetscMaps_Petsc, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 MatGetRowMax_SeqBAIJ, 1530 MatConvert_Basic}; 1531 1532 EXTERN_C_BEGIN 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1535 int MatStoreValues_SeqBAIJ(Mat mat) 1536 { 1537 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1538 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1539 int ierr; 1540 1541 PetscFunctionBegin; 1542 if (aij->nonew != 1) { 1543 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1544 } 1545 1546 /* allocate space for values if not already there */ 1547 if (!aij->saved_values) { 1548 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1549 } 1550 1551 /* copy values over */ 1552 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 EXTERN_C_END 1556 1557 EXTERN_C_BEGIN 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1560 int MatRetrieveValues_SeqBAIJ(Mat mat) 1561 { 1562 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1563 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1564 1565 PetscFunctionBegin; 1566 if (aij->nonew != 1) { 1567 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1568 } 1569 if (!aij->saved_values) { 1570 SETERRQ(1,"Must call MatStoreValues(A);first"); 1571 } 1572 1573 /* copy values over */ 1574 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 EXTERN_C_END 1578 1579 EXTERN_C_BEGIN 1580 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1581 EXTERN_C_END 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatCreate_SeqBAIJ" 1586 int MatCreate_SeqBAIJ(Mat B) 1587 { 1588 int ierr,size; 1589 Mat_SeqBAIJ *b; 1590 1591 PetscFunctionBegin; 1592 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1593 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1594 1595 B->m = B->M = PetscMax(B->m,B->M); 1596 B->n = B->N = PetscMax(B->n,B->N); 1597 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1598 B->data = (void*)b; 1599 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1600 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1601 B->factor = 0; 1602 B->lupivotthreshold = 1.0; 1603 B->mapping = 0; 1604 b->row = 0; 1605 b->col = 0; 1606 b->icol = 0; 1607 b->reallocs = 0; 1608 b->saved_values = 0; 1609 #if defined(PETSC_USE_MAT_SINGLE) 1610 b->setvalueslen = 0; 1611 b->setvaluescopy = PETSC_NULL; 1612 #endif 1613 1614 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1615 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1616 1617 b->sorted = PETSC_FALSE; 1618 b->roworiented = PETSC_TRUE; 1619 b->nonew = 0; 1620 b->diag = 0; 1621 b->solve_work = 0; 1622 b->mult_work = 0; 1623 B->spptr = 0; 1624 B->info.nz_unneeded = (PetscReal)b->maxnz; 1625 b->keepzeroedrows = PETSC_FALSE; 1626 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1628 "MatStoreValues_SeqBAIJ", 1629 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1631 "MatRetrieveValues_SeqBAIJ", 1632 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1634 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1635 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1637 "MatConvert_SeqBAIJ_SeqAIJ", 1638 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1639 PetscFunctionReturn(0); 1640 } 1641 EXTERN_C_END 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1645 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1646 { 1647 Mat C; 1648 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1649 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1650 1651 PetscFunctionBegin; 1652 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1653 1654 *B = 0; 1655 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1656 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1657 c = (Mat_SeqBAIJ*)C->data; 1658 1659 c->bs = a->bs; 1660 c->bs2 = a->bs2; 1661 c->mbs = a->mbs; 1662 c->nbs = a->nbs; 1663 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1664 1665 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1666 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1667 for (i=0; i<mbs; i++) { 1668 c->imax[i] = a->imax[i]; 1669 c->ilen[i] = a->ilen[i]; 1670 } 1671 1672 /* allocate the matrix space */ 1673 c->singlemalloc = PETSC_TRUE; 1674 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1675 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1676 c->j = (int*)(c->a + nz*bs2); 1677 c->i = c->j + nz; 1678 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1679 if (mbs > 0) { 1680 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1681 if (cpvalues == MAT_COPY_VALUES) { 1682 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1683 } else { 1684 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1685 } 1686 } 1687 1688 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1689 c->sorted = a->sorted; 1690 c->roworiented = a->roworiented; 1691 c->nonew = a->nonew; 1692 1693 if (a->diag) { 1694 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1695 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1696 for (i=0; i<mbs; i++) { 1697 c->diag[i] = a->diag[i]; 1698 } 1699 } else c->diag = 0; 1700 c->nz = a->nz; 1701 c->maxnz = a->maxnz; 1702 c->solve_work = 0; 1703 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1704 c->mult_work = 0; 1705 C->preallocated = PETSC_TRUE; 1706 C->assembled = PETSC_TRUE; 1707 *B = C; 1708 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatLoad_SeqBAIJ" 1715 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1716 { 1717 Mat_SeqBAIJ *a; 1718 Mat B; 1719 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1720 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1721 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1722 int *masked,nmask,tmp,bs2,ishift; 1723 PetscScalar *aa; 1724 MPI_Comm comm = ((PetscObject)viewer)->comm; 1725 1726 PetscFunctionBegin; 1727 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1728 bs2 = bs*bs; 1729 1730 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1731 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1732 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1733 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1734 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1735 M = header[1]; N = header[2]; nz = header[3]; 1736 1737 if (header[3] < 0) { 1738 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1739 } 1740 1741 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1742 1743 /* 1744 This code adds extra rows to make sure the number of rows is 1745 divisible by the blocksize 1746 */ 1747 mbs = M/bs; 1748 extra_rows = bs - M + bs*(mbs); 1749 if (extra_rows == bs) extra_rows = 0; 1750 else mbs++; 1751 if (extra_rows) { 1752 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1753 } 1754 1755 /* read in row lengths */ 1756 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1757 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1758 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1759 1760 /* read in column indices */ 1761 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1762 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1763 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1764 1765 /* loop over row lengths determining block row lengths */ 1766 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1767 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1768 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1769 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1770 masked = mask + mbs; 1771 rowcount = 0; nzcount = 0; 1772 for (i=0; i<mbs; i++) { 1773 nmask = 0; 1774 for (j=0; j<bs; j++) { 1775 kmax = rowlengths[rowcount]; 1776 for (k=0; k<kmax; k++) { 1777 tmp = jj[nzcount++]/bs; 1778 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1779 } 1780 rowcount++; 1781 } 1782 browlengths[i] += nmask; 1783 /* zero out the mask elements we set */ 1784 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1785 } 1786 1787 /* create our matrix */ 1788 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1789 B = *A; 1790 a = (Mat_SeqBAIJ*)B->data; 1791 1792 /* set matrix "i" values */ 1793 a->i[0] = 0; 1794 for (i=1; i<= mbs; i++) { 1795 a->i[i] = a->i[i-1] + browlengths[i-1]; 1796 a->ilen[i-1] = browlengths[i-1]; 1797 } 1798 a->nz = 0; 1799 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1800 1801 /* read in nonzero values */ 1802 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1803 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1804 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1805 1806 /* set "a" and "j" values into matrix */ 1807 nzcount = 0; jcount = 0; 1808 for (i=0; i<mbs; i++) { 1809 nzcountb = nzcount; 1810 nmask = 0; 1811 for (j=0; j<bs; j++) { 1812 kmax = rowlengths[i*bs+j]; 1813 for (k=0; k<kmax; k++) { 1814 tmp = jj[nzcount++]/bs; 1815 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1816 } 1817 } 1818 /* sort the masked values */ 1819 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1820 1821 /* set "j" values into matrix */ 1822 maskcount = 1; 1823 for (j=0; j<nmask; j++) { 1824 a->j[jcount++] = masked[j]; 1825 mask[masked[j]] = maskcount++; 1826 } 1827 /* set "a" values into matrix */ 1828 ishift = bs2*a->i[i]; 1829 for (j=0; j<bs; j++) { 1830 kmax = rowlengths[i*bs+j]; 1831 for (k=0; k<kmax; k++) { 1832 tmp = jj[nzcountb]/bs ; 1833 block = mask[tmp] - 1; 1834 point = jj[nzcountb] - bs*tmp; 1835 idx = ishift + bs2*block + j + bs*point; 1836 a->a[idx] = (MatScalar)aa[nzcountb++]; 1837 } 1838 } 1839 /* zero out the mask elements we set */ 1840 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1841 } 1842 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1843 1844 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1845 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1846 ierr = PetscFree(aa);CHKERRQ(ierr); 1847 ierr = PetscFree(jj);CHKERRQ(ierr); 1848 ierr = PetscFree(mask);CHKERRQ(ierr); 1849 1850 B->assembled = PETSC_TRUE; 1851 1852 ierr = MatView_Private(B);CHKERRQ(ierr); 1853 PetscFunctionReturn(0); 1854 } 1855 EXTERN_C_END 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatCreateSeqBAIJ" 1859 /*@C 1860 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1861 compressed row) format. For good matrix assembly performance the 1862 user should preallocate the matrix storage by setting the parameter nz 1863 (or the array nnz). By setting these parameters accurately, performance 1864 during matrix assembly can be increased by more than a factor of 50. 1865 1866 Collective on MPI_Comm 1867 1868 Input Parameters: 1869 + comm - MPI communicator, set to PETSC_COMM_SELF 1870 . bs - size of block 1871 . m - number of rows 1872 . n - number of columns 1873 . nz - number of nonzero blocks per block row (same for all rows) 1874 - nnz - array containing the number of nonzero blocks in the various block rows 1875 (possibly different for each block row) or PETSC_NULL 1876 1877 Output Parameter: 1878 . A - the matrix 1879 1880 Options Database Keys: 1881 . -mat_no_unroll - uses code that does not unroll the loops in the 1882 block calculations (much slower) 1883 . -mat_block_size - size of the blocks to use 1884 1885 Level: intermediate 1886 1887 Notes: 1888 A nonzero block is any block that as 1 or more nonzeros in it 1889 1890 The block AIJ format is fully compatible with standard Fortran 77 1891 storage. That is, the stored row and column indices can begin at 1892 either one (as in Fortran) or zero. See the users' manual for details. 1893 1894 Specify the preallocated storage with either nz or nnz (not both). 1895 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1896 allocation. For additional details, see the users manual chapter on 1897 matrices. 1898 1899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1900 @*/ 1901 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1902 { 1903 int ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1907 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1908 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1914 /*@C 1915 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1916 per row in the matrix. For good matrix assembly performance the 1917 user should preallocate the matrix storage by setting the parameter nz 1918 (or the array nnz). By setting these parameters accurately, performance 1919 during matrix assembly can be increased by more than a factor of 50. 1920 1921 Collective on MPI_Comm 1922 1923 Input Parameters: 1924 + A - the matrix 1925 . bs - size of block 1926 . nz - number of block nonzeros per block row (same for all rows) 1927 - nnz - array containing the number of block nonzeros in the various block rows 1928 (possibly different for each block row) or PETSC_NULL 1929 1930 Options Database Keys: 1931 . -mat_no_unroll - uses code that does not unroll the loops in the 1932 block calculations (much slower) 1933 . -mat_block_size - size of the blocks to use 1934 1935 Level: intermediate 1936 1937 Notes: 1938 The block AIJ format is fully compatible with standard Fortran 77 1939 storage. That is, the stored row and column indices can begin at 1940 either one (as in Fortran) or zero. See the users' manual for details. 1941 1942 Specify the preallocated storage with either nz or nnz (not both). 1943 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1944 allocation. For additional details, see the users manual chapter on 1945 matrices. 1946 1947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1948 @*/ 1949 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1950 { 1951 Mat_SeqBAIJ *b; 1952 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1953 PetscTruth flg; 1954 1955 PetscFunctionBegin; 1956 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1957 if (!flg) PetscFunctionReturn(0); 1958 1959 B->preallocated = PETSC_TRUE; 1960 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1961 if (nnz && newbs != bs) { 1962 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1963 } 1964 bs = newbs; 1965 1966 mbs = B->m/bs; 1967 nbs = B->n/bs; 1968 bs2 = bs*bs; 1969 1970 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1971 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1972 } 1973 1974 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1975 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1976 if (nnz) { 1977 for (i=0; i<mbs; i++) { 1978 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1979 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1980 } 1981 } 1982 1983 b = (Mat_SeqBAIJ*)B->data; 1984 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1985 B->ops->solve = MatSolve_SeqBAIJ_Update; 1986 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1987 if (!flg) { 1988 switch (bs) { 1989 case 1: 1990 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1991 B->ops->mult = MatMult_SeqBAIJ_1; 1992 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1993 break; 1994 case 2: 1995 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1996 B->ops->mult = MatMult_SeqBAIJ_2; 1997 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1998 break; 1999 case 3: 2000 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2001 B->ops->mult = MatMult_SeqBAIJ_3; 2002 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2003 break; 2004 case 4: 2005 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2006 B->ops->mult = MatMult_SeqBAIJ_4; 2007 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2008 break; 2009 case 5: 2010 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2011 B->ops->mult = MatMult_SeqBAIJ_5; 2012 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2013 break; 2014 case 6: 2015 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2016 B->ops->mult = MatMult_SeqBAIJ_6; 2017 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2018 break; 2019 case 7: 2020 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2021 B->ops->mult = MatMult_SeqBAIJ_7; 2022 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2023 break; 2024 default: 2025 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2026 B->ops->mult = MatMult_SeqBAIJ_N; 2027 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2028 break; 2029 } 2030 } 2031 b->bs = bs; 2032 b->mbs = mbs; 2033 b->nbs = nbs; 2034 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2035 if (!nnz) { 2036 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2037 else if (nz <= 0) nz = 1; 2038 for (i=0; i<mbs; i++) b->imax[i] = nz; 2039 nz = nz*mbs; 2040 } else { 2041 nz = 0; 2042 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2043 } 2044 2045 /* allocate the matrix space */ 2046 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2047 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2048 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2049 b->j = (int*)(b->a + nz*bs2); 2050 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2051 b->i = b->j + nz; 2052 b->singlemalloc = PETSC_TRUE; 2053 2054 b->i[0] = 0; 2055 for (i=1; i<mbs+1; i++) { 2056 b->i[i] = b->i[i-1] + b->imax[i-1]; 2057 } 2058 2059 /* b->ilen will count nonzeros in each block row so far. */ 2060 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2061 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2062 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2063 2064 b->bs = bs; 2065 b->bs2 = bs2; 2066 b->mbs = mbs; 2067 b->nz = 0; 2068 b->maxnz = nz*bs2; 2069 B->info.nz_unneeded = (PetscReal)b->maxnz; 2070 PetscFunctionReturn(0); 2071 } 2072 2073