1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 default: 351 SETERRQ(PETSC_ERR_SUP,"unknown option"); 352 } 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNCT__ 357 #define __FUNCT__ "MatGetRow_SeqBAIJ" 358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 361 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 362 MatScalar *aa,*aa_i; 363 PetscScalar *v_i; 364 365 PetscFunctionBegin; 366 bs = a->bs; 367 ai = a->i; 368 aj = a->j; 369 aa = a->a; 370 bs2 = a->bs2; 371 372 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 373 374 bn = row/bs; /* Block number */ 375 bp = row % bs; /* Block Position */ 376 M = ai[bn+1] - ai[bn]; 377 *nz = bs*M; 378 379 if (v) { 380 *v = 0; 381 if (*nz) { 382 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 383 for (i=0; i<M; i++) { /* for each block in the block row */ 384 v_i = *v + i*bs; 385 aa_i = aa + bs2*(ai[bn] + i); 386 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 387 } 388 } 389 } 390 391 if (idx) { 392 *idx = 0; 393 if (*nz) { 394 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 395 for (i=0; i<M; i++) { /* for each block in the block row */ 396 idx_i = *idx + i*bs; 397 itmp = bs*aj[ai[bn] + i]; 398 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 407 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 408 { 409 int ierr; 410 411 PetscFunctionBegin; 412 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 413 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatTranspose_SeqBAIJ" 419 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 420 { 421 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 422 Mat C; 423 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 424 int *rows,*cols,bs2=a->bs2; 425 PetscScalar *array; 426 427 PetscFunctionBegin; 428 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 429 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 430 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 431 432 #if defined(PETSC_USE_MAT_SINGLE) 433 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 434 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 435 #else 436 array = a->a; 437 #endif 438 439 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 440 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 441 ierr = PetscFree(col);CHKERRQ(ierr); 442 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 443 cols = rows + bs; 444 for (i=0; i<mbs; i++) { 445 cols[0] = i*bs; 446 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 447 len = ai[i+1] - ai[i]; 448 for (j=0; j<len; j++) { 449 rows[0] = (*aj++)*bs; 450 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 451 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 452 array += bs2; 453 } 454 } 455 ierr = PetscFree(rows);CHKERRQ(ierr); 456 #if defined(PETSC_USE_MAT_SINGLE) 457 ierr = PetscFree(array); 458 #endif 459 460 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 461 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 462 463 if (B) { 464 *B = C; 465 } else { 466 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 467 } 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 473 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 474 { 475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 476 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 477 PetscScalar *aa; 478 FILE *file; 479 480 PetscFunctionBegin; 481 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 482 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 483 col_lens[0] = MAT_FILE_COOKIE; 484 485 col_lens[1] = A->m; 486 col_lens[2] = A->n; 487 col_lens[3] = a->nz*bs2; 488 489 /* store lengths of each row and write (including header) to file */ 490 count = 0; 491 for (i=0; i<a->mbs; i++) { 492 for (j=0; j<bs; j++) { 493 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 494 } 495 } 496 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 497 ierr = PetscFree(col_lens);CHKERRQ(ierr); 498 499 /* store column indices (zero start index) */ 500 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 501 count = 0; 502 for (i=0; i<a->mbs; i++) { 503 for (j=0; j<bs; j++) { 504 for (k=a->i[i]; k<a->i[i+1]; k++) { 505 for (l=0; l<bs; l++) { 506 jj[count++] = bs*a->j[k] + l; 507 } 508 } 509 } 510 } 511 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 512 ierr = PetscFree(jj);CHKERRQ(ierr); 513 514 /* store nonzero values */ 515 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 516 count = 0; 517 for (i=0; i<a->mbs; i++) { 518 for (j=0; j<bs; j++) { 519 for (k=a->i[i]; k<a->i[i+1]; k++) { 520 for (l=0; l<bs; l++) { 521 aa[count++] = a->a[bs2*k + l*bs + j]; 522 } 523 } 524 } 525 } 526 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 527 ierr = PetscFree(aa);CHKERRQ(ierr); 528 529 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 530 if (file) { 531 fprintf(file,"-matload_block_size %d\n",a->bs); 532 } 533 PetscFunctionReturn(0); 534 } 535 536 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 540 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 541 { 542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 543 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 544 PetscViewerFormat format; 545 546 PetscFunctionBegin; 547 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 548 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 549 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 550 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 551 Mat aij; 552 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 553 ierr = MatView(aij,viewer);CHKERRQ(ierr); 554 ierr = MatDestroy(aij);CHKERRQ(ierr); 555 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 556 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 557 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 558 #endif 559 PetscFunctionReturn(0); 560 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 562 for (i=0; i<a->mbs; i++) { 563 for (j=0; j<bs; j++) { 564 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 565 for (k=a->i[i]; k<a->i[i+1]; k++) { 566 for (l=0; l<bs; l++) { 567 #if defined(PETSC_USE_COMPLEX) 568 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 569 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 570 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 571 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 572 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 573 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 574 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 575 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 576 } 577 #else 578 if (a->a[bs2*k + l*bs + j] != 0.0) { 579 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 580 } 581 #endif 582 } 583 } 584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 585 } 586 } 587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 588 } else { 589 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 590 for (i=0; i<a->mbs; i++) { 591 for (j=0; j<bs; j++) { 592 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 593 for (k=a->i[i]; k<a->i[i+1]; k++) { 594 for (l=0; l<bs; l++) { 595 #if defined(PETSC_USE_COMPLEX) 596 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 597 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 598 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 599 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 600 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 601 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 602 } else { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 604 } 605 #else 606 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 607 #endif 608 } 609 } 610 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 611 } 612 } 613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 614 } 615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 621 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 622 { 623 Mat A = (Mat) Aa; 624 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 625 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 626 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 627 MatScalar *aa; 628 PetscViewer viewer; 629 630 PetscFunctionBegin; 631 632 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 633 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 634 635 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 636 637 /* loop over matrix elements drawing boxes */ 638 color = PETSC_DRAW_BLUE; 639 for (i=0,row=0; i<mbs; i++,row+=bs) { 640 for (j=a->i[i]; j<a->i[i+1]; j++) { 641 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 643 aa = a->a + j*bs2; 644 for (k=0; k<bs; k++) { 645 for (l=0; l<bs; l++) { 646 if (PetscRealPart(*aa++) >= 0.) continue; 647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 648 } 649 } 650 } 651 } 652 color = PETSC_DRAW_CYAN; 653 for (i=0,row=0; i<mbs; i++,row+=bs) { 654 for (j=a->i[i]; j<a->i[i+1]; j++) { 655 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 656 x_l = a->j[j]*bs; x_r = x_l + 1.0; 657 aa = a->a + j*bs2; 658 for (k=0; k<bs; k++) { 659 for (l=0; l<bs; l++) { 660 if (PetscRealPart(*aa++) != 0.) continue; 661 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 662 } 663 } 664 } 665 } 666 667 color = PETSC_DRAW_RED; 668 for (i=0,row=0; i<mbs; i++,row+=bs) { 669 for (j=a->i[i]; j<a->i[i+1]; j++) { 670 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 671 x_l = a->j[j]*bs; x_r = x_l + 1.0; 672 aa = a->a + j*bs2; 673 for (k=0; k<bs; k++) { 674 for (l=0; l<bs; l++) { 675 if (PetscRealPart(*aa++) <= 0.) continue; 676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 677 } 678 } 679 } 680 } 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 686 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 687 { 688 int ierr; 689 PetscReal xl,yl,xr,yr,w,h; 690 PetscDraw draw; 691 PetscTruth isnull; 692 693 PetscFunctionBegin; 694 695 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 696 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 697 698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 699 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 700 xr += w; yr += h; xl = -w; yl = -h; 701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 702 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatView_SeqBAIJ" 709 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 710 { 711 int ierr; 712 PetscTruth issocket,isascii,isbinary,isdraw; 713 714 PetscFunctionBegin; 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 719 if (issocket) { 720 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 721 } else if (isascii){ 722 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 723 } else if (isbinary) { 724 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 725 } else if (isdraw) { 726 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 727 } else { 728 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 729 } 730 PetscFunctionReturn(0); 731 } 732 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatGetValues_SeqBAIJ" 736 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 737 { 738 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 739 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 740 int *ai = a->i,*ailen = a->ilen; 741 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 742 MatScalar *ap,*aa = a->a,zero = 0.0; 743 744 PetscFunctionBegin; 745 for (k=0; k<m; k++) { /* loop over rows */ 746 row = im[k]; brow = row/bs; 747 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 748 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 749 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 750 nrow = ailen[brow]; 751 for (l=0; l<n; l++) { /* loop over columns */ 752 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 753 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 754 col = in[l] ; 755 bcol = col/bs; 756 cidx = col%bs; 757 ridx = row%bs; 758 high = nrow; 759 low = 0; /* assume unsorted */ 760 while (high-low > 5) { 761 t = (low+high)/2; 762 if (rp[t] > bcol) high = t; 763 else low = t; 764 } 765 for (i=low; i<high; i++) { 766 if (rp[i] > bcol) break; 767 if (rp[i] == bcol) { 768 *v++ = ap[bs2*i+bs*cidx+ridx]; 769 goto finished; 770 } 771 } 772 *v++ = zero; 773 finished:; 774 } 775 } 776 PetscFunctionReturn(0); 777 } 778 779 #if defined(PETSC_USE_MAT_SINGLE) 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 782 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 783 { 784 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 785 int ierr,i,N = m*n*b->bs2; 786 MatScalar *vsingle; 787 788 PetscFunctionBegin; 789 if (N > b->setvalueslen) { 790 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 791 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 792 b->setvalueslen = N; 793 } 794 vsingle = b->setvaluescopy; 795 for (i=0; i<N; i++) { 796 vsingle[i] = v[i]; 797 } 798 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } 801 #endif 802 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 806 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 810 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 811 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 812 PetscTruth roworiented=a->roworiented; 813 MatScalar *value = v,*ap,*aa = a->a,*bap; 814 815 PetscFunctionBegin; 816 if (roworiented) { 817 stepval = (n-1)*bs; 818 } else { 819 stepval = (m-1)*bs; 820 } 821 for (k=0; k<m; k++) { /* loop over added rows */ 822 row = im[k]; 823 if (row < 0) continue; 824 #if defined(PETSC_USE_BOPT_g) 825 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 826 #endif 827 rp = aj + ai[row]; 828 ap = aa + bs2*ai[row]; 829 rmax = imax[row]; 830 nrow = ailen[row]; 831 low = 0; 832 for (l=0; l<n; l++) { /* loop over added columns */ 833 if (in[l] < 0) continue; 834 #if defined(PETSC_USE_BOPT_g) 835 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 836 #endif 837 col = in[l]; 838 if (roworiented) { 839 value = v + k*(stepval+bs)*bs + l*bs; 840 } else { 841 value = v + l*(stepval+bs)*bs + k*bs; 842 } 843 if (!sorted) low = 0; high = nrow; 844 while (high-low > 7) { 845 t = (low+high)/2; 846 if (rp[t] > col) high = t; 847 else low = t; 848 } 849 for (i=low; i<high; i++) { 850 if (rp[i] > col) break; 851 if (rp[i] == col) { 852 bap = ap + bs2*i; 853 if (roworiented) { 854 if (is == ADD_VALUES) { 855 for (ii=0; ii<bs; ii++,value+=stepval) { 856 for (jj=ii; jj<bs2; jj+=bs) { 857 bap[jj] += *value++; 858 } 859 } 860 } else { 861 for (ii=0; ii<bs; ii++,value+=stepval) { 862 for (jj=ii; jj<bs2; jj+=bs) { 863 bap[jj] = *value++; 864 } 865 } 866 } 867 } else { 868 if (is == ADD_VALUES) { 869 for (ii=0; ii<bs; ii++,value+=stepval) { 870 for (jj=0; jj<bs; jj++) { 871 *bap++ += *value++; 872 } 873 } 874 } else { 875 for (ii=0; ii<bs; ii++,value+=stepval) { 876 for (jj=0; jj<bs; jj++) { 877 *bap++ = *value++; 878 } 879 } 880 } 881 } 882 goto noinsert2; 883 } 884 } 885 if (nonew == 1) goto noinsert2; 886 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 887 if (nrow >= rmax) { 888 /* there is no extra room in row, therefore enlarge */ 889 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 890 MatScalar *new_a; 891 892 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 893 894 /* malloc new storage space */ 895 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 896 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 897 new_j = (int*)(new_a + bs2*new_nz); 898 new_i = new_j + new_nz; 899 900 /* copy over old data into new slots */ 901 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 902 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 903 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 904 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 905 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 906 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 907 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 908 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 909 /* free up old matrix storage */ 910 ierr = PetscFree(a->a);CHKERRQ(ierr); 911 if (!a->singlemalloc) { 912 ierr = PetscFree(a->i);CHKERRQ(ierr); 913 ierr = PetscFree(a->j);CHKERRQ(ierr); 914 } 915 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 916 a->singlemalloc = PETSC_TRUE; 917 918 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 919 rmax = imax[row] = imax[row] + CHUNKSIZE; 920 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 921 a->maxnz += bs2*CHUNKSIZE; 922 a->reallocs++; 923 a->nz++; 924 } 925 N = nrow++ - 1; 926 /* shift up all the later entries in this row */ 927 for (ii=N; ii>=i; ii--) { 928 rp[ii+1] = rp[ii]; 929 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 930 } 931 if (N >= i) { 932 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 933 } 934 rp[i] = col; 935 bap = ap + bs2*i; 936 if (roworiented) { 937 for (ii=0; ii<bs; ii++,value+=stepval) { 938 for (jj=ii; jj<bs2; jj+=bs) { 939 bap[jj] = *value++; 940 } 941 } 942 } else { 943 for (ii=0; ii<bs; ii++,value+=stepval) { 944 for (jj=0; jj<bs; jj++) { 945 *bap++ = *value++; 946 } 947 } 948 } 949 noinsert2:; 950 low = i; 951 } 952 ailen[row] = nrow; 953 } 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 959 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 960 { 961 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 962 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 963 int m = A->m,*ip,N,*ailen = a->ilen; 964 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 965 MatScalar *aa = a->a,*ap; 966 #if defined(PETSC_HAVE_DSCPACK) 967 PetscTruth flag; 968 #endif 969 970 PetscFunctionBegin; 971 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 972 973 if (m) rmax = ailen[0]; 974 for (i=1; i<mbs; i++) { 975 /* move each row back by the amount of empty slots (fshift) before it*/ 976 fshift += imax[i-1] - ailen[i-1]; 977 rmax = PetscMax(rmax,ailen[i]); 978 if (fshift) { 979 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 980 N = ailen[i]; 981 for (j=0; j<N; j++) { 982 ip[j-fshift] = ip[j]; 983 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 984 } 985 } 986 ai[i] = ai[i-1] + ailen[i-1]; 987 } 988 if (mbs) { 989 fshift += imax[mbs-1] - ailen[mbs-1]; 990 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 991 } 992 /* reset ilen and imax for each row */ 993 for (i=0; i<mbs; i++) { 994 ailen[i] = imax[i] = ai[i+1] - ai[i]; 995 } 996 a->nz = ai[mbs]; 997 998 /* diagonals may have moved, so kill the diagonal pointers */ 999 if (fshift && a->diag) { 1000 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1001 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1002 a->diag = 0; 1003 } 1004 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1005 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1006 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1007 a->reallocs = 0; 1008 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1009 1010 #if defined(PETSC_HAVE_DSCPACK) 1011 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1012 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1013 #endif 1014 1015 PetscFunctionReturn(0); 1016 } 1017 1018 1019 1020 /* 1021 This function returns an array of flags which indicate the locations of contiguous 1022 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1023 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1024 Assume: sizes should be long enough to hold all the values. 1025 */ 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1029 { 1030 int i,j,k,row; 1031 PetscTruth flg; 1032 1033 PetscFunctionBegin; 1034 for (i=0,j=0; i<n; j++) { 1035 row = idx[i]; 1036 if (row%bs!=0) { /* Not the begining of a block */ 1037 sizes[j] = 1; 1038 i++; 1039 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1040 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1041 i++; 1042 } else { /* Begining of the block, so check if the complete block exists */ 1043 flg = PETSC_TRUE; 1044 for (k=1; k<bs; k++) { 1045 if (row+k != idx[i+k]) { /* break in the block */ 1046 flg = PETSC_FALSE; 1047 break; 1048 } 1049 } 1050 if (flg == PETSC_TRUE) { /* No break in the bs */ 1051 sizes[j] = bs; 1052 i+= bs; 1053 } else { 1054 sizes[j] = 1; 1055 i++; 1056 } 1057 } 1058 } 1059 *bs_max = j; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1066 { 1067 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1068 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1069 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1070 PetscScalar zero = 0.0; 1071 MatScalar *aa; 1072 1073 PetscFunctionBegin; 1074 /* Make a copy of the IS and sort it */ 1075 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1076 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1077 1078 /* allocate memory for rows,sizes */ 1079 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1080 sizes = rows + is_n; 1081 1082 /* copy IS values to rows, and sort them */ 1083 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1084 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1085 if (baij->keepzeroedrows) { 1086 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1087 bs_max = is_n; 1088 } else { 1089 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1090 } 1091 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1092 1093 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1094 row = rows[j]; 1095 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1096 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1097 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1098 if (sizes[i] == bs && !baij->keepzeroedrows) { 1099 if (diag) { 1100 if (baij->ilen[row/bs] > 0) { 1101 baij->ilen[row/bs] = 1; 1102 baij->j[baij->i[row/bs]] = row/bs; 1103 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 /* Now insert all the diagonal values for this bs */ 1106 for (k=0; k<bs; k++) { 1107 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1108 } 1109 } else { /* (!diag) */ 1110 baij->ilen[row/bs] = 0; 1111 } /* end (!diag) */ 1112 } else { /* (sizes[i] != bs) */ 1113 #if defined (PETSC_USE_DEBUG) 1114 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1115 #endif 1116 for (k=0; k<count; k++) { 1117 aa[0] = zero; 1118 aa += bs; 1119 } 1120 if (diag) { 1121 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1122 } 1123 } 1124 } 1125 1126 ierr = PetscFree(rows);CHKERRQ(ierr); 1127 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1133 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1137 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1138 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1139 int ridx,cidx,bs2=a->bs2,ierr; 1140 PetscTruth roworiented=a->roworiented; 1141 MatScalar *ap,value,*aa=a->a,*bap; 1142 1143 PetscFunctionBegin; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; brow = row/bs; 1146 if (row < 0) continue; 1147 #if defined(PETSC_USE_BOPT_g) 1148 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1149 #endif 1150 rp = aj + ai[brow]; 1151 ap = aa + bs2*ai[brow]; 1152 rmax = imax[brow]; 1153 nrow = ailen[brow]; 1154 low = 0; 1155 for (l=0; l<n; l++) { /* loop over added columns */ 1156 if (in[l] < 0) continue; 1157 #if defined(PETSC_USE_BOPT_g) 1158 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1159 #endif 1160 col = in[l]; bcol = col/bs; 1161 ridx = row % bs; cidx = col % bs; 1162 if (roworiented) { 1163 value = v[l + k*n]; 1164 } else { 1165 value = v[k + l*m]; 1166 } 1167 if (!sorted) low = 0; high = nrow; 1168 while (high-low > 7) { 1169 t = (low+high)/2; 1170 if (rp[t] > bcol) high = t; 1171 else low = t; 1172 } 1173 for (i=low; i<high; i++) { 1174 if (rp[i] > bcol) break; 1175 if (rp[i] == bcol) { 1176 bap = ap + bs2*i + bs*cidx + ridx; 1177 if (is == ADD_VALUES) *bap += value; 1178 else *bap = value; 1179 goto noinsert1; 1180 } 1181 } 1182 if (nonew == 1) goto noinsert1; 1183 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1184 if (nrow >= rmax) { 1185 /* there is no extra room in row, therefore enlarge */ 1186 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1187 MatScalar *new_a; 1188 1189 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1190 1191 /* Malloc new storage space */ 1192 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1193 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1194 new_j = (int*)(new_a + bs2*new_nz); 1195 new_i = new_j + new_nz; 1196 1197 /* copy over old data into new slots */ 1198 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1199 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1200 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1201 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1202 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1203 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1204 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1205 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1206 /* free up old matrix storage */ 1207 ierr = PetscFree(a->a);CHKERRQ(ierr); 1208 if (!a->singlemalloc) { 1209 ierr = PetscFree(a->i);CHKERRQ(ierr); 1210 ierr = PetscFree(a->j);CHKERRQ(ierr); 1211 } 1212 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1213 a->singlemalloc = PETSC_TRUE; 1214 1215 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1216 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1217 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1218 a->maxnz += bs2*CHUNKSIZE; 1219 a->reallocs++; 1220 a->nz++; 1221 } 1222 N = nrow++ - 1; 1223 /* shift up all the later entries in this row */ 1224 for (ii=N; ii>=i; ii--) { 1225 rp[ii+1] = rp[ii]; 1226 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1227 } 1228 if (N>=i) { 1229 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1230 } 1231 rp[i] = bcol; 1232 ap[bs2*i + bs*cidx + ridx] = value; 1233 noinsert1:; 1234 low = i; 1235 } 1236 ailen[brow] = nrow; 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1247 Mat outA; 1248 int ierr; 1249 PetscTruth row_identity,col_identity; 1250 1251 PetscFunctionBegin; 1252 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1253 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1254 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1255 if (!row_identity || !col_identity) { 1256 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1257 } 1258 1259 outA = inA; 1260 inA->factor = FACTOR_LU; 1261 1262 if (!a->diag) { 1263 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1264 } 1265 1266 a->row = row; 1267 a->col = col; 1268 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1269 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1270 1271 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1272 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1273 PetscLogObjectParent(inA,a->icol); 1274 1275 /* 1276 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1277 for ILU(0) factorization with natural ordering 1278 */ 1279 if (a->bs < 8) { 1280 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1281 } else { 1282 if (!a->solve_work) { 1283 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1284 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1285 } 1286 } 1287 1288 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1289 1290 PetscFunctionReturn(0); 1291 } 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1294 int MatPrintHelp_SeqBAIJ(Mat A) 1295 { 1296 static PetscTruth called = PETSC_FALSE; 1297 MPI_Comm comm = A->comm; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1302 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1303 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 EXTERN_C_BEGIN 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1311 { 1312 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1313 int i,nz,nbs; 1314 1315 PetscFunctionBegin; 1316 nz = baij->maxnz/baij->bs2; 1317 nbs = baij->nbs; 1318 for (i=0; i<nz; i++) { 1319 baij->j[i] = indices[i]; 1320 } 1321 baij->nz = nz; 1322 for (i=0; i<nbs; i++) { 1323 baij->ilen[i] = baij->imax[i]; 1324 } 1325 1326 PetscFunctionReturn(0); 1327 } 1328 EXTERN_C_END 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1332 /*@ 1333 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1334 in the matrix. 1335 1336 Input Parameters: 1337 + mat - the SeqBAIJ matrix 1338 - indices - the column indices 1339 1340 Level: advanced 1341 1342 Notes: 1343 This can be called if you have precomputed the nonzero structure of the 1344 matrix and want to provide it to the matrix object to improve the performance 1345 of the MatSetValues() operation. 1346 1347 You MUST have set the correct numbers of nonzeros per row in the call to 1348 MatCreateSeqBAIJ(). 1349 1350 MUST be called before any calls to MatSetValues(); 1351 1352 @*/ 1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1354 { 1355 int ierr,(*f)(Mat,int *); 1356 1357 PetscFunctionBegin; 1358 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1359 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1360 if (f) { 1361 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1362 } else { 1363 SETERRQ(1,"Wrong type of matrix to set column indices"); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1371 { 1372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1373 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1374 PetscReal atmp; 1375 PetscScalar *x,zero = 0.0; 1376 MatScalar *aa; 1377 int ncols,brow,krow,kcol; 1378 1379 PetscFunctionBegin; 1380 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1381 bs = a->bs; 1382 aa = a->a; 1383 ai = a->i; 1384 aj = a->j; 1385 mbs = a->mbs; 1386 1387 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1388 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1389 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1390 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1391 for (i=0; i<mbs; i++) { 1392 ncols = ai[1] - ai[0]; ai++; 1393 brow = bs*i; 1394 for (j=0; j<ncols; j++){ 1395 /* bcol = bs*(*aj); */ 1396 for (kcol=0; kcol<bs; kcol++){ 1397 for (krow=0; krow<bs; krow++){ 1398 atmp = PetscAbsScalar(*aa); aa++; 1399 row = brow + krow; /* row index */ 1400 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1401 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1402 } 1403 } 1404 aj++; 1405 } 1406 } 1407 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1413 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1414 { 1415 int ierr; 1416 1417 PetscFunctionBegin; 1418 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1425 { 1426 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1427 PetscFunctionBegin; 1428 *array = a->a; 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1435 { 1436 PetscFunctionBegin; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include "petscblaslapack.h" 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1443 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1444 { 1445 int ierr,one=1; 1446 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1447 1448 PetscFunctionBegin; 1449 if (str == SAME_NONZERO_PATTERN) { 1450 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1451 } else { 1452 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 /* -------------------------------------------------------------------*/ 1458 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1459 MatGetRow_SeqBAIJ, 1460 MatRestoreRow_SeqBAIJ, 1461 MatMult_SeqBAIJ_N, 1462 MatMultAdd_SeqBAIJ_N, 1463 MatMultTranspose_SeqBAIJ, 1464 MatMultTransposeAdd_SeqBAIJ, 1465 MatSolve_SeqBAIJ_N, 1466 0, 1467 0, 1468 0, 1469 MatLUFactor_SeqBAIJ, 1470 0, 1471 0, 1472 MatTranspose_SeqBAIJ, 1473 MatGetInfo_SeqBAIJ, 1474 MatEqual_SeqBAIJ, 1475 MatGetDiagonal_SeqBAIJ, 1476 MatDiagonalScale_SeqBAIJ, 1477 MatNorm_SeqBAIJ, 1478 0, 1479 MatAssemblyEnd_SeqBAIJ, 1480 0, 1481 MatSetOption_SeqBAIJ, 1482 MatZeroEntries_SeqBAIJ, 1483 MatZeroRows_SeqBAIJ, 1484 MatLUFactorSymbolic_SeqBAIJ, 1485 MatLUFactorNumeric_SeqBAIJ_N, 1486 0, 1487 0, 1488 MatSetUpPreallocation_SeqBAIJ, 1489 MatILUFactorSymbolic_SeqBAIJ, 1490 0, 1491 MatGetArray_SeqBAIJ, 1492 MatRestoreArray_SeqBAIJ, 1493 MatDuplicate_SeqBAIJ, 1494 0, 1495 0, 1496 MatILUFactor_SeqBAIJ, 1497 0, 1498 MatAXPY_SeqBAIJ, 1499 MatGetSubMatrices_SeqBAIJ, 1500 MatIncreaseOverlap_SeqBAIJ, 1501 MatGetValues_SeqBAIJ, 1502 0, 1503 MatPrintHelp_SeqBAIJ, 1504 MatScale_SeqBAIJ, 1505 0, 1506 0, 1507 0, 1508 MatGetBlockSize_SeqBAIJ, 1509 MatGetRowIJ_SeqBAIJ, 1510 MatRestoreRowIJ_SeqBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 MatSetValuesBlocked_SeqBAIJ, 1518 MatGetSubMatrix_SeqBAIJ, 1519 MatDestroy_SeqBAIJ, 1520 MatView_SeqBAIJ, 1521 MatGetPetscMaps_Petsc, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 MatGetRowMax_SeqBAIJ, 1529 MatConvert_Basic}; 1530 1531 EXTERN_C_BEGIN 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1534 int MatStoreValues_SeqBAIJ(Mat mat) 1535 { 1536 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1537 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1538 int ierr; 1539 1540 PetscFunctionBegin; 1541 if (aij->nonew != 1) { 1542 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1543 } 1544 1545 /* allocate space for values if not already there */ 1546 if (!aij->saved_values) { 1547 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1548 } 1549 1550 /* copy values over */ 1551 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 EXTERN_C_END 1555 1556 EXTERN_C_BEGIN 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1559 int MatRetrieveValues_SeqBAIJ(Mat mat) 1560 { 1561 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1562 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1563 1564 PetscFunctionBegin; 1565 if (aij->nonew != 1) { 1566 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1567 } 1568 if (!aij->saved_values) { 1569 SETERRQ(1,"Must call MatStoreValues(A);first"); 1570 } 1571 1572 /* copy values over */ 1573 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1574 PetscFunctionReturn(0); 1575 } 1576 EXTERN_C_END 1577 1578 EXTERN_C_BEGIN 1579 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1580 EXTERN_C_END 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatCreate_SeqBAIJ" 1585 int MatCreate_SeqBAIJ(Mat B) 1586 { 1587 int ierr,size; 1588 Mat_SeqBAIJ *b; 1589 1590 PetscFunctionBegin; 1591 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1592 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1593 1594 B->m = B->M = PetscMax(B->m,B->M); 1595 B->n = B->N = PetscMax(B->n,B->N); 1596 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1597 B->data = (void*)b; 1598 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1599 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1600 B->factor = 0; 1601 B->lupivotthreshold = 1.0; 1602 B->mapping = 0; 1603 b->row = 0; 1604 b->col = 0; 1605 b->icol = 0; 1606 b->reallocs = 0; 1607 b->saved_values = 0; 1608 #if defined(PETSC_USE_MAT_SINGLE) 1609 b->setvalueslen = 0; 1610 b->setvaluescopy = PETSC_NULL; 1611 #endif 1612 1613 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1614 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1615 1616 b->sorted = PETSC_FALSE; 1617 b->roworiented = PETSC_TRUE; 1618 b->nonew = 0; 1619 b->diag = 0; 1620 b->solve_work = 0; 1621 b->mult_work = 0; 1622 B->spptr = 0; 1623 B->info.nz_unneeded = (PetscReal)b->maxnz; 1624 b->keepzeroedrows = PETSC_FALSE; 1625 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1627 "MatStoreValues_SeqBAIJ", 1628 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1630 "MatRetrieveValues_SeqBAIJ", 1631 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1633 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1634 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1636 "MatConvert_SeqBAIJ_SeqAIJ", 1637 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 EXTERN_C_END 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1644 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1645 { 1646 Mat C; 1647 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1648 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1649 1650 PetscFunctionBegin; 1651 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1652 1653 *B = 0; 1654 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1655 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1656 c = (Mat_SeqBAIJ*)C->data; 1657 1658 c->bs = a->bs; 1659 c->bs2 = a->bs2; 1660 c->mbs = a->mbs; 1661 c->nbs = a->nbs; 1662 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1663 1664 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1665 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1666 for (i=0; i<mbs; i++) { 1667 c->imax[i] = a->imax[i]; 1668 c->ilen[i] = a->ilen[i]; 1669 } 1670 1671 /* allocate the matrix space */ 1672 c->singlemalloc = PETSC_TRUE; 1673 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1674 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1675 c->j = (int*)(c->a + nz*bs2); 1676 c->i = c->j + nz; 1677 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1678 if (mbs > 0) { 1679 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1680 if (cpvalues == MAT_COPY_VALUES) { 1681 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1682 } else { 1683 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1684 } 1685 } 1686 1687 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1688 c->sorted = a->sorted; 1689 c->roworiented = a->roworiented; 1690 c->nonew = a->nonew; 1691 1692 if (a->diag) { 1693 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1694 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1695 for (i=0; i<mbs; i++) { 1696 c->diag[i] = a->diag[i]; 1697 } 1698 } else c->diag = 0; 1699 c->nz = a->nz; 1700 c->maxnz = a->maxnz; 1701 c->solve_work = 0; 1702 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1703 c->mult_work = 0; 1704 C->preallocated = PETSC_TRUE; 1705 C->assembled = PETSC_TRUE; 1706 *B = C; 1707 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 EXTERN_C_BEGIN 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatLoad_SeqBAIJ" 1714 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1715 { 1716 Mat_SeqBAIJ *a; 1717 Mat B; 1718 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1719 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1720 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1721 int *masked,nmask,tmp,bs2,ishift; 1722 PetscScalar *aa; 1723 MPI_Comm comm = ((PetscObject)viewer)->comm; 1724 1725 PetscFunctionBegin; 1726 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1727 bs2 = bs*bs; 1728 1729 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1730 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1731 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1732 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1733 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1734 M = header[1]; N = header[2]; nz = header[3]; 1735 1736 if (header[3] < 0) { 1737 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1738 } 1739 1740 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1741 1742 /* 1743 This code adds extra rows to make sure the number of rows is 1744 divisible by the blocksize 1745 */ 1746 mbs = M/bs; 1747 extra_rows = bs - M + bs*(mbs); 1748 if (extra_rows == bs) extra_rows = 0; 1749 else mbs++; 1750 if (extra_rows) { 1751 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1752 } 1753 1754 /* read in row lengths */ 1755 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1756 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1757 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1758 1759 /* read in column indices */ 1760 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1761 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1762 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1763 1764 /* loop over row lengths determining block row lengths */ 1765 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1766 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1767 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1768 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1769 masked = mask + mbs; 1770 rowcount = 0; nzcount = 0; 1771 for (i=0; i<mbs; i++) { 1772 nmask = 0; 1773 for (j=0; j<bs; j++) { 1774 kmax = rowlengths[rowcount]; 1775 for (k=0; k<kmax; k++) { 1776 tmp = jj[nzcount++]/bs; 1777 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1778 } 1779 rowcount++; 1780 } 1781 browlengths[i] += nmask; 1782 /* zero out the mask elements we set */ 1783 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1784 } 1785 1786 /* create our matrix */ 1787 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1788 B = *A; 1789 a = (Mat_SeqBAIJ*)B->data; 1790 1791 /* set matrix "i" values */ 1792 a->i[0] = 0; 1793 for (i=1; i<= mbs; i++) { 1794 a->i[i] = a->i[i-1] + browlengths[i-1]; 1795 a->ilen[i-1] = browlengths[i-1]; 1796 } 1797 a->nz = 0; 1798 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1799 1800 /* read in nonzero values */ 1801 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1802 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1803 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1804 1805 /* set "a" and "j" values into matrix */ 1806 nzcount = 0; jcount = 0; 1807 for (i=0; i<mbs; i++) { 1808 nzcountb = nzcount; 1809 nmask = 0; 1810 for (j=0; j<bs; j++) { 1811 kmax = rowlengths[i*bs+j]; 1812 for (k=0; k<kmax; k++) { 1813 tmp = jj[nzcount++]/bs; 1814 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1815 } 1816 } 1817 /* sort the masked values */ 1818 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1819 1820 /* set "j" values into matrix */ 1821 maskcount = 1; 1822 for (j=0; j<nmask; j++) { 1823 a->j[jcount++] = masked[j]; 1824 mask[masked[j]] = maskcount++; 1825 } 1826 /* set "a" values into matrix */ 1827 ishift = bs2*a->i[i]; 1828 for (j=0; j<bs; j++) { 1829 kmax = rowlengths[i*bs+j]; 1830 for (k=0; k<kmax; k++) { 1831 tmp = jj[nzcountb]/bs ; 1832 block = mask[tmp] - 1; 1833 point = jj[nzcountb] - bs*tmp; 1834 idx = ishift + bs2*block + j + bs*point; 1835 a->a[idx] = (MatScalar)aa[nzcountb++]; 1836 } 1837 } 1838 /* zero out the mask elements we set */ 1839 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1840 } 1841 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1842 1843 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1844 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1845 ierr = PetscFree(aa);CHKERRQ(ierr); 1846 ierr = PetscFree(jj);CHKERRQ(ierr); 1847 ierr = PetscFree(mask);CHKERRQ(ierr); 1848 1849 B->assembled = PETSC_TRUE; 1850 1851 ierr = MatView_Private(B);CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 EXTERN_C_END 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatCreateSeqBAIJ" 1858 /*@C 1859 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1860 compressed row) format. For good matrix assembly performance the 1861 user should preallocate the matrix storage by setting the parameter nz 1862 (or the array nnz). By setting these parameters accurately, performance 1863 during matrix assembly can be increased by more than a factor of 50. 1864 1865 Collective on MPI_Comm 1866 1867 Input Parameters: 1868 + comm - MPI communicator, set to PETSC_COMM_SELF 1869 . bs - size of block 1870 . m - number of rows 1871 . n - number of columns 1872 . nz - number of nonzero blocks per block row (same for all rows) 1873 - nnz - array containing the number of nonzero blocks in the various block rows 1874 (possibly different for each block row) or PETSC_NULL 1875 1876 Output Parameter: 1877 . A - the matrix 1878 1879 Options Database Keys: 1880 . -mat_no_unroll - uses code that does not unroll the loops in the 1881 block calculations (much slower) 1882 . -mat_block_size - size of the blocks to use 1883 1884 Level: intermediate 1885 1886 Notes: 1887 A nonzero block is any block that as 1 or more nonzeros in it 1888 1889 The block AIJ format is fully compatible with standard Fortran 77 1890 storage. That is, the stored row and column indices can begin at 1891 either one (as in Fortran) or zero. See the users' manual for details. 1892 1893 Specify the preallocated storage with either nz or nnz (not both). 1894 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1895 allocation. For additional details, see the users manual chapter on 1896 matrices. 1897 1898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1899 @*/ 1900 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1901 { 1902 int ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1906 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1907 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1913 /*@C 1914 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1915 per row in the matrix. For good matrix assembly performance the 1916 user should preallocate the matrix storage by setting the parameter nz 1917 (or the array nnz). By setting these parameters accurately, performance 1918 during matrix assembly can be increased by more than a factor of 50. 1919 1920 Collective on MPI_Comm 1921 1922 Input Parameters: 1923 + A - the matrix 1924 . bs - size of block 1925 . nz - number of block nonzeros per block row (same for all rows) 1926 - nnz - array containing the number of block nonzeros in the various block rows 1927 (possibly different for each block row) or PETSC_NULL 1928 1929 Options Database Keys: 1930 . -mat_no_unroll - uses code that does not unroll the loops in the 1931 block calculations (much slower) 1932 . -mat_block_size - size of the blocks to use 1933 1934 Level: intermediate 1935 1936 Notes: 1937 The block AIJ format is fully compatible with standard Fortran 77 1938 storage. That is, the stored row and column indices can begin at 1939 either one (as in Fortran) or zero. See the users' manual for details. 1940 1941 Specify the preallocated storage with either nz or nnz (not both). 1942 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1943 allocation. For additional details, see the users manual chapter on 1944 matrices. 1945 1946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1947 @*/ 1948 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1949 { 1950 Mat_SeqBAIJ *b; 1951 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1952 PetscTruth flg; 1953 1954 PetscFunctionBegin; 1955 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1956 if (!flg) PetscFunctionReturn(0); 1957 1958 B->preallocated = PETSC_TRUE; 1959 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1960 if (nnz && newbs != bs) { 1961 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1962 } 1963 bs = newbs; 1964 1965 mbs = B->m/bs; 1966 nbs = B->n/bs; 1967 bs2 = bs*bs; 1968 1969 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1970 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1971 } 1972 1973 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1974 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1975 if (nnz) { 1976 for (i=0; i<mbs; i++) { 1977 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1978 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1979 } 1980 } 1981 1982 b = (Mat_SeqBAIJ*)B->data; 1983 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1984 B->ops->solve = MatSolve_SeqBAIJ_Update; 1985 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1986 if (!flg) { 1987 switch (bs) { 1988 case 1: 1989 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1990 B->ops->mult = MatMult_SeqBAIJ_1; 1991 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1992 break; 1993 case 2: 1994 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1995 B->ops->mult = MatMult_SeqBAIJ_2; 1996 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1997 break; 1998 case 3: 1999 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2000 B->ops->mult = MatMult_SeqBAIJ_3; 2001 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2002 break; 2003 case 4: 2004 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2005 B->ops->mult = MatMult_SeqBAIJ_4; 2006 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2007 break; 2008 case 5: 2009 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2010 B->ops->mult = MatMult_SeqBAIJ_5; 2011 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2012 break; 2013 case 6: 2014 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2015 B->ops->mult = MatMult_SeqBAIJ_6; 2016 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2017 break; 2018 case 7: 2019 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2020 B->ops->mult = MatMult_SeqBAIJ_7; 2021 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2022 break; 2023 default: 2024 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2025 B->ops->mult = MatMult_SeqBAIJ_N; 2026 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2027 break; 2028 } 2029 } 2030 b->bs = bs; 2031 b->mbs = mbs; 2032 b->nbs = nbs; 2033 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2034 if (!nnz) { 2035 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2036 else if (nz <= 0) nz = 1; 2037 for (i=0; i<mbs; i++) b->imax[i] = nz; 2038 nz = nz*mbs; 2039 } else { 2040 nz = 0; 2041 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2042 } 2043 2044 /* allocate the matrix space */ 2045 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2046 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2047 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2048 b->j = (int*)(b->a + nz*bs2); 2049 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2050 b->i = b->j + nz; 2051 b->singlemalloc = PETSC_TRUE; 2052 2053 b->i[0] = 0; 2054 for (i=1; i<mbs+1; i++) { 2055 b->i[i] = b->i[i-1] + b->imax[i-1]; 2056 } 2057 2058 /* b->ilen will count nonzeros in each block row so far. */ 2059 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2060 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2061 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2062 2063 b->bs = bs; 2064 b->bs2 = bs2; 2065 b->mbs = mbs; 2066 b->nz = 0; 2067 b->maxnz = nz*bs2; 2068 B->info.nz_unneeded = (PetscReal)b->maxnz; 2069 PetscFunctionReturn(0); 2070 } 2071 2072